Spaces:
Running
Running
Update services/image_ocr.py
Browse files- services/image_ocr.py +51 -18
services/image_ocr.py
CHANGED
@@ -1,21 +1,54 @@
|
|
1 |
-
from PIL import Image
|
2 |
-
from io import BytesIO
|
3 |
-
import requests
|
4 |
-
from transformers import pipeline
|
5 |
|
6 |
-
class ImageOCRService:
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from PIL import Image
|
2 |
+
# from io import BytesIO
|
3 |
+
# import requests
|
4 |
+
# from transformers import pipeline
|
5 |
|
6 |
+
# class ImageOCRService:
|
7 |
+
# def __init__(self):
|
8 |
+
# self.pipe = pipeline("image-text-to-text", model="ds4sd/SmolDocling-256M-preview")
|
9 |
+
|
10 |
+
# def extract_text(self, image_url: str) -> str:
|
11 |
+
# response = requests.get(image_url)
|
12 |
+
# image = Image.open(BytesIO(response.content)).convert("RGB")
|
13 |
|
14 |
+
# result = self.pipe([{
|
15 |
+
# "role": "user",
|
16 |
+
# "content": [
|
17 |
+
# {"type": "image", "image": image},
|
18 |
+
# {"type": "text", "text": "extract text from image"}
|
19 |
+
# ]
|
20 |
+
# }])
|
21 |
+
# return result[0]['generated_text'][1]['content'] if result else ""
|
22 |
|
23 |
+
from PIL import Image
|
24 |
+
import google.generativeai as genai
|
25 |
+
import json
|
26 |
+
from dotenv import load_dotenv
|
27 |
+
import os
|
28 |
+
# Pull settings from a local .env file (if any) into the process environment
# before the lookup below reads it.
load_dotenv()
# Key handed to the Gemini client; None when GOOGLE_API_KEY is not set.
api_key = os.environ.get("GOOGLE_API_KEY")
|
30 |
+
class ImageClassifier:
    """Classify an image as toxic / non-toxic with Gemini and extract its text.

    The image is sent to the Gemini model together with a fixed instruction
    prompt asking for a JSON reply with ``toxic`` / ``non_toxic`` flags and,
    when the image contains text, a ``text`` field.
    """

    # Gemini model used for every request; override on a subclass or
    # instance to target a different model.
    MODEL_NAME = 'gemini-2.0-flash'

    def __init__(self):
        """Store the instruction prompt that accompanies every image."""
        self.prompt = """
        Classify the image into toxic and non toxic or safe or not safe.
        Output format:
        toxic:boolean
        non_toxic:boolean
        if text is present in image extract text from image :
        Output format:
        toxic:boolean
        non_toxic:boolean
        text:string
        Give output in json format and dont include anything in output
        """

    def classify(self, image: Image.Image) -> dict:
        """Send *image* to Gemini and return the decoded classification.

        Args:
            image: The image to classify.

        Returns:
            The value decoded from the model's JSON reply (the prompt asks
            for an object with ``toxic``, ``non_toxic`` and optionally
            ``text``). If the reply is not valid JSON, ``{"raw": <reply>}``
            is returned so the caller can still inspect the cleaned text.
        """
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(self.MODEL_NAME)
        response = model.generate_content([self.prompt, image])

        # NOTE(review): `response.text` may raise when the reply carries no
        # text (e.g. a blocked response) - confirm against the SDK and decide
        # whether callers expect that error to propagate.
        # The model often wraps its JSON in a Markdown code fence; drop the
        # fence markers before decoding.
        text = response.text.strip().replace("```json", "").replace("```", "").strip()

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Best-effort fallback: hand back the cleaned reply untouched.
            return {"raw": text}
|