Mridul2003 committed on
Commit
877a062
·
verified ·
1 Parent(s): 28df136

Update services/image_ocr.py

Browse files
Files changed (1) hide show
  1. services/image_ocr.py +51 -18
services/image_ocr.py CHANGED
@@ -1,21 +1,54 @@
1
- from PIL import Image
2
- from io import BytesIO
3
- import requests
4
- from transformers import pipeline
5
 
6
- class ImageOCRService:
7
- def __init__(self):
8
- self.pipe = pipeline("image-text-to-text", model="ds4sd/SmolDocling-256M-preview")
 
 
 
 
9
 
10
- def extract_text(self, image_url: str) -> str:
11
- response = requests.get(image_url)
12
- image = Image.open(BytesIO(response.content)).convert("RGB")
 
 
 
 
 
13
 
14
- result = self.pipe([{
15
- "role": "user",
16
- "content": [
17
- {"type": "image", "image": image},
18
- {"type": "text", "text": "extract text from image"}
19
- ]
20
- }])
21
- return result[0]['generated_text'][1]['content'] if result else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from PIL import Image
2
+ # from io import BytesIO
3
+ # import requests
4
+ # from transformers import pipeline
5
 
6
+ # class ImageOCRService:
7
+ # def __init__(self):
8
+ # self.pipe = pipeline("image-text-to-text", model="ds4sd/SmolDocling-256M-preview")
9
+
10
+ # def extract_text(self, image_url: str) -> str:
11
+ # response = requests.get(image_url)
12
+ # image = Image.open(BytesIO(response.content)).convert("RGB")
13
 
14
+ # result = self.pipe([{
15
+ # "role": "user",
16
+ # "content": [
17
+ # {"type": "image", "image": image},
18
+ # {"type": "text", "text": "extract text from image"}
19
+ # ]
20
+ # }])
21
+ # return result[0]['generated_text'][1]['content'] if result else ""
22
 
23
+ from PIL import Image
24
+ import google.generativeai as genai
25
+ import json
26
+ from dotenv import load_dotenv
27
+ import os
28
+ load_dotenv()
29
+ api_key = os.getenv("GOOGLE_API_KEY")
30
class ImageClassifier:
    """Classify an image as toxic / non-toxic with Gemini and extract its text.

    The model is prompted to answer in JSON. ``classify`` sends the prompt
    together with the image and returns the answer parsed into a dict.
    """

    # Instruction sent alongside every image. The wording is runtime text the
    # model reads, so it is kept exactly as written.
    _PROMPT = (
        "\n"
        "Classify the image into toxic and non toxic or safe or not safe.\n"
        "Output format:\n"
        "toxic:boolean\n"
        "non_toxic:boolean\n"
        "if text is present in image extract text from image :\n"
        "Output format:\n"
        "toxic:boolean\n"
        "non_toxic:boolean\n"
        "text:string\n"
        "Give output in json format and dont include anything in output\n"
    )

    def __init__(self):
        """Expose the classification prompt as ``self.prompt``."""
        self.prompt = self._PROMPT

    @staticmethod
    def _parse_response(raw: str) -> dict:
        """Turn the model's raw text answer into a dict.

        Markdown code fences (```` ```json ```` / ```` ``` ````) are removed
        before parsing, since the model may wrap its JSON in them.

        Args:
            raw: Text returned by the model.

        Returns:
            The decoded JSON object, or ``{"raw": <cleaned text>}`` when the
            text is not valid JSON or does not decode to a JSON object.
        """
        text = raw.strip().replace("```json", "").replace("```", "").strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            return {"raw": text}
        # Callers get a dict in every case; a bare list/bool/string answer is
        # reported through the same fallback shape as unparseable text.
        if not isinstance(parsed, dict):
            return {"raw": text}
        return parsed

    def classify(self, image: "Image.Image") -> dict:
        """Ask Gemini to classify ``image`` and return the parsed answer.

        Args:
            image: The PIL image to classify.

        Returns:
            The model's JSON answer as a dict. If the answer cannot be parsed
            as a JSON object the dict is ``{"raw": <text>}``. If the response
            carries no text at all the dict is ``{"raw": "", "error": <msg>}``.
        """
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')
        response = model.generate_content([self.prompt, image])
        try:
            raw = response.text
        except ValueError as exc:
            # Per the google-generativeai docs, the ``.text`` accessor raises
            # ValueError when the response has no valid text part (e.g. the
            # request was blocked). Surface that instead of crashing.
            return {"raw": "", "error": str(exc)}
        return self._parse_response(raw)