Update app.py
--- a/app.py
+++ b/app.py
@@ -9,9 +9,10 @@ import torch
 import tempfile
 import os
 import re
+from groq import Groq
 
 # Page configuration
-st.set_page_config(page_title="
+st.set_page_config(page_title="DualTextOCRFusion", page_icon="🔍", layout="wide")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load Surya OCR Models (English + Hindi)
@@ -48,12 +49,23 @@ def clean_extracted_text(text):
     return cleaned_text
 
 # Polish the text using a model
-def polish_text_with_ai(cleaned_text
-    # Use a text generation pipeline for better language flow
-    from transformers import pipeline
-    nlp = pipeline('text-generation', model=model, tokenizer=tokenizer)
+def polish_text_with_ai(cleaned_text):
     prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."
-
+    client = Groq(api_key="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")
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a meaningful sentence pedantic, you remove extra spaces in between words and word to make the sentence meaningful in English/Hindi/Hinglish according to the sentence."
+            },
+            {
+                "role": "user",
+                "content": prompt,
+            }
+        ],
+        model="gemma2-9b-it",
+    )
+    polished_text = (chat_completion.choices[0].message.content)
     return polished_text
 
 # Extract text using GOT
@@ -137,7 +149,7 @@ if predict_button and uploaded_file:
 
     # Optionally, polish text with AI model for better language flow
     if model_choice in ["GOT_CPU", "GOT_GPU"]:
-        polished_text = polish_text_with_ai(cleaned_text
+        polished_text = polish_text_with_ai(cleaned_text)
    else:
        polished_text = cleaned_text
 
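
For reference, a minimal standalone sketch of the new Groq-backed polishing step introduced by this commit. It assumes the groq Python SDK is installed and reads the API key from the GROQ_API_KEY environment variable instead of hardcoding it as the commit does; the system prompt is lightly reworded here, while the prompt shape and the gemma2-9b-it model name are taken from the diff:

import os

from groq import Groq

def polish_text_with_ai(cleaned_text):
    # Same prompt shape as in the commit
    prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."
    # Groq() also picks up GROQ_API_KEY from the environment automatically
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    chat_completion = client.chat.completions.create(
        messages=[
            # Reworded system prompt (assumption): fix spacing/wording in English/Hindi/Hinglish
            {"role": "system", "content": "You fix broken spacing and wording so the text reads as meaningful English/Hindi/Hinglish."},
            {"role": "user", "content": prompt},
        ],
        model="gemma2-9b-it",
    )
    return chat_completion.choices[0].message.content

print(polish_text_with_ai("Th is sen tence has bro ken spa cing"))

Reading the key from the environment keeps it out of the Space repository, which is publicly visible when the Space is public.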