Update app.py
--- a/app.py
+++ b/app.py
@@ -9,9 +9,10 @@ import torch
 import tempfile
 import os
 import re
+from groq import Groq
 
 # Page configuration
-st.set_page_config(page_title="
+st.set_page_config(page_title="DualTextOCRFusion", page_icon="🔍", layout="wide")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load Surya OCR Models (English + Hindi)
@@ -48,12 +49,23 @@ def clean_extracted_text(text):
     return cleaned_text
 
 # Polish the text using a model
-def polish_text_with_ai(cleaned_text
-    # Use a text generation pipeline for better language flow
-    from transformers import pipeline
-    nlp = pipeline('text-generation', model=model, tokenizer=tokenizer)
+def polish_text_with_ai(cleaned_text):
     prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."
-
+    client = Groq(api_key="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")
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a meaningful sentence pedantic, you remove extra spaces in between words and word to make the sentence meaningful in English/Hindi/Hinglish according to the sentence."
+            },
+            {
+                "role": "user",
+                "content": prompt,
+            }
+        ],
+        model="gemma2-9b-it",
+    )
+    polished_text = (chat_completion.choices[0].message.content)
     return polished_text
 
 # Extract text using GOT
@@ -137,7 +149,7 @@ if predict_button and uploaded_file:
 
     # Optionally, polish text with AI model for better language flow
     if model_choice in ["GOT_CPU", "GOT_GPU"]:
-        polished_text = polish_text_with_ai(cleaned_text
+        polished_text = polish_text_with_ai(cleaned_text)
    else:
        polished_text = cleaned_text
 
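
For reference, a minimal standalone sketch of the new Groq-backed polishing step introduced by this commit. It assumes the groq Python SDK is installed and reads the API key from the GROQ_API_KEY environment variable instead of hardcoding it as the commit does; the system prompt is lightly reworded here, while the prompt shape and the gemma2-9b-it model name are taken from the diff:

import os

from groq import Groq

def polish_text_with_ai(cleaned_text):
    # Same prompt shape as in the commit
    prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."
    # Groq() also picks up GROQ_API_KEY from the environment automatically
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    chat_completion = client.chat.completions.create(
        messages=[
            # Reworded system prompt (assumption): fix spacing/wording in English/Hindi/Hinglish
            {"role": "system", "content": "You fix broken spacing and wording so the text reads as meaningful English/Hindi/Hinglish."},
            {"role": "user", "content": prompt},
        ],
        model="gemma2-9b-it",
    )
    return chat_completion.choices[0].message.content

print(polish_text_with_ai("Th is sen tence has bro ken spa cing"))

Reading the key from the environment keeps it out of the Space repository, which is publicly visible when the Space is public.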