UniquePratham committed
Commit • 42cb48e
1 Parent(s): 4a67835
Update app.py
app.py
CHANGED
@@ -9,9 +9,10 @@ import torch
 import tempfile
 import os
 import re
+from groq import Groq

 # Page configuration
-st.set_page_config(page_title="
+st.set_page_config(page_title="DualTextOCRFusion", page_icon="๐", layout="wide")
 device = "cuda" if torch.cuda.is_available() else "cpu"

 # Load Surya OCR Models (English + Hindi)

@@ -48,12 +49,23 @@ def clean_extracted_text(text):
     return cleaned_text

 # Polish the text using a model
-def polish_text_with_ai(cleaned_text
-    # Use a text generation pipeline for better language flow
-    from transformers import pipeline
-    nlp = pipeline('text-generation', model=model, tokenizer=tokenizer)
+def polish_text_with_ai(cleaned_text):
     prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."
-
+    client = Groq(api_key="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")
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a meaningful sentence pedantic, you remove extra spaces in between words and word to make the sentence meaningful in English/Hindi/Hinglish according to the sentence."
+            },
+            {
+                "role": "user",
+                "content": prompt,
+            }
+        ],
+        model="gemma2-9b-it",
+    )
+    polished_text=(chat_completion.choices[0].message.content
     return polished_text

 # Extract text using GOT

@@ -137,7 +149,7 @@ if predict_button and uploaded_file:

     # Optionally, polish text with AI model for better language flow
     if model_choice in ["GOT_CPU", "GOT_GPU"]:
-        polished_text = polish_text_with_ai(cleaned_text
+        polished_text = polish_text_with_ai(cleaned_text)
     else:
         polished_text = cleaned_text

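For reference, below is a minimal runnable sketch of the new polish_text_with_ai. Two details are assumptions on top of the commit: the Groq key is read from a GROQ_API_KEY environment variable instead of being hardcoded, and the final assignment (truncated in the diff view above) is written out as a plain attribute access. The prompt, system message, and model name are taken verbatim from the commit.

import os

from groq import Groq  # Groq Python client, same import the commit adds


def polish_text_with_ai(cleaned_text):
    # Same user prompt as in the committed code.
    prompt = f"Correct and clean the following text: '{cleaned_text}' and make it meaningful."

    # Assumption: the key comes from the environment (e.g. a Space secret),
    # not from a literal string in app.py.
    client = Groq(api_key=os.environ["GROQ_API_KEY"])

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a meaningful sentence pedantic, you remove extra spaces in between words and word to make the sentence meaningful in English/Hindi/Hinglish according to the sentence."
            },
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gemma2-9b-it",  # model name as committed
    )

    # Assumption: the truncated line resolves to a simple assignment of the reply text.
    polished_text = chat_completion.choices[0].message.content
    return polished_text

Reading the key from the environment keeps it out of the repository history; the call site from the last hunk is unchanged: polished_text = polish_text_with_ai(cleaned_text).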