UniquePratham committed on
Commit
8308624
1 Parent(s): 7297505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -29
app.py CHANGED
@@ -107,9 +107,9 @@ def extract_text_qwen(image_file, model, processor):
107
  # Function to highlight the keyword in the text
108
 
109
 
110
- def highlight_text(text_sentence, start, end):
111
  text_highlighter(
112
- text=text_sentence,
113
  labels=[("KEYWORD", "#0000FF")],
114
  annotations=[
115
  {"start": start, "end": end, "tag": "KEYWORD"},
@@ -165,7 +165,7 @@ if uploaded_file:
165
  images_dir = 'images'
166
  os.makedirs(images_dir, exist_ok=True)
167
  image_path = os.path.join(
168
- images_dir, "temp_file.jpg" if clipboard_use else uploaded_file.name)
169
  with open(image_path, 'wb') as f:
170
  f.write(uploaded_file.getvalue())
171
 
@@ -175,12 +175,21 @@ if uploaded_file:
175
  result_path = os.path.join(
176
  results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
177
 
 
 
 
 
 
 
 
178
  # Handle predictions
179
  if predict_button:
180
  if os.path.exists(result_path):
181
  with open(result_path, 'r') as f:
182
  result_data = json.load(f)
183
- extracted_text = result_data["polished_text"]
 
 
184
  else:
185
  with st.spinner("Processing..."):
186
  if model_choice == "GOT_CPU":
@@ -199,30 +208,35 @@ if uploaded_file:
199
  image_path, qwen_model, qwen_processor)
200
 
201
  # Clean and polish extracted text
202
- cleaned_text = clean_extracted_text(extracted_text)
203
- polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
204
- "GOT_CPU", "GOT_GPU"] else cleaned_text
 
205
 
206
  # Save results to JSON file
207
- result_data = {"extracted_text": extracted_text,
208
- "cleaner_text": cleaned_text, "polished_text": polished_text}
209
- with open(result_path, 'w') as f:
210
- json.dump(result_data, f)
211
-
212
- # Display extracted text
213
- st.subheader("Extracted Text (Cleaned & Polished)")
214
- st.markdown(cleaned_text, unsafe_allow_html=True)
215
- st.markdown(polished_text, unsafe_allow_html=True)
216
-
217
- # Input search term with real-time update on key press
218
- search_query = st_keyup("Search in extracted text:")
219
-
220
- if search_query:
221
- index = extracted_text.find(search_query)
222
- start = index
223
- len = search_query.length
224
- end = index + len
225
- if index != -1:
226
- highlight_text(extracted_text, start, end)
227
- else:
228
- st.write("No Search Found.")
 
 
 
 
 
107
  # Function to highlight the keyword in the text
108
 
109
 
110
+ def highlight_text(cleaned_text, start, end):
111
  text_highlighter(
112
+ text=cleaned_text,
113
  labels=[("KEYWORD", "#0000FF")],
114
  annotations=[
115
  {"start": start, "end": end, "tag": "KEYWORD"},
 
165
  images_dir = 'images'
166
  os.makedirs(images_dir, exist_ok=True)
167
  image_path = os.path.join(
168
+ images_dir, "temp_file.png" if clipboard_use else uploaded_file.name)
169
  with open(image_path, 'wb') as f:
170
  f.write(uploaded_file.getvalue())
171
 
 
175
  result_path = os.path.join(
176
  results_dir, "temp_file_result.json" if clipboard_use else f"{uploaded_file.name}_result.json")
177
 
178
+ # Display extracted text
179
+ st.subheader("Extracted Text (Cleaned & Polished)")
180
+ if 'cleaned_text' not in st.session_state:
181
+ st.session_state.cleaned_text = ""
182
+ if 'polished_text' not in st.session_state:
183
+ st.session_state.polished_text = ""
184
+
185
  # Handle predictions
186
  if predict_button:
187
  if os.path.exists(result_path):
188
  with open(result_path, 'r') as f:
189
  result_data = json.load(f)
190
+ extracted_text = result_data["extracted_text"]
191
+ cleaned_text = result_data["cleaned_text"]
192
+ polished_text = result_data["polished_text"]
193
  else:
194
  with st.spinner("Processing..."):
195
  if model_choice == "GOT_CPU":
 
208
  image_path, qwen_model, qwen_processor)
209
 
210
  # Clean and polish extracted text
211
+ if not cleaned_text and polished_text:
212
+ cleaned_text = clean_extracted_text(extracted_text)
213
+ polished_text = polish_text_with_ai(cleaned_text) if model_choice in [
214
+ "GOT_CPU", "GOT_GPU"] else cleaned_text
215
 
216
  # Save results to JSON file
217
+ if not os.path.exists(result_path):
218
+ result_data = {"extracted_text": extracted_text,
219
+ "cleaned_text": cleaned_text, "polished_text": polished_text}
220
+ with open(result_path, 'w') as f:
221
+ json.dump(result_data, f)
222
+
223
+ # Save results to session state
224
+ st.session_state.cleaned_text = cleaned_text
225
+ st.session_state.polished_text = polished_text
226
+
227
+ # Display extracted text
228
+ st.markdown(st.session_state.cleaned_text, unsafe_allow_html=True)
229
+ st.markdown(st.session_state.polished_text, unsafe_allow_html=True)
230
+
231
+ # Input search term with real-time update on key press
232
+ search_query = st_keyup("Search in extracted text:")
233
+
234
+ if search_query:
235
+ index = st.session_state.cleaned_text.find(search_query)
236
+ start = index
237
+ len = search_query.length
238
+ end = index + len
239
+ if index != -1:
240
+ highlight_text(st.session_state.cleaned_text, start, end)
241
+ else:
242
+ st.write("No Search Found.")