UniquePratham committed on
Commit
a0652de
•
1 Parent(s): aba3166

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -54
app.py CHANGED
@@ -1,80 +1,73 @@
1
- # app.py
2
-
3
  import streamlit as st
4
- from ocr_cpu import extract_text_got, clean_text_with_ai # Import OCR and AI cleaning functions
5
  import json
6
 
7
- # --- UI Styling ---
8
- st.set_page_config(page_title="DualTextOCRFusion", layout="centered", page_icon="🔍")
9
 
10
  st.markdown(
11
  """
12
  <style>
13
- .reportview-container {
14
- background: #f4f4f4;
15
- }
16
- .sidebar .sidebar-content {
17
- background: #e0e0e0;
18
- }
19
- h1 {
20
- color: #007BFF;
21
- }
22
- .upload-btn {
23
- background-color: #007BFF;
24
- color: white;
25
- padding: 10px;
26
- border-radius: 5px;
27
- text-align: center;
28
- }
29
  </style>
30
- """,
31
- unsafe_allow_html=True
32
  )
33
 
34
- # --- Title ---
35
- st.title("🔍 DualTextOCRFusion")
36
- st.write("Upload an image with **Hindi**, **English**, or **Hinglish** text to extract and clean text for keyword search.")
37
 
38
  # --- Image Upload Section ---
39
- uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
 
 
 
 
40
 
41
  if uploaded_file is not None:
42
  st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
43
 
44
- # Extract text from the image using GOT OCR function
45
- with st.spinner("Extracting text from the image..."):
46
  try:
47
- extracted_text = extract_text_got(uploaded_file) # Use GOT OCR to extract text
 
 
 
 
 
 
 
48
  if not extracted_text.strip():
49
- st.warning("No text extracted from the image.")
50
- cleaned_text = ""
51
  else:
52
- # Clean the extracted text using AI
53
- with st.spinner("Cleaning the extracted text using AI..."):
54
- cleaned_text = clean_text_with_ai(extracted_text)
55
- if cleaned_text.startswith("Error"):
56
- st.error(cleaned_text)
57
- else:
58
- st.success("Text extraction and cleaning successful.")
59
  except Exception as e:
60
  st.error(f"Error during text extraction: {str(e)}")
61
- extracted_text = cleaned_text = ""
62
 
63
- # Display cleaned text
64
- if cleaned_text and not cleaned_text.startswith("Error"):
65
- st.subheader("Cleaned Extracted Text")
66
- st.text_area("Cleaned Text", cleaned_text, height=250)
67
 
68
- # Save cleaned text for search
 
 
 
 
69
  with open("extracted_text.json", "w") as json_file:
70
- json.dump({"text": cleaned_text}, json_file)
71
 
72
- # --- Keyword Search ---
73
- st.subheader("Search for Keywords")
74
- keyword = st.text_input("Enter a keyword to search in the cleaned text")
75
 
76
- if keyword:
77
- if keyword.lower() in cleaned_text.lower():
78
- st.success(f"Keyword **'{keyword}'** found in the text!")
79
- else:
80
- st.error(f"Keyword **'{keyword}'** not found.")
 
 
 
1
  import streamlit as st
2
+ from ocr_cpu import extract_text_got, extract_text_qwen, extract_text_llama, clean_extracted_text
3
  import json
4
 
5
+ # Set up page layout and styling
6
+ st.set_page_config(page_title="MultiModel OCR Fusion", layout="centered", page_icon="📄")
7
 
8
  st.markdown(
9
  """
10
  <style>
11
+ .reportview-container { background: #f4f4f4; }
12
+ .sidebar .sidebar-content { background: #e0e0e0; }
13
+ h1 { color: #007BFF; }
14
+ .upload-btn { background-color: #007BFF; color: white; padding: 10px; border-radius: 5px; text-align: center; }
 
 
 
 
 
 
 
 
 
 
 
 
15
  </style>
16
+ """, unsafe_allow_html=True
 
17
  )
18
 
19
+ # --- Title Section ---
20
+ st.title("📄 MultiModel OCR Fusion")
21
+ st.write("Upload an image to extract and clean text using multiple OCR models (GOT, Qwen, LLaMA).")
22
 
23
  # --- Image Upload Section ---
24
+ uploaded_file = st.file_uploader("Upload an image file", type=["jpg", "jpeg", "png"])
25
+
26
+ # Model selection
27
+ st.sidebar.title("Model Selection")
28
+ model_choice = st.sidebar.selectbox("Choose OCR Model", ("GOT", "Qwen", "LLaMA"))
29
 
30
  if uploaded_file is not None:
31
  st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
32
 
33
+ # Extract text from the image based on selected model
34
+ with st.spinner(f"Extracting text using the {model_choice} model..."):
35
  try:
36
+ if model_choice == "GOT":
37
+ extracted_text = extract_text_got(uploaded_file)
38
+ elif model_choice == "Qwen":
39
+ extracted_text = extract_text_qwen(uploaded_file)
40
+ elif model_choice == "LLaMA":
41
+ extracted_text = extract_text_llama(uploaded_file)
42
+
43
+ # If no text extracted
44
  if not extracted_text.strip():
45
+ st.warning(f"No text extracted using {model_choice}.")
 
46
  else:
47
+ # Clean the extracted text
48
+ cleaned_text = clean_extracted_text(extracted_text)
 
 
 
 
 
49
  except Exception as e:
50
  st.error(f"Error during text extraction: {str(e)}")
51
+ extracted_text, cleaned_text = "", ""
52
 
53
+ # --- Display Extracted and Cleaned Text ---
54
+ st.subheader(f"Extracted Text using {model_choice}")
55
+ st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)
 
56
 
57
+ st.subheader("Cleaned Text (AI-processed)")
58
+ st.text_area("Cleaned Text", cleaned_text, height=200)
59
+
60
+ # Save extracted text for further use
61
+ if extracted_text:
62
  with open("extracted_text.json", "w") as json_file:
63
+ json.dump({"text": extracted_text}, json_file)
64
 
65
+ # --- Keyword Search ---
66
+ st.subheader("Search for Keywords")
67
+ keyword = st.text_input("Enter a keyword to search in the extracted text")
68
 
69
+ if keyword:
70
+ if keyword.lower() in cleaned_text.lower():
71
+ st.success(f"Keyword **'{keyword}'** found in the cleaned text!")
72
+ else:
73
+ st.error(f"Keyword **'{keyword}'** not found.")