# app.py by UniquePratham, commit a0652de ("Update app.py"), 2.86 kB.
import streamlit as st
from ocr_cpu import extract_text_got, extract_text_qwen, extract_text_llama, clean_extracted_text
import json
# Set up page layout and styling.
# The page icon is the "page facing up" emoji; the previous literal was a
# mis-decoded (mojibake) copy of its UTF-8 bytes.
st.set_page_config(page_title="MultiModel OCR Fusion", layout="centered", page_icon="📄")

# Inject custom CSS; unsafe_allow_html is required for the raw <style> tag.
st.markdown(
    """
<style>
.reportview-container { background: #f4f4f4; }
.sidebar .sidebar-content { background: #e0e0e0; }
h1 { color: #007BFF; }
.upload-btn { background-color: #007BFF; color: white; padding: 10px; border-radius: 5px; text-align: center; }
</style>
""", unsafe_allow_html=True
)
# --- Title Section ---
# The leading emoji was previously a mis-decoded (mojibake) byte sequence.
st.title("📄 MultiModel OCR Fusion")
st.write("Upload an image to extract and clean text using multiple OCR models (GOT, Qwen, LLaMA).")

# --- Image Upload Section ---
# `uploaded_file` stays None until the user provides an image.
uploaded_file = st.file_uploader("Upload an image file", type=["jpg", "jpeg", "png"])

# --- Model Selection (sidebar) ---
st.sidebar.title("Model Selection")
model_choice = st.sidebar.selectbox("Choose OCR Model", ("GOT", "Qwen", "LLaMA"))
# --- OCR pipeline: runs only once an image has been uploaded ---
if uploaded_file is not None:
    # NOTE(review): newer Streamlit releases appear to deprecate
    # `use_column_width`; confirm against the pinned version before changing.
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Map each sidebar choice to the extractor that handles it.
    extractors = {
        "GOT": extract_text_got,
        "Qwen": extract_text_qwen,
        "LLaMA": extract_text_llama,
    }

    # Bind both results up front so the display and search sections below
    # never hit a NameError (previously `cleaned_text` was left unbound when
    # the model returned empty or whitespace-only text).
    extracted_text, cleaned_text = "", ""

    # Extract text from the image based on the selected model.
    with st.spinner(f"Extracting text using the {model_choice} model..."):
        try:
            # `or ""` guards against an extractor returning None.
            extracted_text = extractors[model_choice](uploaded_file) or ""

            if not extracted_text.strip():
                st.warning(f"No text extracted using {model_choice}.")
            else:
                # Clean the extracted text; fall back to "" on a None result.
                cleaned_text = clean_extracted_text(extracted_text) or ""
        except Exception as e:
            # Top-level UI boundary: report the failure and show empty results
            # instead of crashing the app.
            st.error(f"Error during text extraction: {e}")
            extracted_text, cleaned_text = "", ""

    # --- Display Extracted and Cleaned Text ---
    st.subheader(f"Extracted Text using {model_choice}")
    st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)
    st.subheader("Cleaned Text (AI-processed)")
    st.text_area("Cleaned Text", cleaned_text, height=200)

    # Save extracted text for further use.
    if extracted_text:
        # Explicit encoding keeps the file independent of the platform default.
        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
            json.dump({"text": extracted_text}, json_file)

    # --- Keyword Search ---
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the extracted text")
    if keyword:
        # Case-insensitive substring match against the cleaned text.
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the cleaned text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")