# Streamlit front end for the MultiModel OCR Fusion demo.
#
# Flow: the user uploads an image and picks a model in the sidebar; the script
# runs the matching extractor from ocr_cpu, passes the result through
# clean_extracted_text, shows both the raw and cleaned text, saves the raw text
# to extracted_text.json, and offers a case-insensitive keyword search over the
# cleaned text.
import json

import streamlit as st

from ocr_cpu import (
    clean_extracted_text,
    extract_text_got,
    extract_text_llama,
    extract_text_qwen,
)

# Set up page layout and styling (must be the first Streamlit call).
st.set_page_config(page_title="MultiModel OCR Fusion", layout="centered", page_icon="📄")
# NOTE(review): the markup passed here is blank in this copy of the file —
# presumably a <style> block was stripped; confirm against the original.
st.markdown(""" """, unsafe_allow_html=True)

# --- Title Section ---
st.title("📄 MultiModel OCR Fusion")
st.write("Upload an image to extract and clean text using multiple OCR models (GOT, Qwen, LLaMA).")

# --- Image Upload Section ---
uploaded_file = st.file_uploader("Upload an image file", type=["jpg", "jpeg", "png"])

# --- Model Selection ---
st.sidebar.title("Model Selection")
model_choice = st.sidebar.selectbox("Choose OCR Model", ("GOT", "Qwen", "LLaMA"))

# Maps each sidebar option to the extractor that handles it.
extractors = {
    "GOT": extract_text_got,
    "Qwen": extract_text_qwen,
    "LLaMA": extract_text_llama,
}

if uploaded_file is not None:
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Bind both results up front so they exist on every path below. Previously
    # cleaned_text was only assigned when extraction produced non-blank text,
    # so a blank OCR result crashed the "Cleaned Text" widget with NameError.
    extracted_text, cleaned_text = "", ""

    # Extract text from the image based on the selected model.
    with st.spinner(f"Extracting text using the {model_choice} model..."):
        try:
            extracted_text = extractors[model_choice](uploaded_file)

            if not extracted_text.strip():
                # Nothing usable came back; cleaned_text stays empty.
                st.warning(f"No text extracted using {model_choice}.")
            else:
                cleaned_text = clean_extracted_text(extracted_text)
        except Exception as e:
            # Top-level UI boundary: report the failure in the page instead of
            # crashing the app, and fall back to empty results.
            st.error(f"Error during text extraction: {str(e)}")
            extracted_text, cleaned_text = "", ""

    # --- Display Extracted and Cleaned Text ---
    st.subheader(f"Extracted Text using {model_choice}")
    st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)

    st.subheader("Cleaned Text (AI-processed)")
    st.text_area("Cleaned Text", cleaned_text, height=200)

    # Save the raw extracted text for further use (skipped when empty).
    if extracted_text:
        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
            json.dump({"text": extracted_text}, json_file)

    # --- Keyword Search ---
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the extracted text")

    if keyword:
        # Case-insensitive substring match against the cleaned text.
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the cleaned text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")