Spaces:

VinayHajare
/

OCRHub

Sleeping

App Files Files Community

VinayHajare committed on Dec 31, 2024

Commit

34bb902

verified ·

1 Parent(s): 8a3322f

Create app.py

Browse files

Files changed (1) hide show

app.py +250 -0

app.py ADDED Viewed

	@@ -0,0 +1,250 @@

+import streamlit as st
+from ocr_processor import OCRProcessor
+import tempfile
+import os
+from PIL import Image
+import json
+# Page configuration
+st.set_page_config(
+    page_title="OCR Hub",
+    page_icon="🔍",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for better UI
+st.markdown("""
+    <style>
+    .stApp {
+        max-width: 100%;
+        padding: 1rem;
+    }
+    .main {
+        background-color: #f8f9fa;
+    }
+    .stButton button {
+        width: 100%;
+        border-radius: 5px;
+        height: 3em;
+        background-color: #4CAF50;
+        color: white;
+    }
+    .stSelectbox {
+        margin-bottom: 1rem;
+    }
+    .upload-text {
+        text-align: center;
+        padding: 2rem;
+        border: 2px dashed #ccc;
+        border-radius: 10px;
+        background-color: #ffffff;
+    }
+    .stImage {
+        border-radius: 10px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    }
+    .gallery {
+        display: grid;
+        grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
+        gap: 1rem;
+        padding: 1rem;
+    }
+    .gallery-item {
+        border: 1px solid #ddd;
+        border-radius: 8px;
+        padding: 0.5rem;
+        background: white;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+def get_available_models():
+    return ["llava:7b", "MiniCPM-V","llama3.2-vision:11b"]
+def process_single_image(processor, image_path, format_type, enable_preprocessing):
+    """Process a single image and return the result"""
+    try:
+        result = processor.process_image(
+            image_path=image_path,
+            format_type=format_type,
+            preprocess=enable_preprocessing
+        )
+        return result
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
+def process_batch_images(processor, image_paths, format_type, enable_preprocessing):
+    """Process multiple images and return results"""
+    try:
+        results = processor.process_batch(
+            input_path=image_paths,
+            format_type=format_type,
+            preprocess=enable_preprocessing
+        )
+        return results
+    except Exception as e:
+        return {"error": str(e)}
+def main():
+    st.title("🔍 OCR Hub")
+    st.markdown("<p style='text-align: center; color: #666;'>Powered by Ollama Vision Models</p>", unsafe_allow_html=True)
+    # Sidebar controls
+    with st.sidebar:
+        st.header("🎮 Controls")
+        selected_model = st.selectbox(
+            "🤖 Select Vision Model",
+            get_available_models(),
+            index=0,
+        )
+        format_type = st.selectbox(
+            "📄 Output Format",
+            ["markdown", "text", "json", "structured", "key_value"],
+            help="Choose how you want the extracted text to be formatted"
+        )
+        max_workers = st.slider(
+            "🔄 Parallel Processing",
+            min_value=1,
+            max_value=8,
+            value=2,
+            help="Number of images to process in parallel (for batch processing)"
+        )
+        enable_preprocessing = st.checkbox(
+            "🔍 Enable Preprocessing",
+            value=True,
+            help="Apply image enhancement and preprocessing"
+        )
+        st.markdown("---")
+        # Model info box
+        if selected_model == "llava:7b":
+            st.info("LLaVA 7B: Efficient vision-language model optimized for real-time processing")
+        elif selected_model == "MiniCPM-V":
+            st.info("MiniCPM-V 2.6: A GPT-4V Level MLLM for Single Image, Multi Image and Video, outperforms GPT-4o mini, Gemini 1.5 Pro and Claude 3.5 Sonnet")
+        else:
+            st.info("Llama 3.2 Vision: Advanced model with high accuracy for complex text extraction")
+    # Initialize OCR Processor
+    processor = OCRProcessor(model_name=selected_model, max_workers=max_workers)
+    # Main content area with tabs
+    tab1, tab2 = st.tabs(["📸 Image Processing", "ℹ️ About"])
+    with tab1:
+        # File upload area with multiple file support
+        uploaded_files = st.file_uploader(
+            "Drop your images here",
+            type=['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'pdf'],
+            accept_multiple_files=True,
+            help="Supported formats: PNG, JPG, JPEG, TIFF, BMP, PDF"
+        )
+        if uploaded_files:
+            # Create a temporary directory for uploaded files
+            with tempfile.TemporaryDirectory() as temp_dir:
+                image_paths = []
+                # Save uploaded files and collect paths
+                for uploaded_file in uploaded_files:
+                    temp_path = os.path.join(temp_dir, uploaded_file.name)
+                    with open(temp_path, "wb") as f:
+                        f.write(uploaded_file.getvalue())
+                    image_paths.append(temp_path)
+                # Display images in a gallery
+                st.subheader(f"📸 Input Images ({len(uploaded_files)} files)")
+                cols = st.columns(min(len(uploaded_files), 4))
+                for idx, uploaded_file in enumerate(uploaded_files):
+                    with cols[idx % 4]:
+                        image = Image.open(uploaded_file)
+                        st.image(image, use_container_width=True, caption=uploaded_file.name)
+                # Process button
+                if st.button("🚀 Process Images"):
+                    with st.spinner("Processing images..."):
+                        if len(image_paths) == 1:
+                            # Single image processing
+                            result = process_single_image(
+                                processor,
+                                image_paths[0],
+                                format_type,
+                                enable_preprocessing
+                            )
+                            st.subheader("📝 Extracted Text")
+                            st.markdown(result)
+                            # Download button for single result
+                            st.download_button(
+                                "📥 Download Result",
+                                result,
+                                file_name=f"ocr_result.{format_type}",
+                                mime="text/plain"
+                            )
+                        else:
+                            # Batch processing
+                            results = process_batch_images(
+                                processor,
+                                image_paths,
+                                format_type,
+                                enable_preprocessing
+                            )
+                            # Display statistics
+                            st.subheader("📊 Processing Statistics")
+                            col1, col2, col3 = st.columns(3)
+                            with col1:
+                                st.metric("Total Images", results['statistics']['total'])
+                            with col2:
+                                st.metric("Successful", results['statistics']['successful'])
+                            with col3:
+                                st.metric("Failed", results['statistics']['failed'])
+                            # Display results
+                            st.subheader("📝 Extracted Text")
+                            for file_path, text in results['results'].items():
+                                with st.expander(f"Result: {os.path.basename(file_path)}"):
+                                    st.markdown(text)
+                            # Display errors if any
+                            if results['errors']:
+                                st.error("⚠️ Some files had errors:")
+                                for file_path, error in results['errors'].items():
+                                    st.warning(f"{os.path.basename(file_path)}: {error}")
+                            # Download all results as JSON
+                            if st.button("📥 Download All Results"):
+                                json_results = json.dumps(results, indent=2)
+                                st.download_button(
+                                    "📥 Download Results JSON",
+                                    json_results,
+                                    file_name="ocr_results.json",
+                                    mime="application/json"
+                                )
+    with tab2:
+        st.header("About OCR Hub")
+        st.markdown("""
+        This application uses state-of-the-art vision language models through Ollama to extract text from images.
+        ### Features:
+        - 🖼️ Support for multiple image formats
+        - 📦 Batch processing capability
+        - 🔄 Parallel processing
+        - 🔍 Image preprocessing and enhancement
+        - 📊 Multiple output formats
+        - 📥 Easy result download
+        ### Models:
+        - **LLaVA 7B**: Efficient vision-language model for real-time processing
+        - **Llama 3.2 Vision**: Advanced model with high accuracy for complex documents
+        - **MiniCPM-V 2.6**: Process images with any aspect ratio and up to 1.8 million pixels (e.g., 1344x1344)
+        """)
+# Build the page only when this file is executed as the main script, not when imported.
+if __name__ == "__main__":
+    main()