Spaces:

impresso-project
/

ocrqa-demo

Sleeping

App Files Files

xet

Community

maslionok committed on Sep 11

Commit

1ee396e

1 Parent(s): 2d73d72

first commit

Browse files

Files changed (4) hide show

Dockerfile +9 -0
README.md +34 -11
app.py +118 -0
logo.jpeg +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,9 @@

+# Base image: official CPython 3.9.
+FROM python:3.9
+# All subsequent relative paths (and the CMD below) resolve against /code.
+WORKDIR /code
+# Install the pinned dependencies before copying the sources so this layer is
+# reused when only application code changes. The requirement with extras is
+# quoted so the shell can never expand "[ocrqa]" as a glob pattern.
+RUN pip install --no-cache-dir --upgrade gradio==4.44.0 "impresso-pipelines[ocrqa]==0.4.6.6"
+# Copy the application (app.py, logo.jpeg, ...) into the image.
+COPY . /code
+# app.py starts the Gradio server itself (0.0.0.0:7860).
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,11 +1,34 @@
----
-title: Ocrqa Demo
-emoji: 😻
-colorFrom: yellow
-colorTo: blue
-sdk: docker
-pinned: false
-short_description: Demo of ocrqa pipeline from impresso pipelines
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# OCR Quality Assessment Pipeline Demo
+This demo showcases the **OCR Quality Assessment Pipeline** from the Impresso project, which analyzes and improves text extracted from OCR (Optical Character Recognition).
+## Features
+- **OCR Error Detection**: Identifies common OCR mistakes and artifacts
+- **Quality Assessment**: Evaluates the overall quality of OCR text
+- **Text Correction**: Suggests improvements for detected errors
+- **Interactive Interface**: User-friendly Gradio web interface
+## Usage
+The demo accepts OCR text input and provides:
+- Quality assessment scores
+- Detected OCR errors
+- Suggested corrections
+- Processed/improved text
+## Example
+Try the provided German text example that contains typical OCR errors like:
+- Dropped characters (e.g., "Zaubrisch" instead of "Zauberisch")
+- Character misrecognition (e.g., "nacb" instead of "nach")
+- Letters misread as punctuation (e.g., "d:m" instead of "dem")
+## Installation
+```bash
+pip install gradio==4.44.0 "impresso-pipelines[ocrqa]==0.4.6.6"
+python app.py
+```
+The demo will be available at `http://localhost:7860`

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import os
+# Redirect cache to a writable path inside container.
+# This assignment deliberately precedes the third-party imports below,
+# presumably so that any library reading XDG_CACHE_HOME at import time
+# already sees the redirected location -- TODO confirm which library needs it.
+os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
+import gradio as gr
+from impresso_pipelines.ocrqa import OCRQAPipeline
+# Single pipeline instance, created once at import time and reused by every
+# call to process_ocr_qa().
+pipeline = OCRQAPipeline()
+# Example OCR text (German text with typical OCR errors); used as the initial
+# value of the input textbox. The misspellings ("nacb", "Vegehr", "d:m") are
+# intentional and must not be corrected.
+EXAMPLE_TEXT = """Vieles Seltsame geschieht auf Erden :
+Nichts Seltsameres sieht der Mond
+Als das Glück, das im Knopfloch wohnt.
+Zaubrisch faßt es den ernsten Mann.
+Ohne nach Weib u. Kind zu fragen
+Reitet er aus, nach dem Glück zu jagen,
+Nur nacb ihm war stets sein Vegehr.
+Aber neben ihm reitet der Dämon her
+Des Ehrgeizes mit finsterer Tücke,
+Und so jagt er zuletzt auf die Brücke,
+Die über dem Abgrund, d:m nächtlich schwarzen
+Jählings abbricht."""
+def process_ocr_qa(text):
+    try:
+        result = pipeline(text)
+        # Format the output for better readability
+        if isinstance(result, dict):
+            output_lines = []
+            for key, value in result.items():
+                if key == 'corrections':
+                    output_lines.append(f"📝 **{key.replace('_', ' ').title()}:**")
+                    if isinstance(value, list) and value:
+                        for correction in value:
+                            output_lines.append(f"  • {correction}")
+                    elif isinstance(value, dict) and value:
+                        for sub_key, sub_value in value.items():
+                            output_lines.append(f"  • {sub_key}: {sub_value}")
+                    else:
+                        output_lines.append(f"  No corrections found")
+                elif key == 'quality_score':
+                    output_lines.append(f"⭐ **Quality Score:** {value}")
+                elif key == 'processed_text':
+                    output_lines.append(f"✨ **Processed Text:**\n{value}")
+                else:
+                    output_lines.append(f"🔍 **{key.replace('_', ' ').title()}:** {value}")
+            return "\n\n".join(output_lines)
+        else:
+            return f"✨ **Processed Result:**\n{result}"
+    except Exception as e:
+        print("❌ Pipeline error:", e)
+        return f"Error: {e}"
+# Create the interface with logo and improved description.
+# Components are laid out in the order they are instantiated inside the
+# nested context managers below.
+with gr.Blocks(title="OCR QA Demo") as demo:
+    # Add logo at the top (logo.jpeg is resolved relative to the working directory)
+    gr.Image("logo.jpeg", label=None, show_label=False, container=False, height=100)
+    gr.Markdown(
+        """
+        # 🔍 OCR Quality Assessment Pipeline Demo
+        **OCR Quality Assessment** demonstrates how text extracted from OCR (Optical Character Recognition)
+        is analyzed and improved in the **Impresso** project. This pipeline identifies OCR errors,
+        assesses text quality, and provides corrections for better text processing.
+        Try the example below (German text with typical OCR errors) or enter your own OCR text to see how it gets processed!
+        """
+    )
+    with gr.Row():
+        # Left column: input text (pre-filled with EXAMPLE_TEXT) and the submit button.
+        with gr.Column():
+            text_input = gr.Textbox(
+                label="Enter OCR Text",
+                value=EXAMPLE_TEXT,
+                lines=8,
+                placeholder="Enter your OCR text here..."
+            )
+            submit_btn = gr.Button("🔍 Analyze OCR Quality", variant="primary")
+        # Right column: results textbox with a small info button beside it.
+        with gr.Column():
+            with gr.Row():
+                # scale=10 vs. scale=1 on the button: the textbox takes most of the row width.
+                output = gr.Textbox(
+                    label="Analysis Results",
+                    lines=15,
+                    placeholder="Results will appear here...",
+                    scale=10
+                )
+                info_btn = gr.Button("Pipeline Info", size="sm", scale=1)
+    # Info accordion for pipeline details; created hidden and collapsed, and
+    # only revealed by the info button handler below.
+    with gr.Accordion("📝 About the OCR QA Pipeline", open=False, visible=False) as info_accordion:
+        gr.Markdown(
+            """
+            - **OCR Error Detection**: Identifies common OCR mistakes and artifacts
+            - **Quality Assessment**: Evaluates the overall quality of OCR text
+            - **Text Correction**: Suggests improvements for detected errors
+            - **Language Processing**: Handles multilingual OCR text processing
+            """
+        )
+    # Run the pipeline on the input text and write the formatted report to the output box.
+    submit_btn.click(
+        fn=process_ocr_qa,
+        inputs=[text_input],
+        outputs=output
+    )
+    # Reveal and expand the info accordion when the info button is clicked.
+    # This is one-way: the handler always returns visible=True, open=True, so
+    # no click hides the accordion again.
+    info_btn.click(
+        fn=lambda: gr.Accordion(visible=True, open=True),
+        outputs=info_accordion
+    )
+# Start the server at import/run time, listening on all interfaces on port 7860.
+demo.launch(server_name="0.0.0.0", server_port=7860)

logo.jpeg ADDED Viewed