# Spaces: Running
import os

# Point the XDG cache directory at a writable location inside the container.
# This is done before the third-party imports below, presumably so they pick
# the setting up when they are loaded -- keep this ordering.
os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

import gradio as gr  # noqa: E402  (must follow the cache redirect above)
from impresso_pipelines.ocrqa import OCRQAPipeline  # noqa: E402

# Single pipeline instance, created once at start-up and reused per request.
pipeline = OCRQAPipeline()
# Sample input pre-filled in the textbox: German verse containing typical OCR
# misreadings (e.g. "nacb", "Vegehr", "d:m").
EXAMPLE_TEXT = "\n".join(
    [
        "Vieles Seltsame geschieht auf Erden :",
        "Nichts Seltsameres sieht der Mond",
        "Als das Glück, das im Knopfloch wohnt.",
        "Zaubrisch faßt es den ernsten Mann.",
        "Ohne nach Weib u. Kind zu fragen",
        "Reitet er aus, nach dem Glück zu jagen,",
        "Nur nacb ihm war stets sein Vegehr.",
        "Aber neben ihm reitet der Dämon her",
        "Des Ehrgeizes mit finsterer Tücke,",
        "Und so jagt er zuletzt auf die Brücke,",
        "Die über dem Abgrund, d:m nächtlich schwarzen",
        "Jählings abbricht.",
    ]
)
def _format_corrections(value):
    """Return the bullet lines listed under the ``corrections`` heading."""
    if isinstance(value, list) and value:
        return [f" • {correction}" for correction in value]
    if isinstance(value, dict) and value:
        return [f" • {sub_key}: {sub_value}" for sub_key, sub_value in value.items()]
    # Empty containers and any other type fall through to the placeholder.
    return [" No corrections found"]


def _format_result(result):
    """Render a pipeline result as the text shown in the results box."""
    if not isinstance(result, dict):
        return f"✨ **Processed Result:**\n{result}"

    output_lines = []
    for key, value in result.items():
        if key == 'corrections':
            output_lines.append(f"📝 **{key.replace('_', ' ').title()}:**")
            output_lines.extend(_format_corrections(value))
        elif key == 'quality_score':
            output_lines.append(f"⭐ **Quality Score:** {value}")
        elif key == 'processed_text':
            output_lines.append(f"✨ **Processed Text:**\n{value}")
        else:
            # Any other key is shown generically with a title-cased label.
            output_lines.append(f"🔍 **{key.replace('_', ' ').title()}:** {value}")
    return "\n\n".join(output_lines)


def process_ocr_qa(text):
    """Run the OCR QA pipeline on *text* and return a display string.

    Args:
        text: The OCR text entered by the user.

    Returns:
        A formatted, human-readable summary of the pipeline result. Blank
        input (``None``, empty, or whitespace-only) yields a prompt asking
        for text, and any failure yields an ``"Error: ..."`` message; the
        function never raises.
    """
    # Blank input has nothing to assess, so answer directly instead of
    # invoking the pipeline.
    if text is None or not str(text).strip():
        return "⚠️ Please enter some OCR text to analyze."

    try:
        return _format_result(pipeline(text))
    except Exception as e:
        # UI boundary: report the failure to the user rather than letting the
        # exception propagate out of the event handler.
        print("❌ Pipeline error:", e)
        return f"Error: {e}"
# Markdown shown under the logo at the top of the page.
_INTRO_MARKDOWN = """
# 🔍 OCR Quality Assessment Pipeline Demo
**OCR Quality Assessment** demonstrates how text extracted from OCR (Optical Character Recognition)
is analyzed and improved in the **Impresso** project. This pipeline identifies OCR errors,
assesses text quality, and provides corrections for better text processing.
Try the example below (German text with typical OCR errors) or enter your own OCR text to see how it gets processed!
"""

# Markdown shown inside the (initially hidden) info accordion.
_PIPELINE_INFO_MARKDOWN = """
- **OCR Error Detection**: Identifies common OCR mistakes and artifacts
- **Quality Assessment**: Evaluates the overall quality of OCR text
- **Text Correction**: Suggests improvements for detected errors
- **Language Processing**: Handles multilingual OCR text processing
"""


def _show_pipeline_info():
    """Return an Accordion update that makes the info panel visible and open."""
    return gr.Accordion(visible=True, open=True)


# Build the page: logo, description, input column, results column, info panel.
with gr.Blocks(title="OCR QA Demo") as demo:
    # Logo banner at the very top.
    gr.Image("logo.jpeg", label=None, show_label=False, container=False, height=100)
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: text entry and the button that triggers the analysis.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter OCR Text",
                value=EXAMPLE_TEXT,
                lines=8,
                placeholder="Enter your OCR text here...",
            )
            submit_btn = gr.Button("🔍 Analyze OCR Quality", variant="primary")

        # Right column: results box with a small info button beside it.
        with gr.Column():
            with gr.Row():
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=15,
                    placeholder="Results will appear here...",
                    scale=10,
                )
                info_btn = gr.Button("Pipeline Info", size="sm", scale=1)

    # Pipeline details, hidden until the info button is pressed.
    # NOTE(review): placement at the Blocks level (below the row) is assumed
    # from the statement order -- confirm against the deployed layout.
    with gr.Accordion("📝 About the OCR QA Pipeline", open=False, visible=False) as info_accordion:
        gr.Markdown(_PIPELINE_INFO_MARKDOWN)

    # Run the pipeline on the entered text and show the formatted result.
    submit_btn.click(
        fn=process_ocr_qa,
        inputs=[text_input],
        outputs=output,
    )

    # Reveal and expand the info accordion. This only ever shows it; a second
    # click does not hide it again.
    info_btn.click(
        fn=_show_pipeline_info,
        outputs=info_accordion,
    )

demo.launch(server_name="0.0.0.0", server_port=7860)