Spaces:

impresso-project
/

ocrqa-demo

Running

ocrqa-demo / app.py

maslionok

first commit

1ee396e 13 days ago

4.51 kB

	import os

	# Redirect cache to a writable path inside container.
	# NOTE(review): this assignment sits before the gradio / impresso_pipelines
	# imports below — presumably so those libraries see the override when they
	# are imported. Keep this ordering unless that is confirmed unnecessary.
	os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"

	import gradio as gr
	from impresso_pipelines.ocrqa import OCRQAPipeline

	# Single module-level pipeline instance, created once at import time and
	# reused by every call to process_ocr_qa.
	pipeline = OCRQAPipeline()

	# Example OCR text (German text with typical OCR errors), used as the
	# pre-filled value of the input textbox.
	# The misspellings (e.g. "nacb", "Vegehr", "d:m") are the intentional OCR
	# errors mentioned above — do not "fix" them.
	EXAMPLE_TEXT = """Vieles Seltsame geschieht auf Erden :
	Nichts Seltsameres sieht der Mond
	Als das Glück, das im Knopfloch wohnt.
	Zaubrisch faßt es den ernsten Mann.
	Ohne nach Weib u. Kind zu fragen
	Reitet er aus, nach dem Glück zu jagen,
	Nur nacb ihm war stets sein Vegehr.
	Aber neben ihm reitet der Dämon her
	Des Ehrgeizes mit finsterer Tücke,
	Und so jagt er zuletzt auf die Brücke,
	Die über dem Abgrund, d:m nächtlich schwarzen
	Jählings abbricht."""

	def _format_result(result):
	    """Render a pipeline result as human-readable text.

	    A non-dict result is shown verbatim under a generic heading. For a dict,
	    every key becomes one section and sections are joined by blank lines;
	    the keys 'corrections', 'quality_score' and 'processed_text' get
	    dedicated headings, any other key gets a title-cased generic heading.
	    """
	    if not isinstance(result, dict):
	        return f"✨ Processed Result:\n{result}"

	    output_lines = []
	    for key, value in result.items():
	        if key == 'corrections':
	            output_lines.append("📝 Corrections:")
	            if isinstance(value, dict):
	                bullets = [
	                    f" • {sub_key}: {sub_value}"
	                    for sub_key, sub_value in value.items()
	                ]
	            elif isinstance(value, (list, tuple, set, frozenset)):
	                bullets = [f" • {correction}" for correction in value]
	            elif isinstance(value, str) and value.strip():
	                # A non-blank string is still a correction; show it rather
	                # than claiming that none were found.
	                bullets = [f" • {value}"]
	            else:
	                bullets = []
	            # Empty containers and unsupported values fall back to the notice.
	            output_lines.extend(bullets or [" No corrections found"])
	        elif key == 'quality_score':
	            output_lines.append(f"⭐ Quality Score: {value}")
	        elif key == 'processed_text':
	            output_lines.append(f"✨ Processed Text:\n{value}")
	        else:
	            output_lines.append(f"🔍 {key.replace('_', ' ').title()}: {value}")

	    return "\n\n".join(output_lines)


	def process_ocr_qa(text):
	    """Run the module-level ``pipeline`` on *text* and format its result.

	    Args:
	        text: The OCR text to analyse, passed unchanged to ``pipeline``.

	    Returns:
	        A display string. On success this is the formatted pipeline result;
	        if the pipeline call or the formatting raises, the exception is
	        printed and ``"Error: <message>"`` is returned instead, so this
	        function itself never raises an ``Exception`` subclass.
	    """
	    try:
	        return _format_result(pipeline(text))
	    except Exception as e:
	        # Deliberately broad: this is the UI callback boundary, so failures
	        # are logged and surfaced as text in the results box.
	        print("❌ Pipeline error:", e)
	        return f"Error: {e}"

	# Build the Gradio Blocks UI: logo, intro text, input/output widgets,
	# a hidden info accordion, and the event wiring for the two buttons.
	with gr.Blocks(title="OCR QA Demo") as demo:
	# Add logo at the top (loaded from "logo.jpeg"; presumably shipped next to
	# this script — confirm the file exists in the deployment).
	gr.Image("logo.jpeg", label=None, show_label=False, container=False, height=100)

	# Introductory description shown to the user.
	gr.Markdown(
	"""
	# 🔍 OCR Quality Assessment Pipeline Demo

	OCR Quality Assessment demonstrates how text extracted from OCR (Optical Character Recognition)
	is analyzed and improved in the Impresso project. This pipeline identifies OCR errors,
	assesses text quality, and provides corrections for better text processing.

	Try the example below (German text with typical OCR errors) or enter your own OCR text to see how it gets processed!
	"""
	)

	# Input widgets: a textbox pre-filled with EXAMPLE_TEXT and the button
	# that triggers the analysis.
	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(
	label="Enter OCR Text",
	value=EXAMPLE_TEXT,
	lines=8,
	placeholder="Enter your OCR text here..."
	)
	submit_btn = gr.Button("🔍 Analyze OCR Quality", variant="primary")

	# Output widgets: the results textbox and a small button that reveals
	# the pipeline info accordion.
	with gr.Column():
	with gr.Row():
	output = gr.Textbox(
	label="Analysis Results",
	lines=15,
	placeholder="Results will appear here...",
	scale=10
	)
	info_btn = gr.Button("Pipeline Info", size="sm", scale=1)

	# Info accordion for pipeline details; created hidden (visible=False)
	# and collapsed (open=False).
	with gr.Accordion("📝 About the OCR QA Pipeline", open=False, visible=False) as info_accordion:
	gr.Markdown(
	"""
	- OCR Error Detection: Identifies common OCR mistakes and artifacts
	- Quality Assessment: Evaluates the overall quality of OCR text
	- Text Correction: Suggests improvements for detected errors
	- Language Processing: Handles multilingual OCR text processing
	"""
	)

	# Run process_ocr_qa on the input text and show the returned string in
	# the results textbox.
	submit_btn.click(
	fn=process_ocr_qa,
	inputs=[text_input],
	outputs=output
	)

	# Reveal and expand the info accordion when the info button is clicked.
	# NOTE: this is one-way, not a toggle — the callback always returns
	# visible=True / open=True, so clicking again does not hide it.
	info_btn.click(
	fn=lambda: gr.Accordion(visible=True, open=True),
	outputs=info_accordion
	)

	# Start the Gradio server, bound to 0.0.0.0 (all network interfaces) on
	# port 7860. This runs at module level, i.e. whenever the file is executed.
	demo.launch(server_name="0.0.0.0", server_port=7860)