maslionok committed on
Commit
1f2982b
·
1 Parent(s): ac94924
Files changed (1) hide show
  1. app.py +36 -17
app.py CHANGED
@@ -24,27 +24,46 @@ Jählings abbricht."""
24
 
25
  def process_ocr_qa(text):
26
  try:
27
- result = pipeline(text)
28
-
29
  # Format the output for better readability
30
  if isinstance(result, dict):
31
  output_lines = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  for key, value in result.items():
33
- if key == 'corrections':
34
- output_lines.append(f"πŸ“ **{key.replace('_', ' ').title()}:**")
35
- if isinstance(value, list) and value:
36
- for correction in value:
37
- output_lines.append(f" β€’ {correction}")
38
- elif isinstance(value, dict) and value:
39
- for sub_key, sub_value in value.items():
40
- output_lines.append(f" β€’ {sub_key}: {sub_value}")
41
- else:
42
- output_lines.append(f" No corrections found")
43
- elif key == 'quality_score':
44
- output_lines.append(f"⭐ **Quality Score:** {value}")
45
- elif key == 'processed_text':
46
- output_lines.append(f"✨ **Processed Text:**\n{value}")
47
- else:
48
  output_lines.append(f"πŸ” **{key.replace('_', ' ').title()}:** {value}")
49
 
50
  return "\n\n".join(output_lines)
 
24
 
25
  def process_ocr_qa(text):
26
  try:
27
+ result = pipeline(text, diagnostics=True)
28
+
29
  # Format the output for better readability
30
  if isinstance(result, dict):
31
  output_lines = []
32
+
33
+ # Language detection
34
+ if 'language' in result:
35
+ output_lines.append(f"🌍 **Language:** {result['language']}")
36
+
37
+ # Quality score
38
+ if 'score' in result:
39
+ score = result['score']
40
+ score_emoji = "🟒" if score >= 0.8 else "🟑" if score >= 0.5 else "πŸ”΄"
41
+ output_lines.append(f"{score_emoji} **Quality Score:** {score:.2f}")
42
+
43
+ # Diagnostics section
44
+ if 'diagnostics' in result and result['diagnostics']:
45
+ diagnostics = result['diagnostics']
46
+ output_lines.append("πŸ“Š **Detailed Analysis:**")
47
+
48
+ # Model information
49
+ if 'model_id' in diagnostics:
50
+ output_lines.append(f" πŸ€– Model: {diagnostics['model_id']}")
51
+
52
+ # Known tokens
53
+ if 'known_tokens' in diagnostics and diagnostics['known_tokens']:
54
+ output_lines.append(f" βœ… Known tokens ({len(diagnostics['known_tokens'])}): {', '.join(diagnostics['known_tokens'][:10])}")
55
+ if len(diagnostics['known_tokens']) > 10:
56
+ output_lines.append(f" ... and {len(diagnostics['known_tokens']) - 10} more")
57
+
58
+ # Unknown tokens (potential OCR errors)
59
+ if 'unknown_tokens' in diagnostics and diagnostics['unknown_tokens']:
60
+ output_lines.append(f" ❌ Potential OCR errors ({len(diagnostics['unknown_tokens'])}): {', '.join(diagnostics['unknown_tokens'])}")
61
+ elif 'unknown_tokens' in diagnostics:
62
+ output_lines.append(" ✨ No potential OCR errors detected!")
63
+
64
+ # Other fields
65
  for key, value in result.items():
66
+ if key not in ['language', 'score', 'diagnostics']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  output_lines.append(f"πŸ” **{key.replace('_', ' ').title()}:** {value}")
68
 
69
  return "\n\n".join(output_lines)