om440 om2044 committed on
Commit
bc15e88
·
verified ·
1 Parent(s): f7d34c1

Update app.py (#1)

Browse files

- Update app.py (1989a91dbcd13dd5d6e6005a44dc159603bd1221)


Co-authored-by: Ait Bouhmad Omar <[email protected]>

Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -58,7 +58,18 @@ def extract_text(image):
58
  result = result[result.lower().find("assistant") + len("assistant"):].strip()#hh
59
 
60
  # Remove any remaining conversation markers
61
- result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  print(result)
64
 
 
58
  result = result[result.lower().find("assistant") + len("assistant"):].strip()#hh
59
 
60
  # Remove any remaining conversation markers
61
+ result = result.replace("user", "").replace("Output ONLY the raw text as it appears in the image, nothing else."
62
+ "You have an image containing both handwritten and printed text in French and/or English, and also punctuation and underscores.\n"
63
+ "Your task: transcribe EXACTLY all visible text, preserving all characters, accents, punctuation, spacing, and line breaks.\n"
64
+ "Include tables and forms clearly if present.\n"
65
+ "Do NOT add any explanations, comments, summaries, or extra text.\n"
66
+ "Check the output first to not duplicate results."
67
+ "Preserve the original reading order, including line breaks and the natural layout of tables or forms. Output the text exactly as it appears visually, maintaining the structure."
68
+ "Don't indicate blank space."
69
+ "Don't separate handwritten and printed text."
70
+ "DO NOT confuse between '.' a point and '|' a border."
71
+ "Extract only the raw text and do not add any comment."
72
+ "Extract only the data available.", "").strip()
73
 
74
  print(result)
75