Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
 # app.py — HTR Space with Feedback Loop, Memory Post-Correction, and GRPO Export
+
 import os, time, json, hashlib, difflib, uuid, csv
 from datetime import datetime
 from collections import Counter, defaultdict
@@ -71,9 +72,10 @@ def _build_inputs(processor, tokenizer, image: Image.Image, prompt: str):
 
     if tokenizer and hasattr(tokenizer, "apply_chat_template"):
         chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
+        # Explicitly set truncation=False to prevent the token mismatch error
+        return processor(text=[chat_prompt], images=[image], return_tensors="pt", truncation=False)
 
-    return processor(text=[prompt], images=[image], return_tensors="pt")
+    return processor(text=[prompt], images=[image], return_tensors="pt", truncation=False)
 
 
 def _decode_text(model, processor, tokenizer, output_ids, prompt: str):
@@ -142,8 +144,9 @@ def _apply_memory(text: str, model_choice: str, enabled: bool):
             text = text.replace(wrong, right)
     # 2) Global replacements
     for wrong, right in rules.get("global", {}).items():
-
-
+        for wrong, right in rules.get("global", {}).items():
+            if wrong and right:
+                text = text.replace(wrong, right)
     return text
 
 def _compile_rules_from_feedback(min_count: int = 2, max_phrase_len: int = 40):
@@ -608,4 +611,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 # The `if __name__ == "__main__":` block should be at the top level
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(share=True)
+    demo.queue(max_size=50).launch(share=True)
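The `_build_inputs` change above passes `truncation=False` so the processor keeps the entire prompt. For a vision-language processor, truncating the text can cut away image placeholder tokens so their count no longer matches the image features, which is one common source of the token mismatch error named in the new comment. Below is a minimal sketch of what the flag changes, using a plain GPT-2 tokenizer purely for illustration (the Space's actual checkpoint and processor are not shown in this diff):

```python
# Illustration only: a GPT-2 tokenizer stands in for the Space's multimodal processor.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
long_prompt = "word " * 3000  # well past GPT-2's model_max_length of 1024 tokens

capped = tokenizer(long_prompt, return_tensors="pt", truncation=True)   # cut at model_max_length
full = tokenizer(long_prompt, return_tensors="pt", truncation=False)    # full sequence kept

print(capped["input_ids"].shape)  # torch.Size([1, 1024])
print(full["input_ids"].shape)    # the complete prompt survives encoding
```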
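The `_apply_memory` hunk fills in the body of the global-replacement loop. The snippet below is a self-contained sketch of that post-correction step; the `rules` dict and the `apply_global_rules` helper are hypothetical stand-ins (in `app.py` the rules are presumably produced from user feedback by `_compile_rules_from_feedback`):

```python
# Hypothetical rules dict standing in for the feedback-derived corrections.
rules = {"global": {"teh": "the", "recieve": "receive"}}

def apply_global_rules(text: str) -> str:
    # Apply each learned (wrong -> right) substitution across the whole transcript.
    for wrong, right in rules.get("global", {}).items():
        if wrong and right:  # skip empty keys/values so nothing is deleted by accident
            text = text.replace(wrong, right)
    return text

print(apply_global_rules("I recieve teh letter."))  # -> "I receive the letter."
```

A single pass over `rules["global"]` is enough here, since each rule is an independent literal substitution.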
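The final hunk keeps `demo.queue(max_size=50).launch(share=True)` under a top-level `if __name__ == "__main__":` guard, as the preserved comment says it should be. A minimal runnable sketch of that launch pattern, with a placeholder UI rather than the Space's real interface:

```python
import gradio as gr

# Placeholder Blocks app; the real demo wires up the HTR models, feedback logging, etc.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("HTR demo placeholder")

if __name__ == "__main__":
    # queue() enables request queuing; max_size caps how many requests may wait.
    # share=True asks Gradio for a temporary public link, which mainly matters for
    # local runs, since a Space is already served publicly.
    demo.queue(max_size=50).launch(share=True)
```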