Spaces:

Emeritus-21
/

handwritten-text-recognition

Runtime error

App Files Files Community

Emeritus-21 committed on Aug 24

Commit

c3250ac

verified ·

1 Parent(s): c1235af

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -67

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # app.py — HTR Space with Feedback Loop, Memory Post-Correction, and GRPO Export
 import os, time, json, hashlib, difflib, uuid, csv
 from datetime import datetime
 from collections import Counter, defaultdict
@@ -17,16 +18,17 @@ from jiwer import cer
 # ---------------- Storage & Paths ----------------
 os.makedirs("data", exist_ok=True)
-FEEDBACK_PATH = "data/feedback.jsonl"  # raw feedback log (per sample)
 MEMORY_RULES_PATH = "data/memory_rules.json"  # compiled post-correction rules
-GRPO_EXPORT_PATH = "data/grpo_prefs.jsonl"  # preference pairs for GRPO
-CSV_EXPORT_PATH = "data/feedback.csv"  # optional tabular export
 # ---------------- Models ----------------
 MODEL_PATHS = {
-    "Model 1 (Complex handwritings)": ("prithivMLmods/Qwen2.5-VL-7B-Abliterated-Caption-it", Qwen2_5_VLForConditionalGeneration),
-    "Model 2 (simple and scanned handwriting)": ("nanonets/Nanonets-OCR-s", Qwen2_5_VLForConditionalGeneration),
 }
 MAX_NEW_TOKENS_DEFAULT = 512
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -109,6 +111,7 @@ def _safe_text(text: str) -> str:
     return (text or "").strip()
 def _hash_image(image: Image.Image) -> str:
     img_bytes = image.tobytes()
     return hashlib.sha256(img_bytes).hexdigest()
@@ -130,16 +133,22 @@ def _apply_memory(text: str, model_choice: str, enabled: bool):
     if not enabled or not text:
         return text
     rules = _load_memory_rules()
     by_model = rules.get("by_model", {}).get(model_choice, {})
     for wrong, right in by_model.items():
         if wrong and right:
             text = text.replace(wrong, right)
     for wrong, right in rules.get("global", {}).items():
         if wrong and right:
             text = text.replace(wrong, right)
     return text
 def _compile_rules_from_feedback(min_count: int = 2, max_phrase_len: int = 40):
     changes_counter_global = Counter()
     changes_counter_by_model = defaultdict(Counter)
@@ -152,18 +161,20 @@ def _compile_rules_from_feedback(min_count: int = 2, max_phrase_len: int = 40):
                 row = json.loads(line)
             except Exception:
                 continue
-            if row.get("reward", 0) < 1:
                 continue
             pred = _safe_text(row.get("prediction", ""))
             corr = _safe_text(row.get("correction", "")) or _safe_text(row.get("ground_truth", ""))
             if not pred or not corr:
                 continue
             model_choice = row.get("model_choice", "")
             s = difflib.SequenceMatcher(None, pred, corr)
             for tag, i1, i2, j1, j2 in s.get_opcodes():
                 if tag in ("replace", "delete", "insert"):
                     wrong = pred[i1:i2]
                     right = corr[j1:j2]
                     if 0 < len(wrong) <= max_phrase_len or 0 < len(right) <= max_phrase_len:
                         if wrong.strip():
                             changes_counter_global[(wrong, right)] += 1
@@ -171,9 +182,11 @@ def _compile_rules_from_feedback(min_count: int = 2, max_phrase_len: int = 40):
                                 changes_counter_by_model[model_choice][(wrong, right)] += 1
     rules = {"global": {}, "by_model": {}}
     for (wrong, right), cnt in changes_counter_global.items():
         if cnt >= min_count and wrong and right and wrong != right:
             rules["global"][wrong] = right
     for model_choice, ctr in changes_counter_by_model.items():
         rules["by_model"].setdefault(model_choice, {})
         for (wrong, right), cnt in ctr.items():
@@ -189,10 +202,8 @@ def ocr_image(image: Image.Image, model_choice: str, query: str = None,
               temperature: float = 0.1, top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0,
               use_memory: bool = True,
               progress=gr.Progress(track_tqdm=True)):
-    if image is None:
-        return "Please upload or capture an image."
-    if model_choice not in _loaded_models:
-        return f"Invalid model: {model_choice}"
     processor, model, tokenizer = _loaded_processors[model_choice], _loaded_models[model_choice], getattr(_loaded_processors[model_choice], "tokenizer", None)
     prompt = _default_prompt(query)
     batch = _build_inputs(processor, tokenizer, image, prompt).to(device)
@@ -200,25 +211,23 @@ def ocr_image(image: Image.Image, model_choice: str, query: str = None,
         output_ids = model.generate(**batch, max_new_tokens=max_new_tokens, do_sample=False,
                                     temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
     raw = _decode_text(model, processor, tokenizer, output_ids, prompt).replace("<|im_end|>", "").strip()
     post = _apply_memory(raw, model_choice, use_memory)
     return post
 # ---------------- Export Helpers ----------------
 def save_as_pdf(text):
     text = _safe_text(text)
-    if not text:
-        return None
     doc = SimpleDocTemplate("output.pdf")
     flowables = [Paragraph(t, getSampleStyleSheet()["Normal"]) for t in text.splitlines() if t != ""]
-    if not flowables:
-        flowables = [Paragraph(" ", getSampleStyleSheet()["Normal"])]
     doc.build(flowables)
     return "output.pdf"
 def save_as_word(text):
     text = _safe_text(text)
-    if not text:
-        return None
     doc = Document()
     for line in text.splitlines():
         doc.add_paragraph(line)
@@ -227,8 +236,7 @@ def save_as_word(text):
 def save_as_audio(text):
     text = _safe_text(text)
-    if not text:
-        return None
     try:
         tts = gTTS(text)
         tts.save("output.mp3")
@@ -239,6 +247,10 @@ def save_as_audio(text):
 # ---------------- Metrics Function ----------------
 def calculate_cer_score(ground_truth: str, prediction: str) -> str:
     if not ground_truth or not prediction:
         return "Cannot calculate CER: Missing ground truth or prediction."
     ground_truth_cleaned = " ".join(ground_truth.strip().split())
@@ -252,8 +264,9 @@ def _append_jsonl(path, obj):
         f.write(json.dumps(obj, ensure_ascii=False) + "\n")
 def _export_csv():
     if not os.path.exists(FEEDBACK_PATH):
-        return None
     rows = []
     with open(FEEDBACK_PATH, "r", encoding="utf-8") as f:
         for line in f:
@@ -263,7 +276,7 @@ def _export_csv():
                 pass
     if not rows:
         return None
-    keys = ["id", "timestamp", "model_choice", "image_sha256", "prompt", "prediction", "correction", "ground_truth", "reward", "cer"]
     with open(CSV_EXPORT_PATH, "w", newline="", encoding="utf-8") as f:
         w = csv.DictWriter(f, fieldnames=keys)
         w.writeheader()
@@ -274,12 +287,16 @@ def _export_csv():
 def save_feedback(image: Image.Image, model_choice: str, prompt: str,
                   prediction: str, correction: str, ground_truth: str, reward: int):
     if image is None:
-        return "Please provide the image again to link feedback."
     if not prediction and not correction and not ground_truth:
-        return "Nothing to save."
     image_hash = _hash_image(image)
     target = _safe_text(correction) or _safe_text(ground_truth)
     pred = _safe_text(prediction)
     cer_score = None
@@ -302,13 +319,18 @@ def save_feedback(image: Image.Image, model_choice: str, prompt: str,
         "cer": float(cer_score) if cer_score is not None else None,
     }
     _append_jsonl(FEEDBACK_PATH, row)
-    return f"✅ Feedback saved (reward={reward})."
 def compile_memory_rules():
     _compile_rules_from_feedback(min_count=2, max_phrase_len=60)
     return "✅ Memory rules recompiled from positive feedback."
 def export_grpo_preferences():
     if not os.path.exists(FEEDBACK_PATH):
         return "No feedback to export."
     count = 0
@@ -323,6 +345,7 @@ def export_grpo_preferences():
                 corr = _safe_text(row.get("correction", "")) or _safe_text(row.get("ground_truth", ""))
                 prompt = _safe_text(row.get("prompt", "")) or "Transcribe the image exactly."
                 if corr and pred and corr != pred and row.get("reward", 0) >= 0:
                     out = {
                         "prompt": prompt,
                         "image_sha256": row.get("image_sha256", ""),
@@ -334,16 +357,11 @@ def export_grpo_preferences():
                     count += 1
     return f"✅ Exported {count} GRPO preference pairs to {GRPO_EXPORT_PATH}."
-def get_grpo_file():
-    if os.path.exists(GRPO_EXPORT_PATH):
-        return GRPO_EXPORT_PATH
-    return None
-def get_csv_file():
-    _export_csv()
-    if os.path.exists(CSV_EXPORT_PATH):
-        return CSV_EXPORT_PATH
-    return None
 # ---------------- Evaluation Orchestration ----------------
 @spaces.GPU
@@ -372,6 +390,9 @@ MODEL_ID = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")  # change
 OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "grpo_output")
 DATA_PATH = os.environ.get("DATA_PATH", "data/grpo_prefs.jsonl")
 def _jsonl_dataset(jsonl_path):
     data = []
     with open(jsonl_path, "r", encoding="utf-8") as f:
@@ -392,6 +413,7 @@ def main():
     if not data:
         print("No GRPO data found.")
         return
     from datasets import Dataset
     ds = Dataset.from_list(data)
@@ -400,6 +422,7 @@ def main():
         MODEL_ID, trust_remote_code=True, device_map="auto"
     )
     cfg = GRPOConfig(
         output_dir=OUTPUT_DIR,
         learning_rate=5e-6,
@@ -415,7 +438,7 @@ def main():
     trainer = GRPOTrainer(
         model=model,
-        ref_model=None,
         args=cfg,
         tokenizer=tok,
         train_dataset=ds
@@ -433,15 +456,15 @@ def _write_trainer_script():
     path = os.path.join("train", "grpo_train.py")
     with open(path, "w", encoding="utf-8") as f:
         f.write(TRAINER_SCRIPT)
-    return path, "✅ Trainer script written to train/grpo_train.py"
 # ---------------- Gradio Interface ----------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## ✍🏾 wilson Handwritten OCR — with Feedback Loop, Memory & GRPO Export")
     model_choice = gr.Radio(choices=list(MODEL_PATHS.keys()),
-                             value=list(MODEL_PATHS.keys())[0],
-                             label="Select OCR Model")
     with gr.Tab("🖼 Image Inference"):
         query_input = gr.Textbox(label="Custom Prompt (optional)", placeholder="Leave empty for RAW structured output")
@@ -460,6 +483,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         raw_output = gr.Textbox(label="📜 Output (post-corrected if memory is ON)", lines=18, show_copy_button=True)
         gr.Markdown("### ✏️ Quick Feedback")
         correction_box = gr.Textbox(label="Your Correction (optional)", placeholder="Paste your corrected text here; leave empty if the output is perfect.", lines=8)
         ground_truth_box = gr.Textbox(label="Ground Truth (optional)", placeholder="If you have a reference transcription, paste it here.", lines=6)
@@ -486,12 +510,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_file])
         def _clear():
-            return ("", None, "", MAX_NEW_TOKENS_DEFAULT, 0.1, 1.0, 0, 1.0, True, "", "", "")
         clear_btn.click(
             fn=_clear,
             outputs=[raw_output, image_input, query_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty, use_memory, correction_box, ground_truth_box, feedback_status]
         )
         btn_good.click(
             fn=lambda img, mc, prmpt, pred, corr, gt: save_feedback(img, mc, prmpt, pred, corr, gt, reward=1),
             inputs=[image_input, model_choice, query_input, raw_output, correction_box, ground_truth_box],
@@ -527,51 +552,38 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Tab("✏️ Feedback & Memory"):
         gr.Markdown("""
-**Pipeline**
-1) Save feedback (👍 / 👎) and add corrections.
-2) Click **Build/Refresh Memory** to generate auto-fix rules from positive feedback.
 3) Keep **Enable Memory Post-correction** checked on inference/eval tabs.
         """)
         build_mem_btn = gr.Button("🧠 Build/Refresh Memory from Feedback")
         mem_status = gr.Markdown()
         build_mem_btn.click(fn=compile_memory_rules, outputs=[mem_status])
         csv_status = gr.Markdown()
-        gr.Markdown("---")
-        gr.Markdown("### ⬇️ Download Feedback Data")
-        with gr.Row():
-            download_csv_btn = gr.Button("⬇️ Download Feedback as CSV")
-            download_csv_file = gr.File(label="CSV File")
-        download_csv_btn.click(fn=get_csv_file, outputs=[download_csv_file])
     with gr.Tab("🧪 GRPO / Dataset"):
         gr.Markdown("""
 **GRPO Fine-tuning** (run offline or in a training Space):
 - Click **Export GRPO Preferences** to produce `data/grpo_prefs.jsonl` of (prompt, chosen, rejected).
 - Click **Write Trainer Script** to create `train/grpo_train.py`.
-- Then run:
 ```bash
 pip install trl accelerate peft transformers datasets
 python train/grpo_train.py
-    """)
-    export_grpo_btn = gr.Button("📦 Export GRPO Preferences")
-    grpo_status = gr.Markdown()
-    export_grpo_file = gr.File(label="GRPO Preferences File")
-    write_trainer_btn = gr.Button("📜 Write Trainer Script")
-    trainer_status = gr.Markdown()
-    trainer_file = gr.File(label="Trainer Script File")
-    export_grpo_btn.click(fn=export_grpo_preferences, outputs=[grpo_status])
-    export_grpo_btn.click(fn=get_grpo_file, outputs=[export_grpo_file])
-    write_trainer_btn.click(fn=_write_trainer_script, outputs=[trainer_file, trainer_status])
-...
 if __name__ == "__main__":
-    # This line must be indented with 4 spaces or a single tab
-    demo.queue(max_size=50).launch(share=True)

 # app.py — HTR Space with Feedback Loop, Memory Post-Correction, and GRPO Export
 import os, time, json, hashlib, difflib, uuid, csv
 from datetime import datetime
 from collections import Counter, defaultdict
 # ---------------- Storage & Paths ----------------
 os.makedirs("data", exist_ok=True)
+FEEDBACK_PATH = "data/feedback.jsonl"         # raw feedback log (per sample)
 MEMORY_RULES_PATH = "data/memory_rules.json"  # compiled post-correction rules
+GRPO_EXPORT_PATH = "data/grpo_prefs.jsonl"    # preference pairs for GRPO
+CSV_EXPORT_PATH = "data/feedback.csv"         # optional tabular export
 # ---------------- Models ----------------
 MODEL_PATHS = {
+    "Model 1 (Complex handwrittings )": ("prithivMLmods/Qwen2.5-VL-7B-Abliterated-Caption-it", Qwen2_5_VLForConditionalGeneration),
+    "Model 2 (simple and scanned handwritting )": ("nanonets/Nanonets-OCR-s", Qwen2_5_VLForConditionalGeneration),
 }
+# Model 3 removed to conserve memory.
 MAX_NEW_TOKENS_DEFAULT = 512
 device = "cuda" if torch.cuda.is_available() else "cpu"
     return (text or "").strip()
 def _hash_image(image: Image.Image) -> str:
     # Returns the hex SHA-256 digest of the bytes produced by image.tobytes().
     # NOTE(review): only the raw bytes are hashed; the image's size and mode are
     # not mixed into the digest here - confirm that is acceptable for linking.
+    # stable hash for dedup / linking feedback to the same page
     img_bytes = image.tobytes()
     return hashlib.sha256(img_bytes).hexdigest()
     if not enabled or not text:
         return text
     rules = _load_memory_rules()
+    # 1) Model-specific replacements
     by_model = rules.get("by_model", {}).get(model_choice, {})
     for wrong, right in by_model.items():
         if wrong and right:
             text = text.replace(wrong, right)
+    # 2) Global replacements
     for wrong, right in rules.get("global", {}).items():
         if wrong and right:
             text = text.replace(wrong, right)
     return text
 def _compile_rules_from_feedback(min_count: int = 2, max_phrase_len: int = 40):
+    """
+    Build replacement rules by mining feedback pairs (prediction -> correction).
+    We extract phrases that consistently changed, with frequency >= min_count.
+    """
     changes_counter_global = Counter()
     changes_counter_by_model = defaultdict(Counter)
                 row = json.loads(line)
             except Exception:
                 continue
+            if row.get("reward", 0) < 1:  # only learn from thumbs-up or explicit 'accepted_correction'
                 continue
             pred = _safe_text(row.get("prediction", ""))
             corr = _safe_text(row.get("correction", "")) or _safe_text(row.get("ground_truth", ""))
             if not pred or not corr:
                 continue
             model_choice = row.get("model_choice", "")
+            # Extract ops
             s = difflib.SequenceMatcher(None, pred, corr)
             for tag, i1, i2, j1, j2 in s.get_opcodes():
                 if tag in ("replace", "delete", "insert"):
                     wrong = pred[i1:i2]
                     right = corr[j1:j2]
+                    # keep short-ish tokens/phrases
                     if 0 < len(wrong) <= max_phrase_len or 0 < len(right) <= max_phrase_len:
                         if wrong.strip():
                             changes_counter_global[(wrong, right)] += 1
                                 changes_counter_by_model[model_choice][(wrong, right)] += 1
     rules = {"global": {}, "by_model": {}}
+    # Global
     for (wrong, right), cnt in changes_counter_global.items():
         if cnt >= min_count and wrong and right and wrong != right:
             rules["global"][wrong] = right
+    # Per model
     for model_choice, ctr in changes_counter_by_model.items():
         rules["by_model"].setdefault(model_choice, {})
         for (wrong, right), cnt in ctr.items():
               temperature: float = 0.1, top_p: float = 1.0, top_k: int = 0, repetition_penalty: float = 1.0,
               use_memory: bool = True,
               progress=gr.Progress(track_tqdm=True)):
+    if image is None: return "Please upload or capture an image."
+    if model_choice not in _loaded_models: return f"Invalid model: {model_choice}"
     processor, model, tokenizer = _loaded_processors[model_choice], _loaded_models[model_choice], getattr(_loaded_processors[model_choice], "tokenizer", None)
     prompt = _default_prompt(query)
     batch = _build_inputs(processor, tokenizer, image, prompt).to(device)
         output_ids = model.generate(**batch, max_new_tokens=max_new_tokens, do_sample=False,
                                     temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
     raw = _decode_text(model, processor, tokenizer, output_ids, prompt).replace("<|im_end|>", "").strip()
+    # Apply memory post-correction
     post = _apply_memory(raw, model_choice, use_memory)
     return post
 # ---------------- Export Helpers ----------------
 def save_as_pdf(text):
     # Writes the text to "output.pdf", one Paragraph per non-empty line, and
     # returns that path. Returns None when the text is empty after _safe_text().
     # NOTE(review): every call writes the same relative file name, so overlapping
     # requests would presumably overwrite each other - confirm.
     text = _safe_text(text)
+    if not text: return None
     doc = SimpleDocTemplate("output.pdf")
     # NOTE(review): if Paragraph is reportlab's, it parses inline markup, so text
     # containing "<" or "&" may need escaping before this point - confirm.
     flowables = [Paragraph(t, getSampleStyleSheet()["Normal"]) for t in text.splitlines() if t != ""]
     # Fallback so doc.build() never receives an empty list.
+    if not flowables: flowables = [Paragraph(" ", getSampleStyleSheet()["Normal"])]
     doc.build(flowables)
     return "output.pdf"
 def save_as_word(text):
     text = _safe_text(text)
+    if not text: return None
     doc = Document()
     for line in text.splitlines():
         doc.add_paragraph(line)
 def save_as_audio(text):
     text = _safe_text(text)
+    if not text: return None
     try:
         tts = gTTS(text)
         tts.save("output.mp3")
 # ---------------- Metrics Function ----------------
 def calculate_cer_score(ground_truth: str, prediction: str) -> str:
+    """
+    Calculates the Character Error Rate (CER).
+    A CER of 0.0 means the prediction is perfect.
+    """
     if not ground_truth or not prediction:
         return "Cannot calculate CER: Missing ground truth or prediction."
     ground_truth_cleaned = " ".join(ground_truth.strip().split())
         f.write(json.dumps(obj, ensure_ascii=False) + "\n")
 def _export_csv():
+    # optional: CSV summary for spreadsheet views
     if not os.path.exists(FEEDBACK_PATH):
+        return CSV_EXPORT_PATH if os.path.exists(CSV_EXPORT_PATH) else None
     rows = []
     with open(FEEDBACK_PATH, "r", encoding="utf-8") as f:
         for line in f:
                 pass
     if not rows:
         return None
+    keys = ["id","timestamp","model_choice","image_sha256","prompt","prediction","correction","ground_truth","reward","cer"]
     with open(CSV_EXPORT_PATH, "w", newline="", encoding="utf-8") as f:
         w = csv.DictWriter(f, fieldnames=keys)
         w.writeheader()
 def save_feedback(image: Image.Image, model_choice: str, prompt: str,
                   prediction: str, correction: str, ground_truth: str, reward: int):
+    """
+    reward: 1 = good/accepted, 0 = neutral, -1 = bad
+    """
     if image is None:
+        return "Please provide the image again to link feedback.", 0
     if not prediction and not correction and not ground_truth:
+        return "Nothing to save.", 0
     image_hash = _hash_image(image)
+    # best target = correction, else ground_truth, else prediction
     target = _safe_text(correction) or _safe_text(ground_truth)
     pred = _safe_text(prediction)
     cer_score = None
         "cer": float(cer_score) if cer_score is not None else None,
     }
     _append_jsonl(FEEDBACK_PATH, row)
+    return f"✅ Feedback saved (reward={reward}).", 1
 def compile_memory_rules():
     # Button callback: recompiles the memory rules and returns a status string.
     # Passes max_phrase_len=60, overriding the helper's default of 40.
     _compile_rules_from_feedback(min_count=2, max_phrase_len=60)
     return "✅ Memory rules recompiled from positive feedback."
 def export_grpo_preferences():
+    """
+    Build preference pairs for GRPO training:
+    - chosen: correction/ground_truth when present
+    - rejected: original prediction
+    """
     if not os.path.exists(FEEDBACK_PATH):
         return "No feedback to export."
     count = 0
                 corr = _safe_text(row.get("correction", "")) or _safe_text(row.get("ground_truth", ""))
                 prompt = _safe_text(row.get("prompt", "")) or "Transcribe the image exactly."
                 if corr and pred and corr != pred and row.get("reward", 0) >= 0:
+                    # One preference datapoint
                     out = {
                         "prompt": prompt,
                         "image_sha256": row.get("image_sha256", ""),
                     count += 1
     return f"✅ Exported {count} GRPO preference pairs to {GRPO_EXPORT_PATH}."
+def export_csv():
     # Button callback: runs _export_csv() and turns its result into a status
     # message. A truthy result is reported as the exported path; a falsy result
     # (e.g. None) yields "No data to export.".
+    p = _export_csv()
+    if p:
+        return f"✅ CSV exported: {p}"
+    return "No data to export."
 # ---------------- Evaluation Orchestration ----------------
 @spaces.GPU
 OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "grpo_output")
 DATA_PATH = os.environ.get("DATA_PATH", "data/grpo_prefs.jsonl")
+# Our jsonl: each line has prompt, chosen, rejected (and image_sha256/model_choice optionally)
+# We'll format as required by TRL: prompt + responses with one preferred
 def _jsonl_dataset(jsonl_path):
     data = []
     with open(jsonl_path, "r", encoding="utf-8") as f:
     if not data:
         print("No GRPO data found.")
         return
+    # Create a HuggingFace datasets Dataset from memory
     from datasets import Dataset
     ds = Dataset.from_list(data)
         MODEL_ID, trust_remote_code=True, device_map="auto"
     )
+    # Minimal config — tune to your GPU
     cfg = GRPOConfig(
         output_dir=OUTPUT_DIR,
         learning_rate=5e-6,
     trainer = GRPOTrainer(
         model=model,
+        ref_model=None,  # let TRL create a frozen copy internally
         args=cfg,
         tokenizer=tok,
         train_dataset=ds
     path = os.path.join("train", "grpo_train.py")
     with open(path, "w", encoding="utf-8") as f:
         f.write(TRAINER_SCRIPT)
+    return path
 # ---------------- Gradio Interface ----------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## ✍🏾 wilson Handwritten OCR — with Feedback Loop, Memory & GRPO Export")
     model_choice = gr.Radio(choices=list(MODEL_PATHS.keys()),
+                            value=list(MODEL_PATHS.keys())[0],
+                            label="Select OCR Model")
     with gr.Tab("🖼 Image Inference"):
         query_input = gr.Textbox(label="Custom Prompt (optional)", placeholder="Leave empty for RAW structured output")
         raw_output = gr.Textbox(label="📜 Output (post-corrected if memory is ON)", lines=18, show_copy_button=True)
+        # Quick Feedback strip
         gr.Markdown("### ✏️ Quick Feedback")
         correction_box = gr.Textbox(label="Your Correction (optional)", placeholder="Paste your corrected text here; leave empty if the output is perfect.", lines=8)
         ground_truth_box = gr.Textbox(label="Ground Truth (optional)", placeholder="If you have a reference transcription, paste it here.", lines=6)
         audio_btn.click(fn=save_as_audio, inputs=[raw_output], outputs=[audio_file])
         def _clear():
             # Returns 12 reset values, matched by position to the 12 components
             # listed in the outputs of the clear_btn.click call that follows.
+            return ("", None, "", MAX_NEW_TOKENS_DEFAULT, 0.1, 1.0, 0, 1.0, True, "", "", "",)
         clear_btn.click(
             fn=_clear,
             outputs=[raw_output, image_input, query_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty, use_memory, correction_box, ground_truth_box, feedback_status]
         )
+        # Quick feedback save
         btn_good.click(
             fn=lambda img, mc, prmpt, pred, corr, gt: save_feedback(img, mc, prmpt, pred, corr, gt, reward=1),
             inputs=[image_input, model_choice, query_input, raw_output, correction_box, ground_truth_box],
     with gr.Tab("✏️ Feedback & Memory"):
         gr.Markdown("""
+**Pipeline**
+1) Save feedback (👍 / 👎) and add corrections.
+2) Click **Build/Refresh Memory** to generate auto-fix rules from positive feedback.
 3) Keep **Enable Memory Post-correction** checked on inference/eval tabs.
         """)
         build_mem_btn = gr.Button("🧠 Build/Refresh Memory from Feedback")
         mem_status = gr.Markdown()
         build_mem_btn.click(fn=compile_memory_rules, outputs=[mem_status])
+        csv_btn = gr.Button("📤 Export Feedback as CSV")
         csv_status = gr.Markdown()
+        csv_btn.click(fn=export_csv, outputs=[csv_status])
     with gr.Tab("🧪 GRPO / Dataset"):
         gr.Markdown("""
 **GRPO Fine-tuning** (run offline or in a training Space):
 - Click **Export GRPO Preferences** to produce `data/grpo_prefs.jsonl` of (prompt, chosen, rejected).
 - Click **Write Trainer Script** to create `train/grpo_train.py`.
+- Then run:
 ```bash
 pip install trl accelerate peft transformers datasets
 python train/grpo_train.py
+```
+Set `BASE_MODEL`/`OUTPUT_DIR` env vars if you like.
+        """)
+        grpo_btn = gr.Button("📦 Export GRPO Preferences")
+        grpo_status = gr.Markdown()
+        grpo_btn.click(fn=export_grpo_preferences, outputs=[grpo_status])
+        write_script_btn = gr.Button("📝 Write grpo_train.py")
+        write_script_status = gr.Markdown()
+        write_script_btn.click(fn=lambda: f"✅ Trainer script written to `{_write_trainer_script()}`", outputs=[write_script_status])
 if __name__ == "__main__":
     # Enables the request queue (max_size=50) and starts the app.
     # NOTE(review): share=True is presumably unnecessary when this runs as a
     # hosted Space - confirm against the Gradio launch() documentation.
+    demo.queue(max_size=50).launch(share=True)