Spaces:

Toadoum
/

French-Ngambay-Translation

Sleeping

App Files Files Community

Toadoum committed on Aug 31

Commit

548806b

verified ·

1 Parent(s): c3a97cb

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -88

app.py CHANGED Viewed

@@ -1,13 +1,36 @@
 import os
 import io
 import re
 from typing import List, Tuple
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import docx
 from docx.enum.text import WD_ALIGN_PARAGRAPH
 from docx.text.paragraph import Paragraph as DocxParagraph
 import fitz  # PyMuPDF
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet
@@ -16,15 +39,12 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph as RLParagraph, Spac
 from reportlab.lib.units import cm
 from html import escape as html_escape
-# --- Disable compile/dynamo to avoid meta tensor issues ---
-os.environ["TORCH_COMPILE_DISABLE"] = "1"
-os.environ["TORCHDYNAMO_DISABLE"] = "1"
-os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
 # --- Config ---
 MODEL_REPO = "Toadoum/ngambay-fr-v1"
 FR_CODE_PREFERRED = "fra_Latn"   # French (NLLB)
-FR_CODE_ALT = "fr_Latn"          # Some custom models use this
 NG_CODE_PREFERRED = "sba_Latn"   # Ngambay (Saba) Latin
 # --- Inference params ---
@@ -32,31 +52,62 @@ MAX_NEW_TOKENS = 256
 TEMPERATURE = 0.0  # not used when do_sample=False
 # --- Device selection ---
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# --- Load model & tokenizer ---
-print("Loading tokenizer and model...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
-# Load model with appropriate dtype and device
-model_kwargs = {"torch_dtype": torch.float16} if device == "cuda" else {}
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO, **model_kwargs)
-model = model.to(device)  # Move model to device after full loading
-print(f"Model loaded on: {model.device}")
-# Ensure a pad token to avoid generate() quirks
-if tokenizer.pad_token_id is None:
-    if tokenizer.eos_token is not None:
-        tokenizer.pad_token = tokenizer.eos_token
-    elif tokenizer.unk_token is not None:
-        tokenizer.pad_token = tokenizer.unk_token
-    else:
-        tokenizer.add_special_tokens({"pad_token": "<pad>"})
-        model.resize_token_embeddings(len(tokenizer))
-    model.config.pad_token_id = tokenizer.pad_token_id
-# --- Language code resolution ---
 def _resolve_lang_code(preferred: str, alt: str | None) -> str:
     codes = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(codes, dict) and len(codes) > 0:
@@ -66,13 +117,11 @@ def _resolve_lang_code(preferred: str, alt: str | None) -> str:
             return alt
     if hasattr(tokenizer, "get_lang_id"):
         try:
-            tokenizer.get_lang_id(preferred)
-            return preferred
         except Exception:
             if alt:
                 try:
-                    tokenizer.get_lang_id(alt)
-                    return alt
                 except Exception:
                     pass
     return preferred
@@ -80,7 +129,7 @@ def _resolve_lang_code(preferred: str, alt: str | None) -> str:
 FR_CODE = _resolve_lang_code(FR_CODE_PREFERRED, FR_CODE_ALT)
 NG_CODE = _resolve_lang_code(NG_CODE_PREFERRED, None)
-# --- Helpers ---
 def _token_len(s: str) -> int:
     return len(tokenizer.encode(s, add_special_tokens=False))
@@ -96,26 +145,18 @@ def chunk_text_for_translation(text: str, max_src_tokens: int = 380) -> List[str
     chunks, current = [], ""
     for sent in sentences:
         if not current:
-            current = sent
-            continue
         candidate = f"{current} {sent}"
         if _token_len(candidate) <= max_src_tokens:
             current = candidate
         else:
-            chunks.append(current.strip())
-            current = sent
     if current.strip():
         chunks.append(current.strip())
     return chunks if chunks else ([text] if text.strip() else [])
-# --- Translation functions ---
 def _translate_with_pipeline(text: str) -> str:
-    translator = pipeline(
-        task="translation",
-        model=model,
-        tokenizer=tokenizer,
-        device=device,
-    )
     out = translator(
         text,
         src_lang=FR_CODE,
@@ -126,11 +167,13 @@ def _translate_with_pipeline(text: str) -> str:
     key = "translation_text" if "translation_text" in out[0] else "generated_text"
     return out[0][key]
 def _translate_with_generate(text: str) -> str:
     if hasattr(tokenizer, "src_lang"):
         tokenizer.src_lang = FR_CODE
-    inputs = tokenizer(text, return_tensors="pt").to(device)
     forced_bos = None
     lang2id = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(lang2id, dict) and NG_CODE in lang2id:
@@ -141,38 +184,40 @@ def _translate_with_generate(text: str) -> str:
         except Exception:
             forced_bos = None
     gen_kwargs = dict(max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
     if forced_bos is not None:
-        gen_kwargs["forced_bos_token_id"] = torch.tensor([forced_bos], device=device)
     with torch.no_grad():
         output_ids = model.generate(**inputs, **gen_kwargs)
     return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
-# --- Public APIs ---
 def translate_text_simple(text: str) -> str:
     text = (text or "").strip()
     if not text:
         return ""
     try:
         return _translate_with_pipeline(text)
-    except Exception as e:
-        print(f"Pipeline error: {e}. Falling back to generate().")
         return _translate_with_generate(text)
 def translate_large_text(text: str) -> str:
     chunks = chunk_text_for_translation(text)
     outputs = []
     for ch in chunks:
         try:
             outputs.append(_translate_with_pipeline(ch))
-        except Exception as e:
-            print(f"Pipeline error for chunk: {e}. Falling back to generate().")
             outputs.append(_translate_with_generate(ch))
     return "\n".join(outputs).strip()
-# --- DOCX helpers ---
 def is_heading(par: DocxParagraph) -> Tuple[bool, int]:
     style_name = (par.style.name or "").lower()
     if not style_name:
@@ -188,21 +233,20 @@ def translate_docx_bytes(file_bytes: bytes) -> bytes:
     f = io.BytesIO(file_bytes)
     doc = docx.Document(f)
     new = docx.Document()
     for par in doc.paragraphs:
         text = par.text or ""
         if not text.strip():
-            new.add_paragraph("")
-            continue
         is_head, lvl = is_heading(par)
         translated = translate_large_text(text)
         if is_head:
             new.add_heading(translated, level=min(max(lvl, 1), 9))
         else:
             np = new.add_paragraph(translated)
-            try:
-                np.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
-            except Exception:
-                pass
     for table in doc.tables:
         new_table = new.add_table(rows=len(table.rows), cols=len(table.columns))
         for r_idx, row in enumerate(table.rows):
@@ -212,21 +256,20 @@ def translate_docx_bytes(file_bytes: bytes) -> bytes:
                 tgt_cell = new_table.cell(r_idx, c_idx)
                 tgt_cell.text = translated
                 for p in tgt_cell.paragraphs:
-                    try:
-                        p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
-                    except Exception:
-                        pass
     out = io.BytesIO()
     new.save(out)
     return out.getvalue()
-# --- PDF helpers ---
 def extract_pdf_text_blocks(pdf_bytes: bytes) -> List[List[str]]:
     pages_blocks: List[List[str]] = []
     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
     for page in doc:
         blocks = page.get_text("blocks") or []
-        blocks.sort(key=lambda b: (round(b[1], 1), round(b[0], 1)))
         page_texts = []
         for b in blocks:
             text = (b[4] if len(b) > 4 else "") or ""
@@ -245,13 +288,10 @@ def build_pdf_from_blocks(translated_pages: List[List[str]]) -> bytes:
         topMargin=2*cm, bottomMargin=2*cm
     )
     styles = getSampleStyleSheet()
-    body = styles["BodyText"]
-    body.alignment = TA_JUSTIFY
-    body.leading = 14
     story = []
     for p_idx, blocks in enumerate(translated_pages):
-        if p_idx > 0:
-            story.append(PageBreak())
         for blk in blocks:
             safe = html_escape(blk).replace("\n", "<br/>")
             story.append(RLParagraph(safe, body))
@@ -267,7 +307,7 @@ def translate_pdf_bytes(file_bytes: bytes) -> bytes:
         translated_pages.append(t_blocks)
     return build_pdf_from_blocks(translated_pages)
-# --- Gradio file handler ---
 def translate_document(file_path: str):
     if not file_path:
         return None, "Veuillez sélectionner un fichier .docx ou .pdf"
@@ -275,23 +315,24 @@ def translate_document(file_path: str):
         name = os.path.basename(file_path)
         with open(file_path, "rb") as f:
             data = f.read()
         if name.lower().endswith(".docx"):
             out_bytes = translate_docx_bytes(data)
             out_path = "translated_ngambay.docx"
-            with open(out_path, "wb") as f:
-                f.write(out_bytes)
             return out_path, "✅ Traduction DOCX terminée (paragraphes justifiés)."
         if name.lower().endswith(".pdf"):
             out_bytes = translate_pdf_bytes(data)
             out_path = "translated_ngambay.pdf"
-            with open(out_path, "wb") as f:
-                f.write(out_bytes)
             return out_path, "✅ Traduction PDF terminée (paragraphes justifiés)."
         return None, "Type de fichier non supporté. Choisissez .docx ou .pdf"
     except Exception as e:
         return None, f"❌ Erreur pendant la traduction: {e}"
-# --- UI ---
 theme = gr.themes.Soft(
     primary_hue="indigo",
     radius_size="lg",
@@ -303,9 +344,9 @@ theme = gr.themes.Soft(
 CUSTOM_CSS = """
 .gradio-container {max-width: 980px !important;}
-.header-card {
-    background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
-    color: white; padding: 22px; border-radius: 18px;
     box-shadow: 0 10px 30px rgba(79,70,229,.25);
     transition: transform .2s ease;
 }
@@ -313,9 +354,9 @@ CUSTOM_CSS = """
 .header-title { font-size: 26px; font-weight: 800; margin: 0 0 6px 0; letter-spacing: .2px; }
 .header-sub { opacity: .98; font-size: 14px; }
 .brand { display:flex; align-items:center; gap:10px; justify-content:space-between; flex-wrap:wrap; }
-.badge {
-    display:inline-block; background: rgba(255,255,255,.18);
-    padding: 4px 10px; border-radius: 999px; font-size: 12px;
     border: 1px solid rgba(255,255,255,.25);
 }
 .footer-note { margin-top: 8px; color: #64748b; font-size: 12px; text-align: center; }
@@ -355,7 +396,9 @@ with gr.Blocks(
             </div>
             """
         )
     with gr.Tabs():
         with gr.Tab("Traduction de texte"):
             with gr.Row():
                 with gr.Column(scale=5):
@@ -386,19 +429,24 @@ with gr.Blocks(
                         show_copy_button=True
                     )
             gr.Markdown('<div class="footer-note">Astuce : collez un paragraphe complet pour un meilleur contexte.</div>')
         with gr.Tab("Traduction de document (.docx / .pdf)"):
             with gr.Row():
                 with gr.Column(scale=5):
                     doc_inp = gr.File(
                         label="Sélectionnez un document (.docx ou .pdf)",
                         file_types=[".docx", ".pdf"],
-                        type="filepath"
                     )
                     run_doc = gr.Button("Traduire le document", variant="primary")
                 with gr.Column(scale=5):
                     doc_out = gr.File(label="Fichier traduit (télécharger)")
                     doc_status = gr.Markdown("")
             run_doc.click(translate_document, inputs=doc_inp, outputs=[doc_out, doc_status])
     gr.HTML(
         """
         <div class="support-banner">
@@ -414,13 +462,17 @@ with gr.Blocks(
         </div>
         """
     )
     btn.click(translate_text_simple, inputs=src, outputs=tgt)
     clear_btn.click(lambda: ("", ""), outputs=[src, tgt])
 if __name__ == "__main__":
     demo.queue(default_concurrency_limit=4).launch(
-        ssr_mode=False,
-        share=False if os.environ.get("SPACE_ID") else True,
         server_name="0.0.0.0",
         server_port=int(os.environ.get("PORT", 7860)),
         show_error=True,

+# ==== Français -> Ngambay Translator App (meta-safe on HF Spaces) ====
 import os
 import io
 import re
 from typing import List, Tuple
# --- Disable compile/dynamo/fake tensor paths EARLY (before torch import) ---
# These variables are written ahead of the `import torch` statement that follows.
# NOTE(review): presumably torch reads them at import time -- confirm against the PyTorch docs.
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"
# setdefault leaves any value already present in the environment untouched.
os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Try to hard-disable dynamo at runtime too (belt & suspenders)
try:
    import torch._dynamo as dynamo

    # Switch on dynamo's `suppress_errors` configuration flag.
    dynamo.config.suppress_errors = True
    _DYNAMO_READY = True
except Exception:
    # Best effort: if dynamo cannot be imported or configured, the decorator
    # below degrades to a pass-through.
    _DYNAMO_READY = False


def no_compile(fn):
    """Decorator that excludes *fn* from dynamo when dynamo is available.

    Returns ``dynamo.disable(fn)`` when the dynamo import and configuration
    above succeeded, and *fn* itself otherwise.
    """
    if _DYNAMO_READY:
        return dynamo.disable(fn)
    return fn
+# --- DOCX (python-docx) ---
 import docx
 from docx.enum.text import WD_ALIGN_PARAGRAPH
 from docx.text.paragraph import Paragraph as DocxParagraph
+# --- PDF read & write ---
 import fitz  # PyMuPDF
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.lib.units import cm
 from html import escape as html_escape
 # --- Config ---
 MODEL_REPO = "Toadoum/ngambay-fr-v1"
+# Prefer NLLB codes; auto-resolve alternates if needed.
 FR_CODE_PREFERRED = "fra_Latn"   # French (NLLB)
+FR_CODE_ALT       = "fr_Latn"    # Some custom models use this
 NG_CODE_PREFERRED = "sba_Latn"   # Ngambay (Saba) Latin
 # --- Inference params ---
 TEMPERATURE = 0.0  # not used when do_sample=False
 # --- Device selection ---
# Integer device index handed to `pipeline(device=...)`: 0 when CUDA is available, -1 otherwise.
device = 0 if torch.cuda.is_available() else -1
# String device name used for `.to(...)` calls on the model and on input tensors.
device_str = "cuda" if torch.cuda.is_available() else "cpu"
+# ---------- Load model & tokenizer with META-SAFE path ----------
+def _has_meta_tensors(m: torch.nn.Module) -> bool:
+    try:
+        return any(p.is_meta for p in m.parameters()) or any(b.is_meta for b in m.buffers())
+    except Exception:
+        # Fallback check by device type
+        return any(getattr(p, "device", None) and p.device.type == "meta" for p in m.parameters())
+def _ensure_pad_token(tok, mdl):
+    if tok.pad_token_id is None:
+        if tok.eos_token is not None:
+            tok.pad_token = tok.eos_token
+        elif tok.unk_token is not None:
+            tok.pad_token = tok.unk_token
+        else:
+            tok.add_special_tokens({"pad_token": "<pad>"})
+            mdl.resize_token_embeddings(len(tok))
+    mdl.config.pad_token_id = tok.pad_token_id
def _load_model_and_tokenizer():
    """Load the tokenizer and seq2seq model for ``MODEL_REPO``.

    The model is loaded with ``low_cpu_mem_usage=False`` (half precision when
    CUDA is available). If the loaded model still contains meta tensors, it is
    discarded and loaded once more with ``torch_dtype=None``. The model is
    then moved to ``device_str`` and a pad token is ensured.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_REPO)

    # low_cpu_mem_usage=False is the critical setting: it avoids meta init on some stacks.
    load_kwargs = {
        "low_cpu_mem_usage": False,
        "torch_dtype": torch.float16 if torch.cuda.is_available() else None,
    }
    mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO, **load_kwargs)

    if _has_meta_tensors(mdl):
        # Fallback: drop the first instance and force a "real" load with the default dtype.
        # NOTE(review): the result of this second load is not re-checked for meta tensors.
        del mdl
        load_kwargs["torch_dtype"] = None
        mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO, **load_kwargs)

    # Only move the model once the meta-tensor check (and possible reload) is done.
    mdl = mdl.to(device_str)

    # A pad token avoids generate() quirks.
    _ensure_pad_token(tok, mdl)
    return tok, mdl
# Module-level singletons: the tokenizer and model are loaded once at import time.
tokenizer, model = _load_model_and_tokenizer()
# A single translation pipeline is built here, on top of the already-loaded model
# and tokenizer, and reused by _translate_with_pipeline.
translator = pipeline(
    task="translation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    framework="pt",
)
 def _resolve_lang_code(preferred: str, alt: str | None) -> str:
     codes = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(codes, dict) and len(codes) > 0:
             return alt
     if hasattr(tokenizer, "get_lang_id"):
         try:
+            tokenizer.get_lang_id(preferred); return preferred
         except Exception:
             if alt:
                 try:
+                    tokenizer.get_lang_id(alt); return alt
                 except Exception:
                     pass
     return preferred
 FR_CODE = _resolve_lang_code(FR_CODE_PREFERRED, FR_CODE_ALT)
 NG_CODE = _resolve_lang_code(NG_CODE_PREFERRED, None)
# ---------- helpers ----------
def _token_len(s: str) -> int:
    """Return how many token ids the module tokenizer produces for *s*, without special tokens."""
    token_ids = tokenizer.encode(s, add_special_tokens=False)
    return len(token_ids)
     chunks, current = [], ""
     for sent in sentences:
         if not current:
+            current = sent; continue
         candidate = f"{current} {sent}"
         if _token_len(candidate) <= max_src_tokens:
             current = candidate
         else:
+            chunks.append(current.strip()); current = sent
     if current.strip():
         chunks.append(current.strip())
     return chunks if chunks else ([text] if text.strip() else [])
+@no_compile
 def _translate_with_pipeline(text: str) -> str:
     out = translator(
         text,
         src_lang=FR_CODE,
     key = "translation_text" if "translation_text" in out[0] else "generated_text"
     return out[0][key]
+@no_compile
 def _translate_with_generate(text: str) -> str:
+    # Set src language if supported
     if hasattr(tokenizer, "src_lang"):
         tokenizer.src_lang = FR_CODE
+    # Determine forced BOS for target language
     forced_bos = None
     lang2id = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(lang2id, dict) and NG_CODE in lang2id:
         except Exception:
             forced_bos = None
+    inputs = tokenizer(text, return_tensors="pt")
+    inputs = {k: v.to(device_str) for k, v in inputs.items()}
     gen_kwargs = dict(max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
     if forced_bos is not None:
+        gen_kwargs["forced_bos_token_id"] = forced_bos
     with torch.no_grad():
         output_ids = model.generate(**inputs, **gen_kwargs)
     return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
# ---------- Public translate APIs ----------
@no_compile
def translate_text_simple(text: str) -> str:
    """Translate a single piece of text, preferring the pipeline path.

    The input is stripped first; empty or ``None`` input yields ``""``.
    If the pipeline call raises, the failure is reported on stdout and the
    text is translated through the direct ``generate()`` path instead.

    Args:
        text: Source text to translate (may be ``None`` or empty).

    Returns:
        The translated text, or ``""`` for empty input.
    """
    text = (text or "").strip()
    if not text:
        return ""
    try:
        return _translate_with_pipeline(text)
    except Exception as e:
        # Deliberate best-effort fallback, but do not hide why the pipeline failed.
        print(f"Pipeline error: {e}. Falling back to generate().")
        return _translate_with_generate(text)
@no_compile
def translate_large_text(text: str) -> str:
    """Translate long text chunk by chunk and join the results with newlines.

    The text is split with ``chunk_text_for_translation``. Each chunk goes
    through the pipeline path; if that raises, the failure is reported on
    stdout and the chunk is translated through the ``generate()`` path.

    Args:
        text: Source text of arbitrary length.

    Returns:
        The translated chunks joined by ``"\\n"`` and stripped.
    """
    chunks = chunk_text_for_translation(text)
    outputs = []
    for ch in chunks:
        try:
            outputs.append(_translate_with_pipeline(ch))
        except Exception as e:
            # Deliberate best-effort fallback, but do not hide why the pipeline failed.
            print(f"Pipeline error for chunk: {e}. Falling back to generate().")
            outputs.append(_translate_with_generate(ch))
    return "\n".join(outputs).strip()
+# ---------- DOCX helpers ----------
 def is_heading(par: DocxParagraph) -> Tuple[bool, int]:
     style_name = (par.style.name or "").lower()
     if not style_name:
     f = io.BytesIO(file_bytes)
     doc = docx.Document(f)
     new = docx.Document()
     for par in doc.paragraphs:
         text = par.text or ""
         if not text.strip():
+            new.add_paragraph(""); continue
         is_head, lvl = is_heading(par)
         translated = translate_large_text(text)
         if is_head:
             new.add_heading(translated, level=min(max(lvl, 1), 9))
         else:
             np = new.add_paragraph(translated)
+            try: np.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+            except Exception: pass
     for table in doc.tables:
         new_table = new.add_table(rows=len(table.rows), cols=len(table.columns))
         for r_idx, row in enumerate(table.rows):
                 tgt_cell = new_table.cell(r_idx, c_idx)
                 tgt_cell.text = translated
                 for p in tgt_cell.paragraphs:
+                    try: p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+                    except Exception: pass
     out = io.BytesIO()
     new.save(out)
     return out.getvalue()
+# ---------- PDF helpers ----------
 def extract_pdf_text_blocks(pdf_bytes: bytes) -> List[List[str]]:
     pages_blocks: List[List[str]] = []
     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
     for page in doc:
         blocks = page.get_text("blocks") or []
+        blocks.sort(key=lambda b: (round(b[1], 1), round(b[0], 1)))  # (y, x)
         page_texts = []
         for b in blocks:
             text = (b[4] if len(b) > 4 else "") or ""
         topMargin=2*cm, bottomMargin=2*cm
     )
     styles = getSampleStyleSheet()
+    body = styles["BodyText"]; body.alignment = TA_JUSTIFY; body.leading = 14
     story = []
     for p_idx, blocks in enumerate(translated_pages):
+        if p_idx > 0: story.append(PageBreak())
         for blk in blocks:
             safe = html_escape(blk).replace("\n", "<br/>")
             story.append(RLParagraph(safe, body))
         translated_pages.append(t_blocks)
     return build_pdf_from_blocks(translated_pages)
+# ---------- Gradio file handler ----------
 def translate_document(file_path: str):
     if not file_path:
         return None, "Veuillez sélectionner un fichier .docx ou .pdf"
         name = os.path.basename(file_path)
         with open(file_path, "rb") as f:
             data = f.read()
         if name.lower().endswith(".docx"):
             out_bytes = translate_docx_bytes(data)
             out_path = "translated_ngambay.docx"
+            with open(out_path, "wb") as f: f.write(out_bytes)
             return out_path, "✅ Traduction DOCX terminée (paragraphes justifiés)."
         if name.lower().endswith(".pdf"):
             out_bytes = translate_pdf_bytes(data)
             out_path = "translated_ngambay.pdf"
+            with open(out_path, "wb") as f: f.write(out_bytes)
             return out_path, "✅ Traduction PDF terminée (paragraphes justifiés)."
         return None, "Type de fichier non supporté. Choisissez .docx ou .pdf"
     except Exception as e:
         return None, f"❌ Erreur pendant la traduction: {e}"
+# ================== UI ==================
 theme = gr.themes.Soft(
     primary_hue="indigo",
     radius_size="lg",
 CUSTOM_CSS = """
 .gradio-container {max-width: 980px !important;}
+.header-card {
+    background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
+    color: white; padding: 22px; border-radius: 18px;
     box-shadow: 0 10px 30px rgba(79,70,229,.25);
     transition: transform .2s ease;
 }
 .header-title { font-size: 26px; font-weight: 800; margin: 0 0 6px 0; letter-spacing: .2px; }
 .header-sub { opacity: .98; font-size: 14px; }
 .brand { display:flex; align-items:center; gap:10px; justify-content:space-between; flex-wrap:wrap; }
+.badge {
+    display:inline-block; background: rgba(255,255,255,.18);
+    padding: 4px 10px; border-radius: 999px; font-size: 12px;
     border: 1px solid rgba(255,255,255,.25);
 }
 .footer-note { margin-top: 8px; color: #64748b; font-size: 12px; text-align: center; }
             </div>
             """
         )
     with gr.Tabs():
+        # -------- Tab 1: Texte --------
         with gr.Tab("Traduction de texte"):
             with gr.Row():
                 with gr.Column(scale=5):
                         show_copy_button=True
                     )
             gr.Markdown('<div class="footer-note">Astuce : collez un paragraphe complet pour un meilleur contexte.</div>')
+        # -------- Tab 2: Documents --------
         with gr.Tab("Traduction de document (.docx / .pdf)"):
             with gr.Row():
                 with gr.Column(scale=5):
                     doc_inp = gr.File(
                         label="Sélectionnez un document (.docx ou .pdf)",
                         file_types=[".docx", ".pdf"],
+                        type="filepath"  # returns temp filepath
                     )
                     run_doc = gr.Button("Traduire le document", variant="primary")
                 with gr.Column(scale=5):
                     doc_out = gr.File(label="Fichier traduit (télécharger)")
                     doc_status = gr.Markdown("")
             run_doc.click(translate_document, inputs=doc_inp, outputs=[doc_out, doc_status])
+    # Contribution banner
     gr.HTML(
         """
         <div class="support-banner">
         </div>
         """
     )
+    # Text actions
     btn.click(translate_text_simple, inputs=src, outputs=tgt)
     clear_btn.click(lambda: ("", ""), outputs=[src, tgt])
 if __name__ == "__main__":
+    # On HF Spaces: disable SSR and don't use share=True
+    on_spaces = bool(os.environ.get("SPACE_ID"))
     demo.queue(default_concurrency_limit=4).launch(
+        ssr_mode=False,                                   # key fix for meta tensors
+        share=False if on_spaces else True,               # share=True not supported on Spaces
         server_name="0.0.0.0",
         server_port=int(os.environ.get("PORT", 7860)),
         show_error=True,