Toadoum committed
Commit 548806b · verified · 1 Parent(s): c3a97cb

Update app.py

Files changed (1): app.py (+140 -88)
app.py CHANGED
@@ -1,13 +1,36 @@
+# ==== Français -> Ngambay Translator App (meta-safe on HF Spaces) ====
+
 import os
 import io
 import re
 from typing import List, Tuple
+
+# --- Disable compile/dynamo/fake tensor paths EARLY (before torch import) ---
+os.environ["TORCH_COMPILE_DISABLE"] = "1"
+os.environ["TORCHDYNAMO_DISABLE"] = "1"
+os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
+
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+
+# Try to hard-disable dynamo at runtime too (belt & suspenders)
+try:
+    import torch._dynamo as dynamo
+    dynamo.config.suppress_errors = True
+    # wrap helper to decorate functions
+    def no_compile(fn):
+        return dynamo.disable(fn)
+except Exception:
+    def no_compile(fn):
+        return fn
+
+# --- DOCX (python-docx) ---
 import docx
 from docx.enum.text import WD_ALIGN_PARAGRAPH
 from docx.text.paragraph import Paragraph as DocxParagraph
+
+# --- PDF read & write ---
 import fitz  # PyMuPDF
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet
@@ -16,15 +39,12 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph as RLParagraph, Spac
 from reportlab.lib.units import cm
 from html import escape as html_escape
 
-# --- Disable compile/dynamo to avoid meta tensor issues ---
-os.environ["TORCH_COMPILE_DISABLE"] = "1"
-os.environ["TORCHDYNAMO_DISABLE"] = "1"
-os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
-
 # --- Config ---
 MODEL_REPO = "Toadoum/ngambay-fr-v1"
+
+# Prefer NLLB codes; auto-resolve alternates if needed.
 FR_CODE_PREFERRED = "fra_Latn"  # French (NLLB)
-FR_CODE_ALT = "fr_Latn"  # Some custom models use this
+FR_CODE_ALT = "fr_Latn"         # Some custom models use this
 NG_CODE_PREFERRED = "sba_Latn"  # Ngambay (Saba) Latin
 
 # --- Inference params ---
@@ -32,31 +52,62 @@ MAX_NEW_TOKENS = 256
 TEMPERATURE = 0.0  # not used when do_sample=False
 
 # --- Device selection ---
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-
-# --- Load model & tokenizer ---
-print("Loading tokenizer and model...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
-
-# Load model with appropriate dtype and device
-model_kwargs = {"torch_dtype": torch.float16} if device == "cuda" else {}
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO, **model_kwargs)
-model = model.to(device)  # Move model to device after full loading
-print(f"Model loaded on: {model.device}")
-
-# Ensure a pad token to avoid generate() quirks
-if tokenizer.pad_token_id is None:
-    if tokenizer.eos_token is not None:
-        tokenizer.pad_token = tokenizer.eos_token
-    elif tokenizer.unk_token is not None:
-        tokenizer.pad_token = tokenizer.unk_token
-    else:
-        tokenizer.add_special_tokens({"pad_token": "<pad>"})
-        model.resize_token_embeddings(len(tokenizer))
-model.config.pad_token_id = tokenizer.pad_token_id
-
-# --- Language code resolution ---
+device = 0 if torch.cuda.is_available() else -1
+device_str = "cuda" if torch.cuda.is_available() else "cpu"
+
+# ---------- Load model & tokenizer with META-SAFE path ----------
+def _has_meta_tensors(m: torch.nn.Module) -> bool:
+    try:
+        return any(p.is_meta for p in m.parameters()) or any(b.is_meta for b in m.buffers())
+    except Exception:
+        # Fallback check by device type
+        return any(getattr(p, "device", None) and p.device.type == "meta" for p in m.parameters())
+
+def _ensure_pad_token(tok, mdl):
+    if tok.pad_token_id is None:
+        if tok.eos_token is not None:
+            tok.pad_token = tok.eos_token
+        elif tok.unk_token is not None:
+            tok.pad_token = tok.unk_token
+        else:
+            tok.add_special_tokens({"pad_token": "<pad>"})
+            mdl.resize_token_embeddings(len(tok))
+    mdl.config.pad_token_id = tok.pad_token_id
+
+def _load_model_and_tokenizer():
+    tok = AutoTokenizer.from_pretrained(MODEL_REPO)
+    # First load WITHOUT low_cpu_mem_usage to avoid meta-inits on some stacks
+    mdl = AutoModelForSeq2SeqLM.from_pretrained(
+        MODEL_REPO,
+        low_cpu_mem_usage=False,  # critical: avoid meta init
+        torch_dtype=torch.float16 if torch.cuda.is_available() else None,
+    )
+    if _has_meta_tensors(mdl):
+        # Fallback: force a "real" load
+        del mdl
+        mdl = AutoModelForSeq2SeqLM.from_pretrained(
+            MODEL_REPO,
+            low_cpu_mem_usage=False,
+            torch_dtype=None,  # ensure real tensors on CPU first
+        )
+
+    # Move model AFTER we've verified no meta weights
+    mdl = mdl.to(device_str)
+
+    # Ensure pad token to avoid generate() quirks
+    _ensure_pad_token(tok, mdl)
+    return tok, mdl
+
+tokenizer, model = _load_model_and_tokenizer()
+
+translator = pipeline(
+    task="translation",
+    model=model,
+    tokenizer=tokenizer,
+    device=device,
+    framework="pt",
+)
+
 def _resolve_lang_code(preferred: str, alt: str | None) -> str:
     codes = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(codes, dict) and len(codes) > 0:
@@ -66,13 +117,11 @@ def _resolve_lang_code(preferred: str, alt: str | None) -> str:
         return alt
     if hasattr(tokenizer, "get_lang_id"):
         try:
-            tokenizer.get_lang_id(preferred)
-            return preferred
+            tokenizer.get_lang_id(preferred); return preferred
         except Exception:
             if alt:
                 try:
-                    tokenizer.get_lang_id(alt)
-                    return alt
+                    tokenizer.get_lang_id(alt); return alt
                 except Exception:
                     pass
     return preferred
@@ -80,7 +129,7 @@ def _resolve_lang_code(preferred: str, alt: str | None) -> str:
 FR_CODE = _resolve_lang_code(FR_CODE_PREFERRED, FR_CODE_ALT)
 NG_CODE = _resolve_lang_code(NG_CODE_PREFERRED, None)
 
-# --- Helpers ---
+# ---------- helpers ----------
 def _token_len(s: str) -> int:
     return len(tokenizer.encode(s, add_special_tokens=False))
 
@@ -96,26 +145,18 @@ def chunk_text_for_translation(text: str, max_src_tokens: int = 380) -> List[str
     chunks, current = [], ""
     for sent in sentences:
         if not current:
-            current = sent
-            continue
+            current = sent; continue
         candidate = f"{current} {sent}"
         if _token_len(candidate) <= max_src_tokens:
            current = candidate
         else:
-            chunks.append(current.strip())
-            current = sent
+            chunks.append(current.strip()); current = sent
     if current.strip():
         chunks.append(current.strip())
     return chunks if chunks else ([text] if text.strip() else [])
 
-# --- Translation functions ---
+@no_compile
 def _translate_with_pipeline(text: str) -> str:
-    translator = pipeline(
-        task="translation",
-        model=model,
-        tokenizer=tokenizer,
-        device=device,
-    )
     out = translator(
         text,
         src_lang=FR_CODE,
@@ -126,11 +167,13 @@ def _translate_with_pipeline(text: str) -> str:
     key = "translation_text" if "translation_text" in out[0] else "generated_text"
     return out[0][key]
 
+@no_compile
 def _translate_with_generate(text: str) -> str:
+    # Set src language if supported
     if hasattr(tokenizer, "src_lang"):
         tokenizer.src_lang = FR_CODE
-    inputs = tokenizer(text, return_tensors="pt").to(device)
 
+    # Determine forced BOS for target language
     forced_bos = None
     lang2id = getattr(tokenizer, "lang_code_to_id", None)
     if isinstance(lang2id, dict) and NG_CODE in lang2id:
@@ -141,38 +184,40 @@ def _translate_with_generate(text: str) -> str:
     except Exception:
         forced_bos = None
 
+    inputs = tokenizer(text, return_tensors="pt")
+    inputs = {k: v.to(device_str) for k, v in inputs.items()}
+
     gen_kwargs = dict(max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
     if forced_bos is not None:
-        gen_kwargs["forced_bos_token_id"] = torch.tensor([forced_bos], device=device)
+        gen_kwargs["forced_bos_token_id"] = forced_bos
 
     with torch.no_grad():
         output_ids = model.generate(**inputs, **gen_kwargs)
-
     return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
 
-# --- Public APIs ---
+# ---------- Public translate APIs ----------
+@no_compile
 def translate_text_simple(text: str) -> str:
     text = (text or "").strip()
     if not text:
         return ""
     try:
         return _translate_with_pipeline(text)
-    except Exception as e:
-        print(f"Pipeline error: {e}. Falling back to generate().")
+    except Exception:
         return _translate_with_generate(text)
 
+@no_compile
 def translate_large_text(text: str) -> str:
     chunks = chunk_text_for_translation(text)
     outputs = []
     for ch in chunks:
         try:
             outputs.append(_translate_with_pipeline(ch))
-        except Exception as e:
-            print(f"Pipeline error for chunk: {e}. Falling back to generate().")
+        except Exception:
            outputs.append(_translate_with_generate(ch))
     return "\n".join(outputs).strip()
 
-# --- DOCX helpers ---
+# ---------- DOCX helpers ----------
 def is_heading(par: DocxParagraph) -> Tuple[bool, int]:
     style_name = (par.style.name or "").lower()
     if not style_name:
@@ -188,21 +233,20 @@ def translate_docx_bytes(file_bytes: bytes) -> bytes:
     f = io.BytesIO(file_bytes)
     doc = docx.Document(f)
     new = docx.Document()
+
     for par in doc.paragraphs:
         text = par.text or ""
         if not text.strip():
-            new.add_paragraph("")
-            continue
+            new.add_paragraph(""); continue
         is_head, lvl = is_heading(par)
         translated = translate_large_text(text)
         if is_head:
             new.add_heading(translated, level=min(max(lvl, 1), 9))
         else:
             np = new.add_paragraph(translated)
-            try:
-                np.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
-            except Exception:
-                pass
+            try: np.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+            except Exception: pass
+
     for table in doc.tables:
         new_table = new.add_table(rows=len(table.rows), cols=len(table.columns))
         for r_idx, row in enumerate(table.rows):
@@ -212,21 +256,20 @@
             tgt_cell = new_table.cell(r_idx, c_idx)
             tgt_cell.text = translated
             for p in tgt_cell.paragraphs:
-                try:
-                    p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
-                except Exception:
-                    pass
+                try: p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+                except Exception: pass
+
     out = io.BytesIO()
     new.save(out)
     return out.getvalue()
 
-# --- PDF helpers ---
+# ---------- PDF helpers ----------
 def extract_pdf_text_blocks(pdf_bytes: bytes) -> List[List[str]]:
     pages_blocks: List[List[str]] = []
     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
     for page in doc:
         blocks = page.get_text("blocks") or []
-        blocks.sort(key=lambda b: (round(b[1], 1), round(b[0], 1)))
+        blocks.sort(key=lambda b: (round(b[1], 1), round(b[0], 1)))  # (y, x)
         page_texts = []
         for b in blocks:
             text = (b[4] if len(b) > 4 else "") or ""
@@ -245,13 +288,10 @@ def build_pdf_from_blocks(translated_pages: List[List[str]]) -> bytes:
         topMargin=2*cm, bottomMargin=2*cm
     )
     styles = getSampleStyleSheet()
-    body = styles["BodyText"]
-    body.alignment = TA_JUSTIFY
-    body.leading = 14
+    body = styles["BodyText"]; body.alignment = TA_JUSTIFY; body.leading = 14
     story = []
     for p_idx, blocks in enumerate(translated_pages):
-        if p_idx > 0:
-            story.append(PageBreak())
+        if p_idx > 0: story.append(PageBreak())
         for blk in blocks:
             safe = html_escape(blk).replace("\n", "<br/>")
             story.append(RLParagraph(safe, body))
@@ -267,7 +307,7 @@ def translate_pdf_bytes(file_bytes: bytes) -> bytes:
     translated_pages.append(t_blocks)
     return build_pdf_from_blocks(translated_pages)
 
-# --- Gradio file handler ---
+# ---------- Gradio file handler ----------
 def translate_document(file_path: str):
     if not file_path:
         return None, "Veuillez sélectionner un fichier .docx ou .pdf"
@@ -275,23 +315,24 @@ def translate_document(file_path: str):
         name = os.path.basename(file_path)
         with open(file_path, "rb") as f:
             data = f.read()
+
         if name.lower().endswith(".docx"):
             out_bytes = translate_docx_bytes(data)
             out_path = "translated_ngambay.docx"
-            with open(out_path, "wb") as f:
-                f.write(out_bytes)
+            with open(out_path, "wb") as f: f.write(out_bytes)
             return out_path, "✅ Traduction DOCX terminée (paragraphes justifiés)."
+
         if name.lower().endswith(".pdf"):
             out_bytes = translate_pdf_bytes(data)
             out_path = "translated_ngambay.pdf"
-            with open(out_path, "wb") as f:
-                f.write(out_bytes)
+            with open(out_path, "wb") as f: f.write(out_bytes)
             return out_path, "✅ Traduction PDF terminée (paragraphes justifiés)."
+
         return None, "Type de fichier non supporté. Choisissez .docx ou .pdf"
     except Exception as e:
         return None, f"❌ Erreur pendant la traduction: {e}"
 
-# --- UI ---
+# ================== UI ==================
 theme = gr.themes.Soft(
     primary_hue="indigo",
     radius_size="lg",
@@ -303,9 +344,9 @@ theme = gr.themes.Soft(
 
 CUSTOM_CSS = """
 .gradio-container {max-width: 980px !important;}
-.header-card {
-  background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
-  color: white; padding: 22px; border-radius: 18px;
+.header-card {
+  background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
+  color: white; padding: 22px; border-radius: 18px;
   box-shadow: 0 10px 30px rgba(79,70,229,.25);
   transition: transform .2s ease;
 }
@@ -313,9 +354,9 @@ CUSTOM_CSS = """
 .header-title { font-size: 26px; font-weight: 800; margin: 0 0 6px 0; letter-spacing: .2px; }
 .header-sub { opacity: .98; font-size: 14px; }
 .brand { display:flex; align-items:center; gap:10px; justify-content:space-between; flex-wrap:wrap; }
-.badge {
-  display:inline-block; background: rgba(255,255,255,.18);
-  padding: 4px 10px; border-radius: 999px; font-size: 12px;
+.badge {
+  display:inline-block; background: rgba(255,255,255,.18);
+  padding: 4px 10px; border-radius: 999px; font-size: 12px;
   border: 1px solid rgba(255,255,255,.25);
 }
 .footer-note { margin-top: 8px; color: #64748b; font-size: 12px; text-align: center; }
@@ -355,7 +396,9 @@ with gr.Blocks(
         </div>
         """
     )
+
    with gr.Tabs():
+        # -------- Tab 1: Texte --------
        with gr.Tab("Traduction de texte"):
             with gr.Row():
                 with gr.Column(scale=5):
@@ -386,19 +429,24 @@
                        show_copy_button=True
                    )
            gr.Markdown('<div class="footer-note">Astuce : collez un paragraphe complet pour un meilleur contexte.</div>')
+
+        # -------- Tab 2: Documents --------
        with gr.Tab("Traduction de document (.docx / .pdf)"):
            with gr.Row():
                with gr.Column(scale=5):
                    doc_inp = gr.File(
                        label="Sélectionnez un document (.docx ou .pdf)",
                        file_types=[".docx", ".pdf"],
-                        type="filepath"
+                        type="filepath"  # returns temp filepath
                    )
                    run_doc = gr.Button("Traduire le document", variant="primary")
                with gr.Column(scale=5):
                    doc_out = gr.File(label="Fichier traduit (télécharger)")
                    doc_status = gr.Markdown("")
+
            run_doc.click(translate_document, inputs=doc_inp, outputs=[doc_out, doc_status])
+
+    # Contribution banner
    gr.HTML(
        """
        <div class="support-banner">
@@ -414,13 +462,17 @@
        </div>
        """
    )
+
+    # Text actions
    btn.click(translate_text_simple, inputs=src, outputs=tgt)
    clear_btn.click(lambda: ("", ""), outputs=[src, tgt])

 if __name__ == "__main__":
+    # On HF Spaces: disable SSR and don't use share=True
+    on_spaces = bool(os.environ.get("SPACE_ID"))
     demo.queue(default_concurrency_limit=4).launch(
-        ssr_mode=False,
-        share=False if os.environ.get("SPACE_ID") else True,
+        ssr_mode=False,  # key fix for meta tensors
+        share=False if on_spaces else True,  # share=True not supported on Spaces
         server_name="0.0.0.0",
         server_port=int(os.environ.get("PORT", 7860)),
         show_error=True,
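
Note on the meta-tensor guard introduced above: lazy/low-memory loading paths can leave parameters on PyTorch's "meta" device, i.e. shape and dtype with no storage, and such weights fail as soon as they are moved or used. A minimal sketch of the condition _has_meta_tensors checks for, assuming a recent PyTorch; the layer and sizes here are illustrative only, not part of the commit:

import torch

# Parameters created on the meta device have shape/dtype but no data.
layer = torch.nn.Linear(4, 4, device="meta")
print(any(p.is_meta for p in layer.parameters()))  # True

# Real computation with meta weights raises, which is why the loader retries
# with low_cpu_mem_usage=False and only calls .to(device) after the check.
try:
    layer(torch.randn(1, 4))
except Exception as e:
    print(f"meta-initialized module cannot run: {e}")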
 
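For a quick end-to-end check of the refactored load path and the module-level translator, a hypothetical smoke test (not part of the commit; it assumes the Space's dependencies are installed and app.py is importable as app):

# Importing app runs the meta-safe load once and builds the shared pipeline;
# launch() stays behind the __main__ guard, so no server starts.
from app import translate_text_simple, translate_large_text

print(translate_text_simple("Bonjour, comment allez-vous ?"))

# Long inputs are split by chunk_text_for_translation (max 380 source tokens
# per chunk) and translated chunk by chunk, then joined with newlines.
long_text = " ".join(["Ceci est une phrase d'exemple."] * 40)
print(translate_large_text(long_text))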