Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ import importlib
 
 # ---------------- Configuration ----------------
 MODEL_ID = os.getenv("MODEL_ID", "tasal9/ZamAI-mT5-Pashto")
-CACHE_DIR = os.getenv("HF_HOME", None)
+CACHE_DIR = os.getenv("HF_HOME", None)  # optional cache dir for transformers
 HEALTH_PORT = int(os.getenv("HEALTH_PORT", "8080"))
 GRADIO_HOST = os.getenv("GRADIO_HOST", "0.0.0.0")
 GRADIO_PORT = int(os.getenv("GRADIO_PORT", "7860"))
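Note: every setting above is read from the environment with a default, so the Space can be re-pointed without code changes. A minimal sketch (the values below are hypothetical):

    import os
    # Must be set before app.py is imported, since the module reads them at import time.
    os.environ["MODEL_ID"] = "your-org/your-mt5-checkpoint"  # hypothetical checkpoint
    os.environ["HEALTH_PORT"] = "8081"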
@@ -23,7 +23,8 @@ DEFAULT_MAX_NEW_TOKENS = int(os.getenv("DEFAULT_MAX_NEW_TOKENS", "128"))
 
 
 # ---------------- Logging ----------------
-
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
+logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger("zamai-app")
 
 
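The log level is now driven by the LOG_LEVEL environment variable instead of being hard-coded. A quick illustration of the same pattern in isolation:

    import logging, os
    # Mirrors setting LOG_LEVEL=DEBUG in the Space's settings.
    logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper(),
                        format="%(asctime)s %(levelname)s %(message)s")
    logging.getLogger("zamai-app").debug("only visible when LOG_LEVEL=DEBUG")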
@@ -40,6 +41,7 @@ SAMPLE_INSTRUCTIONS = [
 
 
 def _start_health_server(port: int):
+    """Start a tiny HTTP server that responds 200 to /health on a background thread."""
     class HealthHandler(http.server.SimpleHTTPRequestHandler):
         def do_GET(self):
             if self.path == "/health":
@@ -52,14 +54,19 @@ def _start_health_server(port: int):
             self.end_headers()
 
     def _serve():
-        with socketserver.TCPServer(("", int(port)), HealthHandler) as httpd:
-            logger.info("Health endpoint listening on port %s", port)
-            httpd.serve_forever()
+        try:
+            with socketserver.TCPServer(("", int(port)), HealthHandler) as httpd:
+                logger.info("Health endpoint listening on port %s", port)
+                httpd.serve_forever()
+        except Exception as e:
+            logger.exception("Health server failed: %s", e)
 
-    threading.Thread(target=_serve, daemon=True)
+    t = threading.Thread(target=_serve, daemon=True)
+    t.start()
 
 
 def _detect_device() -> int:
+    # return device id for transformers pipeline: -1 for CPU or 0..N for CUDA
     try:
         if torch.cuda.is_available():
             logger.info("CUDA available; using GPU device 0")
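The old code created the health-server thread but never called start(), so /health never answered; the new version starts it and logs failures. Once the app is up, the endpoint can be probed like this (assuming the default HEALTH_PORT of 8080):

    import urllib.request
    # Liveness probe against the background health server.
    with urllib.request.urlopen("http://localhost:8080/health") as resp:
        print(resp.status)  # expect 200 while the app is running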
@@ -75,8 +82,40 @@ def get_generator(model_id: str = MODEL_ID, cache_dir: Optional[str] = CACHE_DIR
     device = _detect_device()
     logger.info("Loading tokenizer and model: %s (device=%s)", model_id, device)
 
-    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
-    gen = pipeline("text2text-generation", model=model_id, tokenizer=tokenizer, device=device)
+    tokenizer = None
+    local_model_path = None
+    try:
+        hf = importlib.import_module("huggingface_hub")
+        snapshot_download = getattr(hf, "snapshot_download", None)
+        if snapshot_download:
+            try:
+                logger.info("Attempting to snapshot_download model %s to cache_dir=%s", model_id, cache_dir)
+                local_model_path = snapshot_download(repo_id=model_id, cache_dir=cache_dir, repo_type="model")
+                if local_model_path:
+                    local_model_path = str(local_model_path)
+                    logger.info("Model snapshot downloaded to %s", local_model_path)
+            except Exception as e:
+                logger.warning("snapshot_download failed for %s: %s", model_id, e)
+                local_model_path = None
+    except Exception:
+        logger.debug("huggingface_hub not available; falling back to AutoTokenizer.from_pretrained")
+
+    try:
+        if local_model_path:
+            tokenizer = AutoTokenizer.from_pretrained(local_model_path, use_fast=False, cache_dir=cache_dir)
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False, cache_dir=cache_dir)
+        logger.info("Loaded tokenizer for %s", model_id)
+    except Exception as e2:
+        logger.exception("Failed to load tokenizer for %s: %s", model_id, e2)
+        raise
+
+    gen = pipeline(
+        "text2text-generation",
+        model=model_id,
+        tokenizer=tokenizer,
+        device=device,
+    )
     return gen
 
 
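The loader now prefers a local snapshot via huggingface_hub.snapshot_download and falls back to letting transformers resolve the repo id itself; the slow SentencePiece tokenizer is requested explicitly with use_fast=False. Typical use of the cached generator, with an illustrative prompt:

    gen = get_generator()  # loads once, then reuses the pipeline
    outputs = gen("سلام", max_new_tokens=32)  # example prompt
    print(outputs[0]["generated_text"])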
@@ -88,28 +127,47 @@ def predict(instruction: str,
             temperature: float,
             top_p: float,
             num_return_sequences: int):
-
+    """Generate text using the cached pipeline and return output or error message."""
     if not instruction or not instruction.strip():
-        return "⚠️ مهرباني وکړئ یوه لارښوونه ولیکئ."
+        return "⚠️ مهرباني وکړئ یوه لارښوونه ولیکئ."  # please provide an instruction
 
-    #
+    # Build a simple prompt: instruction (+ input if provided)
     prompt = instruction.strip()
-    if input_text:
+    if input_text and input_text.strip():
         prompt += "\n" + input_text.strip()
 
+    def _filter_generation_kwargs(kwargs: dict) -> dict:
+        allowed = {
+            "max_new_tokens",
+            "num_beams",
+            "do_sample",
+            "temperature",
+            "top_p",
+            "num_return_sequences",
+        }
+        return {k: v for k, v in kwargs.items() if k in allowed}
+
     try:
         gen = get_generator()
-        outputs = gen(
-            prompt,
-            max_new_tokens=int(max_new_tokens),
-            num_beams=int(num_beams),
-            do_sample=bool(do_sample),
-            temperature=float(temperature),
-            top_p=float(top_p),
-            num_return_sequences=int(num_return_sequences),
-        )
-
-        texts = [o["generated_text"].strip() for o in outputs]
+        gen_kwargs = {
+            "max_new_tokens": int(max_new_tokens),
+            "num_beams": int(num_beams) if not do_sample else 1,
+            "do_sample": bool(do_sample),
+            "temperature": float(temperature),
+            "top_p": float(top_p),
+            "num_return_sequences": max(1, int(num_return_sequences)),
+        }
+
+        gen_kwargs = _filter_generation_kwargs(gen_kwargs)
+        outputs = gen(prompt, **gen_kwargs)
+
+        texts = []
+        for out in outputs if isinstance(outputs, list) else [outputs]:
+            text = out.get("generated_text", "").strip()
+            texts.append(text)
+
+        if not texts:
+            return "⚠️ No response generated."
         return "\n\n---\n\n".join(texts)
 
     except Exception as e:
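Note that num_beams is forced to 1 whenever do_sample is enabled, so beam search and sampling are never mixed, and _filter_generation_kwargs strips any key the pipeline shouldn't see. An illustrative call with the same parameter order (values are examples only; the Gradio UI supplies these at runtime):

    reply = predict(
        "په پښتو کې سلام ولیکئ",  # instruction (example)
        "",                        # optional input text
        max_new_tokens=64,
        num_beams=2,   # ignored here because do_sample=True forces it to 1
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        num_return_sequences=1,
    )
    print(reply)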
@@ -119,7 +177,13 @@ def predict(instruction: str,
 
 def build_ui():
     with gr.Blocks() as demo:
-        gr.Markdown("# ZamAI mT5 Pashto Demo")
+        gr.Markdown(
+            """
+            # ZamAI mT5 Pashto Demo
+            اپلیکیشن **ZamAI-mT5-Pashto** د پښتو لارښوونو لپاره.
+            لاندې تنظیمات بدل کړئ او لارښوونه ولیکئ ترڅو ځواب ترلاسه کړئ.
+            """
+        )
 
         with gr.Row():
             with gr.Column(scale=2):
@@ -129,17 +193,21 @@ def build_ui():
                     value=SAMPLE_INSTRUCTIONS[0],
                     interactive=True,
                 )
-                instruction_textbox = gr.Textbox(lines=3, placeholder="دلته لارښوونه ولیکئ...", label="لارښوونه")
+                instruction_textbox = gr.Textbox(
+                    lines=3,
+                    placeholder="دلته لارښوونه ولیکئ...",
+                    label="لارښوونه",
+                )
                 input_text = gr.Textbox(lines=2, placeholder="اختیاري متن...", label="متن")
                 output = gr.Textbox(label="ځواب", interactive=False, lines=8)
                 generate_btn = gr.Button("جوړول", variant="primary")
 
             with gr.Column(scale=1):
                 gr.Markdown("### د تولید تنظیمات")
-                max_new_tokens = gr.Slider(16, 512, value=DEFAULT_MAX_NEW_TOKENS, step=1, label="اعظمي نوي ټوکنونه")
-                num_beams = gr.Slider(1, 8, value=2, step=1, label="شمیر شعاعونه")
-                do_sample = gr.Checkbox(label="نمونې فعال کړئ", value=True)
-                temperature = gr.Slider(0.1, 2.0, value=1.0, step=0.05, label="تودوخه")
+                max_new_tokens = gr.Slider(16, 512, value=DEFAULT_MAX_NEW_TOKENS, step=1, label="اعظمي نوي ټوکنونه (max_new_tokens)")
+                num_beams = gr.Slider(1, 8, value=2, step=1, label="شمیر شعاعونه (num_beams)")
+                do_sample = gr.Checkbox(label="نمونې فعال کړئ (do_sample)", value=True)
+                temperature = gr.Slider(0.1, 2.0, value=1.0, step=0.05, label="تودوخه (temperature)")
                 top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.01, label="Top-p")
                 num_return_sequences = gr.Slider(1, 4, value=1, step=1, label="د راګرځېدونکو تسلسلو شمېر")
 
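The click wiring is not part of the visible hunks; presumably the button is connected to predict() elsewhere in build_ui(). A hypothetical sketch of that wiring, matching predict()'s parameter order:

    # Hypothetical: not shown in this diff.
    generate_btn.click(
        predict,
        inputs=[instruction_textbox, input_text, max_new_tokens, num_beams,
                do_sample, temperature, top_p, num_return_sequences],
        outputs=output,
    )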
@@ -156,6 +224,10 @@ def build_ui():
 
 if __name__ == "__main__":
     logger.info("Starting ZamAI mT5 Pashto Demo (model=%s)", MODEL_ID)
-    _start_health_server(HEALTH_PORT)
+    try:
+        _start_health_server(HEALTH_PORT)
+    except Exception:
+        logger.exception("Failed to start health server")
+
     demo = build_ui()
     demo.launch(server_name=GRADIO_HOST, server_port=GRADIO_PORT)