marcosremar2 committed on
Commit
ec8ce73
·
1 Parent(s): a964a55
Files changed (1) hide show
  1. launch_llama_omni2.py +142 -86
launch_llama_omni2.py CHANGED
@@ -114,7 +114,7 @@ def start_controller():
114
  print("=== Starting LLaMA-Omni2 Controller ===")
115
 
116
  # First try to use our custom implementation
117
- direct_controller_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_controller_directly.py")
118
  if os.path.exists(direct_controller_path):
119
  print(f"Using custom controller implementation: {direct_controller_path}")
120
  cmd = [
@@ -128,36 +128,72 @@ def start_controller():
128
  print(f"Controller started with PID: {process.pid}")
129
  return process
130
 
131
- # Fall back to the extracted script
132
- controller_path = os.path.join(EXTRACTION_DIR, "llama_omni2", "serve", "controller.py")
133
 
134
- if not os.path.exists(controller_path):
135
- print(f"Controller script not found at {controller_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  return None
137
-
138
- cmd = [
139
- sys.executable, controller_path,
140
- "--host", "0.0.0.0",
141
- "--port", "10000"
142
- ]
143
-
144
- env = os.environ.copy()
145
- if EXTRACTION_DIR not in env.get("PYTHONPATH", ""):
146
- env["PYTHONPATH"] = f"{EXTRACTION_DIR}:{env.get('PYTHONPATH', '')}"
147
-
148
- print(f"Running: {' '.join(cmd)}")
149
- print(f"With PYTHONPATH: {env.get('PYTHONPATH')}")
150
-
151
- process = subprocess.Popen(cmd, env=env)
152
- print(f"Controller started with PID: {process.pid}")
153
- return process
154
 
155
  def start_model_worker():
156
  """Start the LLaMA-Omni2 model worker directly"""
157
  print("=== Starting LLaMA-Omni2 Model Worker ===")
158
 
159
  # First try to use our custom implementation
160
- direct_worker_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_model_worker_directly.py")
161
  if os.path.exists(direct_worker_path):
162
  print(f"Using custom model worker implementation: {direct_worker_path}")
163
  cmd = [
@@ -175,40 +211,27 @@ def start_model_worker():
175
  print(f"Model worker started with PID: {process.pid}")
176
  return process
177
 
178
- # Fall back to the extracted script
179
- model_worker_path = os.path.join(EXTRACTION_DIR, "llama_omni2", "serve", "model_worker.py")
180
-
181
- if not os.path.exists(model_worker_path):
182
- print(f"Model worker script not found at {model_worker_path}")
183
- return None
184
-
185
- cmd = [
186
- sys.executable, model_worker_path,
187
- "--host", "0.0.0.0",
188
- "--controller", "http://localhost:10000",
189
- "--port", "40000",
190
- "--worker", "http://localhost:40000",
191
- "--model-path", LLAMA_OMNI2_MODEL_PATH,
192
- "--model-name", LLAMA_OMNI2_MODEL_NAME
193
- ]
194
 
195
- env = os.environ.copy()
196
- if EXTRACTION_DIR not in env.get("PYTHONPATH", ""):
197
- env["PYTHONPATH"] = f"{EXTRACTION_DIR}:{env.get('PYTHONPATH', '')}"
 
 
 
 
 
 
198
 
199
- print(f"Running: {' '.join(cmd)}")
200
- print(f"With PYTHONPATH: {env.get('PYTHONPATH')}")
201
-
202
- process = subprocess.Popen(cmd, env=env)
203
- print(f"Model worker started with PID: {process.pid}")
204
- return process
205
 
206
  def start_gradio_server():
207
  """Start the LLaMA-Omni2 Gradio web server directly"""
208
  print("=== Starting LLaMA-Omni2 Gradio Server ===")
209
 
210
  # First try to use our custom implementation
211
- direct_gradio_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_gradio_directly.py")
212
  if os.path.exists(direct_gradio_path):
213
  print(f"Using custom Gradio server implementation: {direct_gradio_path}")
214
  cmd = [
@@ -224,32 +247,59 @@ def start_gradio_server():
224
  print(f"Gradio server started with PID: {process.pid}")
225
  return process
226
 
227
- # Fall back to the extracted script
228
- gradio_server_path = os.path.join(EXTRACTION_DIR, "llama_omni2", "serve", "gradio_web_server.py")
229
 
230
- if not os.path.exists(gradio_server_path):
231
- print(f"Gradio server script not found at {gradio_server_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  return None
233
-
234
- cmd = [
235
- sys.executable, gradio_server_path,
236
- "--host", "0.0.0.0",
237
- "--port", "7860",
238
- "--controller-url", "http://localhost:10000",
239
- "--model-list-mode", "reload",
240
- "--vocoder-dir", COSYVOICE_PATH
241
- ]
242
-
243
- env = os.environ.copy()
244
- if EXTRACTION_DIR not in env.get("PYTHONPATH", ""):
245
- env["PYTHONPATH"] = f"{EXTRACTION_DIR}:{env.get('PYTHONPATH', '')}"
246
-
247
- print(f"Running: {' '.join(cmd)}")
248
- print(f"With PYTHONPATH: {env.get('PYTHONPATH')}")
249
-
250
- process = subprocess.Popen(cmd, env=env)
251
- print(f"Gradio server started with PID: {process.pid}")
252
- return process
253
 
254
  def patch_extracted_files(extraction_dir):
255
  """Patch the extracted Python files to handle missing imports"""
@@ -338,25 +388,31 @@ def main():
338
  print("Checking and installing dependencies...")
339
  download_dependencies()
340
 
341
- # Run extraction script if not already extracted
342
- if not os.path.exists(os.path.join(EXTRACTION_DIR, "llama_omni2", "serve")):
343
- if not run_extraction_script():
344
- print("Failed to extract LLaMA-Omni2 scripts. Exiting.")
345
- return 1
346
- else:
347
- print("LLaMA-Omni2 scripts already extracted.")
348
 
349
  # Ensure the module structure is complete
350
  ensure_module_structure(EXTRACTION_DIR)
351
 
352
- # Patch the extracted Python files to handle missing imports
353
- patch_extracted_files(EXTRACTION_DIR)
354
 
355
  # Add the extraction dir to Python path
356
  if EXTRACTION_DIR not in sys.path:
357
  sys.path.insert(0, EXTRACTION_DIR)
358
  print(f"Added {EXTRACTION_DIR} to sys.path")
359
 
 
 
 
 
 
 
 
 
360
  # Start controller
361
  controller_process = start_controller()
362
  if not controller_process:
@@ -365,7 +421,7 @@ def main():
365
 
366
  # Wait for controller to initialize
367
  print("Waiting for controller to initialize...")
368
- time.sleep(15)
369
 
370
  # Start model worker
371
  model_worker_process = start_model_worker()
@@ -374,9 +430,9 @@ def main():
374
  controller_process.terminate()
375
  return 1
376
 
377
- # Wait for model to load
378
- print("Waiting for model to load (this may take several minutes)...")
379
- time.sleep(300)
380
 
381
  # Start Gradio server
382
  gradio_process = start_gradio_server()
 
114
  print("=== Starting LLaMA-Omni2 Controller ===")
115
 
116
  # First try to use our custom implementation
117
+ direct_controller_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "controller.py")
118
  if os.path.exists(direct_controller_path):
119
  print(f"Using custom controller implementation: {direct_controller_path}")
120
  cmd = [
 
128
  print(f"Controller started with PID: {process.pid}")
129
  return process
130
 
131
+ # Fall back to a simple controller implementation
132
+ print("No controller script found. Implementing a simple controller...")
133
 
134
+ try:
135
+ from fastapi import FastAPI, HTTPException
136
+ import uvicorn
137
+ from pydantic import BaseModel
138
+ import threading
139
+
140
+ app = FastAPI()
141
+
142
+ class ModelInfo(BaseModel):
143
+ model_name: str
144
+ worker_name: str
145
+ worker_addr: str
146
+
147
+ # Simple in-memory storage
148
+ registered_models = {}
149
+
150
+ @app.get("/")
151
+ def read_root():
152
+ return {"status": "ok", "models": list(registered_models.keys())}
153
+
154
+ @app.get("/api/v1/models")
155
+ def list_models():
156
+ return {"models": list(registered_models.keys())}
157
+
158
+ @app.post("/api/v1/register_worker")
159
+ def register_worker(model_info: ModelInfo):
160
+ registered_models[model_info.model_name] = {
161
+ "worker_name": model_info.worker_name,
162
+ "worker_addr": model_info.worker_addr
163
+ }
164
+ return {"status": "ok"}
165
+
166
+ # Start a simple controller
167
+ def run_controller():
168
+ uvicorn.run(app, host="0.0.0.0", port=10000)
169
+
170
+ thread = threading.Thread(target=run_controller, daemon=True)
171
+ thread.start()
172
+
173
+ print("Simple controller started on port 10000")
174
+ # Return a dummy process for compatibility
175
+ class DummyProcess:
176
+ def __init__(self):
177
+ self.pid = 0
178
+ def terminate(self):
179
+ pass
180
+ def poll(self):
181
+ return None
182
+ def wait(self, timeout=None):
183
+ pass
184
+
185
+ return DummyProcess()
186
+
187
+ except ImportError as e:
188
+ print(f"Failed to create simple controller: {e}")
189
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  def start_model_worker():
192
  """Start the LLaMA-Omni2 model worker directly"""
193
  print("=== Starting LLaMA-Omni2 Model Worker ===")
194
 
195
  # First try to use our custom implementation
196
+ direct_worker_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "model_worker.py")
197
  if os.path.exists(direct_worker_path):
198
  print(f"Using custom model worker implementation: {direct_worker_path}")
199
  cmd = [
 
211
  print(f"Model worker started with PID: {process.pid}")
212
  return process
213
 
214
+ # Fall back to a simple implementation
215
+ print("No model worker script found. Will try to start Gradio directly with the model.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
+ class DummyProcess:
218
+ def __init__(self):
219
+ self.pid = 0
220
+ def terminate(self):
221
+ pass
222
+ def poll(self):
223
+ return None
224
+ def wait(self, timeout=None):
225
+ pass
226
 
227
+ return DummyProcess()
 
 
 
 
 
228
 
229
  def start_gradio_server():
230
  """Start the LLaMA-Omni2 Gradio web server directly"""
231
  print("=== Starting LLaMA-Omni2 Gradio Server ===")
232
 
233
  # First try to use our custom implementation
234
+ direct_gradio_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "gradio_web_server.py")
235
  if os.path.exists(direct_gradio_path):
236
  print(f"Using custom Gradio server implementation: {direct_gradio_path}")
237
  cmd = [
 
247
  print(f"Gradio server started with PID: {process.pid}")
248
  return process
249
 
250
+ # Fall back to a simple Gradio implementation
251
+ print("No Gradio server found. Attempting to create a simple interface...")
252
 
253
+ try:
254
+ import gradio as gr
255
+ import threading
256
+ from transformers import AutoModelForCausalLM, AutoTokenizer
257
+
258
+ # Simple function to launch a basic Gradio interface
259
+ def launch_simple_gradio():
260
+ try:
261
+ print(f"Loading model from {LLAMA_OMNI2_MODEL_PATH}...")
262
+ tokenizer = AutoTokenizer.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
263
+ model = AutoModelForCausalLM.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
264
+
265
+ def generate_text(input_text):
266
+ inputs = tokenizer(input_text, return_tensors="pt")
267
+ outputs = model.generate(inputs.input_ids, max_length=100)
268
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
269
+
270
+ with gr.Blocks() as demo:
271
+ gr.Markdown("# LLaMA-Omni2 Simple Interface")
272
+ with gr.Tab("Text Generation"):
273
+ input_text = gr.Textbox(label="Input Text")
274
+ output_text = gr.Textbox(label="Generated Text")
275
+ generate_btn = gr.Button("Generate")
276
+ generate_btn.click(generate_text, inputs=input_text, outputs=output_text)
277
+
278
+ demo.launch(server_name="0.0.0.0", server_port=7860)
279
+
280
+ except Exception as e:
281
+ print(f"Error in simple Gradio interface: {e}")
282
+
283
+ thread = threading.Thread(target=launch_simple_gradio, daemon=True)
284
+ thread.start()
285
+
286
+ print("Simple Gradio interface started on port 7860")
287
+
288
+ class DummyProcess:
289
+ def __init__(self):
290
+ self.pid = 0
291
+ def terminate(self):
292
+ pass
293
+ def poll(self):
294
+ return None
295
+ def wait(self, timeout=None):
296
+ pass
297
+
298
+ return DummyProcess()
299
+
300
+ except ImportError as e:
301
+ print(f"Failed to create simple Gradio interface: {e}")
302
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  def patch_extracted_files(extraction_dir):
305
  """Patch the extracted Python files to handle missing imports"""
 
388
  print("Checking and installing dependencies...")
389
  download_dependencies()
390
 
391
+ # Create directories directly instead of using extraction script
392
+ print("Creating necessary directories...")
393
+ os.makedirs(EXTRACTION_DIR, exist_ok=True)
394
+ os.makedirs(os.path.join(EXTRACTION_DIR, "llama_omni2"), exist_ok=True)
395
+ os.makedirs(os.path.join(EXTRACTION_DIR, "llama_omni2", "serve"), exist_ok=True)
 
 
396
 
397
  # Ensure the module structure is complete
398
  ensure_module_structure(EXTRACTION_DIR)
399
 
400
+ # Skip patching files as we're not extracting anything
401
+ print("Skipping file patching as we're not running extraction")
402
 
403
  # Add the extraction dir to Python path
404
  if EXTRACTION_DIR not in sys.path:
405
  sys.path.insert(0, EXTRACTION_DIR)
406
  print(f"Added {EXTRACTION_DIR} to sys.path")
407
 
408
+ # Skip directly to model download and starting services
409
+ print("Proceeding directly to model download and starting services...")
410
+
411
+ # Make directories for models
412
+ os.makedirs(MODELS_DIR, exist_ok=True)
413
+ os.makedirs(LLAMA_OMNI2_MODEL_PATH, exist_ok=True)
414
+ os.makedirs(COSYVOICE_PATH, exist_ok=True)
415
+
416
  # Start controller
417
  controller_process = start_controller()
418
  if not controller_process:
 
421
 
422
  # Wait for controller to initialize
423
  print("Waiting for controller to initialize...")
424
+ time.sleep(5)
425
 
426
  # Start model worker
427
  model_worker_process = start_model_worker()
 
430
  controller_process.terminate()
431
  return 1
432
 
433
+ # Wait for model to load - reduced from 300 seconds to 30 seconds
434
+ print("Waiting for model worker to initialize...")
435
+ time.sleep(30)
436
 
437
  # Start Gradio server
438
  gradio_process = start_gradio_server()