Spaces · Build error
Commit f4aa7db · 1 Parent(s): ec8ce73
fddfddffd
Files changed:
- app.py +4 -1
- launch_llama_omni2.py +11 -15
app.py
CHANGED
@@ -192,13 +192,16 @@ if __name__ == "__main__":
 
     # Use the direct launcher to run LLaMA-Omni2 components
     print("Starting LLaMA-Omni2 with direct launcher...")
-
+
+    # Get the absolute path to the launcher script
+    launcher_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "launch_llama_omni2.py")
 
     if os.path.exists(launcher_script):
         try:
             # Make the launcher script executable
             os.chmod(launcher_script, 0o755)
 
+            print(f"Running launcher script: {launcher_script}")
             # Run the launcher script
             subprocess.run([sys.executable, launcher_script], check=True)
         except subprocess.CalledProcessError as e:
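
The app.py hunk defines launcher_script from app.py's own location before it is used, so the subprocess call no longer depends on the name being set elsewhere. A minimal, self-contained sketch of the same pattern, assembled as a runnable script (the else branch is an illustrative assumption, not part of the diff):

import os
import subprocess
import sys

if __name__ == "__main__":
    # Resolve the launcher relative to this file, as the diff does
    launcher_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "launch_llama_omni2.py"
    )
    if os.path.exists(launcher_script):
        # Mark it executable, then run it with the current interpreter
        os.chmod(launcher_script, 0o755)
        print(f"Running launcher script: {launcher_script}")
        subprocess.run([sys.executable, launcher_script], check=True)
    else:
        # Assumed fallback for illustration; the diff has no else branch
        print(f"Launcher script not found: {launcher_script}")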
launch_llama_omni2.py
CHANGED
@@ -96,19 +96,6 @@ DEFAULT_PORT = 8000
 
     return True
 
-def run_extraction_script():
-    """Run the script to extract LLaMA-Omni2 components"""
-    print("=== Extracting LLaMA-Omni2 Scripts ===")
-    extract_script = "/home/user/app/extract_llama_omni2_scripts.py"
-
-    try:
-        subprocess.run([sys.executable, extract_script], check=True)
-        print("Extraction completed successfully")
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"Error running extraction script: {e}")
-        return False
-
 def start_controller():
     """Start the LLaMA-Omni2 controller directly"""
     print("=== Starting LLaMA-Omni2 Controller ===")
@@ -254,16 +241,25 @@ def start_gradio_server():
     import gradio as gr
     import threading
     from transformers import AutoModelForCausalLM, AutoTokenizer
+    import torch
 
     # Simple function to launch a basic Gradio interface
    def launch_simple_gradio():
        try:
            print(f"Loading model from {LLAMA_OMNI2_MODEL_PATH}...")
+            # Check for CUDA availability
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            print(f"Using device: {device}")
+
+            if device == "cuda":
+                print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
+                print(f"CUDA Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+
             tokenizer = AutoTokenizer.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
-            model = AutoModelForCausalLM.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
+            model = AutoModelForCausalLM.from_pretrained(LLAMA_OMNI2_MODEL_PATH).to(device)
 
             def generate_text(input_text):
-                inputs = tokenizer(input_text, return_tensors="pt")
+                inputs = tokenizer(input_text, return_tensors="pt").to(device)
                 outputs = model.generate(inputs.input_ids, max_length=100)
                 return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
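
The second hunk follows the standard torch device pattern: pick "cuda" when available, then move both the model weights and the tokenized inputs to that device so model.generate sees tensors on a single device, which is what the .to(device) calls guard against. A standalone sketch of the pattern, assuming a small placeholder checkpoint in place of the Space's LLAMA_OMNI2_MODEL_PATH:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder stand-in for LLAMA_OMNI2_MODEL_PATH; any causal LM checkpoint works
MODEL_PATH = "sshleifer/tiny-gpt2"

# Same device selection as the diff
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Move the weights to the same device the inputs will be on,
# avoiding a CPU/GPU device-mismatch error at generation time
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH).to(device)

inputs = tokenizer("Hello", return_tensors="pt").to(device)
outputs = model.generate(inputs.input_ids, max_length=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))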