Spaces · Build error
Commit f4aa7db · 1 Parent(s): ec8ce73
fddfddffd
Files changed:
- app.py +4 -1
- launch_llama_omni2.py +11 -15
app.py
CHANGED
@@ -192,13 +192,16 @@ if __name__ == "__main__":
 
     # Use the direct launcher to run LLaMA-Omni2 components
     print("Starting LLaMA-Omni2 with direct launcher...")
-
+
+    # Get the absolute path to the launcher script
+    launcher_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "launch_llama_omni2.py")
 
     if os.path.exists(launcher_script):
         try:
             # Make the launcher script executable
             os.chmod(launcher_script, 0o755)
 
+            print(f"Running launcher script: {launcher_script}")
             # Run the launcher script
             subprocess.run([sys.executable, launcher_script], check=True)
         except subprocess.CalledProcessError as e:
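
The app.py hunk defines launcher_script from app.py's own location before it is used, so the subprocess call no longer depends on the name being set elsewhere. A minimal, self-contained sketch of the same pattern, assembled as a runnable script (the else branch is an illustrative assumption, not part of the diff):

import os
import subprocess
import sys

if __name__ == "__main__":
    # Resolve the launcher relative to this file, as the diff does
    launcher_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "launch_llama_omni2.py"
    )
    if os.path.exists(launcher_script):
        # Mark it executable, then run it with the current interpreter
        os.chmod(launcher_script, 0o755)
        print(f"Running launcher script: {launcher_script}")
        subprocess.run([sys.executable, launcher_script], check=True)
    else:
        # Assumed fallback for illustration; the diff has no else branch
        print(f"Launcher script not found: {launcher_script}")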
launch_llama_omni2.py
CHANGED
@@ -96,19 +96,6 @@ DEFAULT_PORT = 8000
 
     return True
 
-def run_extraction_script():
-    """Run the script to extract LLaMA-Omni2 components"""
-    print("=== Extracting LLaMA-Omni2 Scripts ===")
-    extract_script = "/home/user/app/extract_llama_omni2_scripts.py"
-
-    try:
-        subprocess.run([sys.executable, extract_script], check=True)
-        print("Extraction completed successfully")
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"Error running extraction script: {e}")
-        return False
-
 def start_controller():
     """Start the LLaMA-Omni2 controller directly"""
     print("=== Starting LLaMA-Omni2 Controller ===")
@@ -254,16 +241,25 @@ def start_gradio_server():
     import gradio as gr
     import threading
     from transformers import AutoModelForCausalLM, AutoTokenizer
+    import torch
 
     # Simple function to launch a basic Gradio interface
    def launch_simple_gradio():
        try:
            print(f"Loading model from {LLAMA_OMNI2_MODEL_PATH}...")
+            # Check for CUDA availability
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            print(f"Using device: {device}")
+
+            if device == "cuda":
+                print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
+                print(f"CUDA Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+
             tokenizer = AutoTokenizer.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
-            model = AutoModelForCausalLM.from_pretrained(LLAMA_OMNI2_MODEL_PATH)
+            model = AutoModelForCausalLM.from_pretrained(LLAMA_OMNI2_MODEL_PATH).to(device)
 
             def generate_text(input_text):
-                inputs = tokenizer(input_text, return_tensors="pt")
+                inputs = tokenizer(input_text, return_tensors="pt").to(device)
                 outputs = model.generate(inputs.input_ids, max_length=100)
                 return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
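
The second hunk follows the standard torch device pattern: pick "cuda" when available, then move both the model weights and the tokenized inputs to that device so model.generate sees tensors on a single device, which is what the .to(device) calls guard against. A standalone sketch of the pattern, assuming a small placeholder checkpoint in place of the Space's LLAMA_OMNI2_MODEL_PATH:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder stand-in for LLAMA_OMNI2_MODEL_PATH; any causal LM checkpoint works
MODEL_PATH = "sshleifer/tiny-gpt2"

# Same device selection as the diff
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Move the weights to the same device the inputs will be on,
# avoiding a CPU/GPU device-mismatch error at generation time
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH).to(device)

inputs = tokenizer("Hello", return_tensors="pt").to(device)
outputs = model.generate(inputs.input_ids, max_length=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))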