# app.py — Gradio launcher for LLaMA-Omni on Hugging Face Spaces.
# (Scraped page chrome — "Spaces:", build-status lines, commit hashes, and a
# line-number gutter — removed; it was not part of the program.)
import os
import sys
import subprocess
import threading
import time
import gradio as gr
# Configure environment for HF Spaces
# SPACE_ID is injected by the Hugging Face Spaces runtime, so its presence
# signals that we are running inside a Space.
HF_SPACES = os.environ.get("SPACE_ID") is not None
# Model repo or local path for the LLaMA-Omni weights; overridable via env.
MODEL_PATH = os.environ.get("MODEL_PATH", "ICTNLP/Llama-3.1-8B-Omni")
# NOTE(review): this keys off SYSTEM_CUDA_VISIBLE_DEVICES, not the standard
# CUDA_VISIBLE_DEVICES — confirm that is the variable the Spaces runtime sets.
DEVICE = "cuda" if os.environ.get("SYSTEM_CUDA_VISIBLE_DEVICES") else "cpu"
def run_background_process(cmd, name):
    """Launch *cmd* as a background shell process and return the Popen handle.

    Args:
        cmd: Shell command line to execute. Runs with ``shell=True`` because
            callers pass full command strings; only pass trusted commands.
        name: Human-readable label, printed when the process starts.

    Returns:
        The ``subprocess.Popen`` object, with stderr merged into a
        line-buffered text-mode stdout pipe for streaming.
    """
    print(f"Starting {name}...")
    # text=True already implies universal newlines/text mode; the redundant
    # universal_newlines=True alias has been dropped.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr with stdout
        text=True,
        bufsize=1,  # line-buffered so output can be streamed as it arrives
        shell=True,
    )
    return process
def read_process_output(process, output_box, name):
    """Stream a child process's stdout into a Gradio textbox.

    Args:
        process: A ``subprocess.Popen`` with a text-mode stdout pipe.
        output_box: Component exposing ``update(value=...)`` to receive text.
        name: Label used in the output header.
    """
    buffer = f"### {name} Output:\n\n"
    for chunk in process.stdout:
        buffer += chunk
        output_box.update(value=buffer)
    # stdout is exhausted, so the child has finished; collect its status.
    exit_code = process.wait()
    buffer += f"\n\nProcess exited with code {exit_code}"
    output_box.update(value=buffer)
def setup_environment():
    """Set up the runtime: install deps, fetch the vocoder, load Whisper.

    Creates the model/vocoder directories, installs Python dependencies when
    not running on HF Spaces, downloads the HiFi-GAN vocoder if missing, and
    pre-loads the Whisper large-v3 speech encoder.

    Returns:
        A human-readable, newline-separated status string.
    """
    # Create necessary directories
    os.makedirs("models/speech_encoder", exist_ok=True)
    os.makedirs("vocoder", exist_ok=True)
    output = "Setting up environment...\n"
    # Install dependencies only if not in HF Space (they're pre-installed there)
    if not HF_SPACES:
        output += "Installing dependencies...\n"
        # BUGFIX: with shell=True the '>' in "openai-whisper>=20231117" was
        # parsed as shell output redirection, silently installing an unpinned
        # package and creating a junk file named "=20231117". Argument lists
        # avoid shell parsing entirely.
        subprocess.run([sys.executable, "-m", "pip", "install", "openai-whisper>=20231117"])
        subprocess.run([sys.executable, "-m", "pip", "install", "fairseq==0.12.2"])
    # Download vocoder if needed
    if not os.path.exists("vocoder/g_00500000"):
        output += "Downloading vocoder...\n"
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/",
            shell=True
        )
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/",
            shell=True
        )
    # Initialize Whisper (it will be downloaded automatically)
    output += "Initializing Whisper model (this may take a while)...\n"
    try:
        import whisper
        whisper.load_model("large-v3", download_root="models/speech_encoder/")
        # Mojibake repaired: these status strings originally held emoji that
        # were corrupted by a bad encoding round-trip.
        output += "✅ Whisper model initialized successfully!\n"
    except Exception as e:
        output += f"❌ Error initializing Whisper model: {str(e)}\n"
    return output + "✅ Environment setup complete!"
def start_services():
    """Start the LLaMA-Omni controller and model worker in the background.

    Stores the process handles in the module globals ``controller_proc`` and
    ``worker_proc`` so they are not garbage collected.

    Returns:
        A human-readable status string describing what was started.
    """
    output = "Starting LLaMA-Omni services...\n"
    # Start the controller
    controller_cmd = "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000"
    controller_process = run_background_process(controller_cmd, "Controller")
    # Mojibake repaired: status strings originally held emoji corrupted by a
    # bad encoding round-trip.
    output += "✅ Controller started\n"
    # Wait for controller to start (fixed delay; no readiness probe)
    time.sleep(5)
    # Start the model worker
    worker_cmd = f"python -m omni_speech.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path {MODEL_PATH} --model-name Llama-3.1-8B-Omni --s2s"
    model_worker_process = run_background_process(worker_cmd, "Model Worker")
    output += f"✅ Model worker started with model: {MODEL_PATH}\n"
    # Wait for model worker to start
    time.sleep(10)
    # The web server is handled separately since we're using the Gradio UI directly
    output += "✅ All services started successfully!\n"
    # Keep references to processes to prevent garbage collection
    global controller_proc, worker_proc
    controller_proc = controller_process
    worker_proc = model_worker_process
    return output
def create_chat_ui(setup_status="Not started", services_status="Not started"):
    """Build the Gradio Blocks UI for LLaMA-Omni.

    Args:
        setup_status: Initial text shown in the "Setup Status" box.
        services_status: Initial text shown in the "Services Status" box.

    Returns:
        The assembled ``gr.Blocks`` demo (not yet launched).
    """
    with gr.Blocks() as demo:
        # Mojibake repaired throughout this function: button/header emoji were
        # corrupted by a bad encoding round-trip.
        gr.Markdown("# 🦙🎧 LLaMA-Omni: Seamless Speech Interaction")
        # Setup and status
        with gr.Row():
            with gr.Column(scale=1):
                setup_btn = gr.Button("1️⃣ Setup Environment")
                services_btn = gr.Button("2️⃣ Start LLaMA-Omni Services", interactive=False)
            with gr.Column(scale=2):
                setup_output = gr.Textbox(label="Setup Status", value=setup_status, lines=5)
                services_output = gr.Textbox(label="Services Status", value=services_status, lines=5)
        # Chat interface
        with gr.Tabs():
            with gr.TabItem("Speech Input"):
                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload audio")
                transcription_output = gr.Textbox(label="Transcription", interactive=False)
                submit_audio_btn = gr.Button("Submit Audio", interactive=False)
            with gr.TabItem("Text Input"):
                text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
                submit_text_btn = gr.Button("Submit Text", interactive=False)
        # Output area
        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(label="Conversation", height=400)
                audio_output = gr.Audio(label="Generated Speech", interactive=False)

        # Function to handle setup button
        def on_setup_click():
            output = setup_environment()
            # gr.Button.update was removed in Gradio 4; gr.update is the
            # portable way to change component properties from a handler.
            return output, gr.update(interactive=True)

        # Function to handle services button
        def on_services_click():
            output = start_services()
            return output, gr.update(interactive=True), gr.update(interactive=True)

        # Placeholder functions for API calls (to be implemented)
        def on_audio_input(audio):
            if audio:
                # This would use Whisper to transcribe
                return "Transcription will appear here when services are running."
            return ""

        def on_audio_submit(audio, chat_history):
            if not audio:
                return chat_history, None
            user_msg = "Audio message (transcription will be added when implemented)"
            bot_msg = "This is a placeholder response. The full model will be running after starting the services."
            history = chat_history + [(user_msg, bot_msg)]
            return history, None

        def on_text_submit(text, chat_history):
            if not text:
                return chat_history, None
            history = chat_history + [(text, "This is a placeholder response. The full model will be running after starting the services.")]
            return history, None

        # Connect events
        setup_btn.click(on_setup_click, outputs=[setup_output, services_btn])
        services_btn.click(on_services_click, outputs=[services_output, submit_audio_btn, submit_text_btn])
        audio_input.change(on_audio_input, [audio_input], [transcription_output])
        submit_audio_btn.click(on_audio_submit, [audio_input, chatbot], [chatbot, audio_output])
        submit_text_btn.click(on_text_submit, [text_input, chatbot], [chatbot, audio_output])
        # Auto-setup on HF Spaces
        if HF_SPACES:
            # Run setup automatically in a separate thread
            def auto_setup():
                time.sleep(2)  # Wait for UI to load
                # NOTE(review): calling component.update() outside an event
                # handler does not push changes to connected browsers in
                # current Gradio — confirm this auto-setup path actually
                # updates the UI, or wire it through demo.load().
                setup_output.update(value="Auto-starting setup process...")
                setup_status = setup_environment()
                setup_output.update(value=setup_status)
                services_btn.update(interactive=True)
            threading.Thread(target=auto_setup, daemon=True).start()
    return demo
if __name__ == "__main__":
    # Global references to background processes (populated by start_services
    # so the Popen handles are not garbage collected).
    controller_proc = None
    worker_proc = None
    # Build the UI
    demo = create_chat_ui()
    # Launch with appropriate parameters for HF Spaces.
    # (A stray trailing "|" scrape artifact after the closing paren was
    # removed; it made the file unparseable.)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces so the Space proxy can reach it
        server_port=int(os.environ.get("PORT", 7860)),
        share=False,
        # NOTE(review): favicon_path normally expects a local file path —
        # confirm Gradio accepts a URL here.
        favicon_path="https://huggingface.co/front/assets/huggingface_logo-noborder.ico"
    )