import os
import sys
import subprocess
import threading
import time

import gradio as gr

# Configure environment for HF Spaces
HF_SPACES = os.environ.get("SPACE_ID") is not None
MODEL_PATH = os.environ.get("MODEL_PATH", "ICTNLP/Llama-3.1-8B-Omni")
# Heuristic: assume a GPU is present when CUDA_VISIBLE_DEVICES is set.
DEVICE = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
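# A more direct probe, assuming torch is importable at startup (it is pulled
# in by the model dependencies); falls back to the env-var heuristic above.
try:
    import torch
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
except ImportError:
    pass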

def run_background_process(cmd, name):
    """Run a command as a background process and return the process object."""
    print(f"Starting {name}...")
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
        shell=True,
    )
    return process

# Collected service logs, keyed by service name; populated by reader threads.
service_logs = {}

def read_process_output(process, name):
    """Accumulate a process's output into service_logs[name].

    Intended to run in a background thread; Gradio components cannot be
    updated directly from another thread, so the UI should read the
    accumulated text from service_logs instead.
    """
    full_output = f"### {name} Output:\n\n"
    service_logs[name] = full_output
    for line in process.stdout:
        full_output += line
        service_logs[name] = full_output
    # Process ended
    return_code = process.wait()
    full_output += f"\n\nProcess exited with code {return_code}"
    service_logs[name] = full_output
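
# A minimal sketch of attaching the reader to a freshly started process:
# a daemon thread keeps the Gradio event loop free and exits with the app.
# Not wired into start_services() below.
def attach_log_reader(process, name):
    thread = threading.Thread(
        target=read_process_output, args=(process, name), daemon=True
    )
    thread.start()
    return thread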

def setup_environment():
    """Set up the environment by installing dependencies and downloading models."""
    # Create necessary directories
    os.makedirs("models/speech_encoder", exist_ok=True)
    os.makedirs("vocoder", exist_ok=True)
    output = "Setting up environment...\n"
    # Install dependencies only if not in an HF Space (they're pre-installed there)
    if not HF_SPACES:
        output += "Installing dependencies...\n"
        # Use argument lists so ">=" is not parsed as a shell redirection
        subprocess.run([sys.executable, "-m", "pip", "install", "openai-whisper>=20231117"])
        subprocess.run([sys.executable, "-m", "pip", "install", "fairseq==0.12.2"])
    # Download vocoder if needed
    if not os.path.exists("vocoder/g_00500000"):
        output += "Downloading vocoder...\n"
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/",
            shell=True,
        )
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/",
            shell=True,
        )
    # Initialize Whisper (it will be downloaded automatically)
    output += "Initializing Whisper model (this may take a while)...\n"
    try:
        import whisper
        whisper.load_model("tiny", download_root="models/speech_encoder/")
        output += "✓ Whisper model initialized successfully!\n"
    except Exception as e:
        output += f"✗ Error initializing Whisper model: {str(e)}\n"
    return output + "✓ Environment setup complete!"
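
# A minimal sketch of a pure-Python alternative to the wget calls above, for
# images where wget is not installed; urllib is in the standard library.
# Not wired into setup_environment().
import urllib.request

def download_if_missing(url, dest_path):
    """Fetch url to dest_path unless the file already exists."""
    if not os.path.exists(dest_path):
        urllib.request.urlretrieve(url, dest_path)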

def start_services():
    """Start the controller, model worker, and web server."""
    output = "Starting LLaMA-Omni services...\n"
    # Start the controller
    controller_cmd = "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000"
    controller_process = run_background_process(controller_cmd, "Controller")
    output += "✓ Controller started\n"
    # Wait for the controller to come up
    time.sleep(5)
    # Start the model worker
    worker_cmd = (
        "python -m omni_speech.serve.model_worker --host 0.0.0.0 "
        "--controller http://localhost:10000 --port 40000 "
        f"--worker http://localhost:40000 --model-path {MODEL_PATH} "
        "--model-name Llama-3.1-8B-Omni --s2s"
    )
    model_worker_process = run_background_process(worker_cmd, "Model Worker")
    output += f"✓ Model worker started with model: {MODEL_PATH}\n"
    # Wait for the model worker to come up
    time.sleep(10)
    # The web server is handled separately since we're using the Gradio UI directly
    output += "✓ All services started successfully!\n"
    # Keep references to the processes to prevent garbage collection
    global controller_proc, worker_proc
    controller_proc = controller_process
    worker_proc = model_worker_process
    return output
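
# A minimal sketch of a readiness check that could replace the fixed sleeps
# above: poll until a TCP port accepts connections or a timeout expires. It
# assumes only that the services listen on the ports given on their command
# lines. Not wired into start_services().
import socket

def wait_for_port(port, host="localhost", timeout=60.0):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(0.5)
    return False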

def create_chat_ui(setup_status="Not started", services_status="Not started"):
    """Create the chat interface for LLaMA-Omni."""
    with gr.Blocks() as demo:
        gr.Markdown("# 🦙🎧 LLaMA-Omni: Seamless Speech Interaction")
        # Setup and status
        with gr.Row():
            with gr.Column(scale=1):
                setup_btn = gr.Button("1️⃣ Setup Environment")
                services_btn = gr.Button("2️⃣ Start LLaMA-Omni Services", interactive=False)
            with gr.Column(scale=2):
                setup_output = gr.Textbox(label="Setup Status", value=setup_status, lines=5)
                services_output = gr.Textbox(label="Services Status", value=services_status, lines=5)
        # Chat interface
        with gr.Tabs():
            with gr.TabItem("Speech Input"):
                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload audio")
                transcription_output = gr.Textbox(label="Transcription", interactive=False)
                submit_audio_btn = gr.Button("Submit Audio", interactive=False)
            with gr.TabItem("Text Input"):
                text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
                submit_text_btn = gr.Button("Submit Text", interactive=False)
        # Output area
        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(label="Conversation", height=400, type="messages")
                audio_output = gr.Audio(label="Generated Speech", interactive=False)
        # Handle the setup button
        def on_setup_click():
            output_message = setup_environment()
            return {
                setup_output: gr.update(value=output_message),
                services_btn: gr.update(interactive=True),
            }

        # Handle the services button
        def on_services_click():
            output_message = start_services()
            return {
                services_output: gr.update(value=output_message),
                submit_audio_btn: gr.update(interactive=True),
                submit_text_btn: gr.update(interactive=True),
            }
        # Placeholder functions for API calls (to be implemented)
        def on_audio_input(audio):
            if audio:
                # This would use Whisper to transcribe
                return "Transcription will appear here when services are running."
            return ""
        def on_audio_submit(audio, chat_history):
            if not audio:
                # Return the existing history even when there is no audio
                return chat_history if chat_history is not None else [], None
            # Placeholder for actual transcription logic
            transcribed_text = "Audio input (transcription pending)"
            # Create new messages in the "messages" format
            new_messages = [
                {"role": "user", "content": transcribed_text},
                {"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."},
            ]
            # Append to the existing history (or initialize it if None)
            updated_history = (chat_history if chat_history is not None else []) + new_messages
            return updated_history, None
        def on_text_submit(text, chat_history):
            if not text:
                # Return the existing history even when there is no text
                return chat_history if chat_history is not None else [], None
            # Create new messages in the "messages" format
            new_messages = [
                {"role": "user", "content": text},
                {"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."},
            ]
            # Append to the existing history (or initialize it if None)
            updated_history = (chat_history if chat_history is not None else []) + new_messages
            return updated_history, None
        # Connect events
        setup_btn.click(on_setup_click, outputs=[setup_output, services_btn])
        services_btn.click(on_services_click, outputs=[services_output, submit_audio_btn, submit_text_btn])
        audio_input.change(on_audio_input, [audio_input], [transcription_output])
        submit_audio_btn.click(on_audio_submit, [audio_input, chatbot], [chatbot, audio_output])
        submit_text_btn.click(on_text_submit, [text_input, chatbot], [chatbot, audio_output])
        # Auto-setup on HF Spaces
        if HF_SPACES:
            def perform_auto_setup_on_load():
                # Show that setup is starting
                yield {
                    setup_output: gr.update(value="Auto-starting setup process...")
                }
                # Actual setup call
                final_setup_status_message = setup_environment()
                # Show the final status and enable the next button
                yield {
                    setup_output: gr.update(value=final_setup_status_message),
                    services_btn: gr.update(interactive=True),
                }

            demo.load(
                perform_auto_setup_on_load,
                None,
                [setup_output, services_btn],
            )
    return demo

if __name__ == "__main__":
    # Global references to background processes
    controller_proc = None
    worker_proc = None
    # Build the UI
    demo = create_chat_ui()
    # Launch with appropriate parameters for HF Spaces
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        share=False,
    )