# llama-omni / app_gradio_spaces.py
import os
import sys
import subprocess
import threading
import time
import gradio as gr
# Configure environment for HF Spaces
HF_SPACES = os.environ.get("SPACE_ID") is not None
MODEL_PATH = os.environ.get("MODEL_PATH", "ICTNLP/Llama-3.1-8B-Omni")
DEVICE = "cuda" if os.environ.get("SYSTEM_CUDA_VISIBLE_DEVICES") else "cpu"
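# Note: DEVICE is not referenced elsewhere in this file (the model worker picks
# its own device), and SYSTEM_CUDA_VISIBLE_DEVICES is this app's own convention.
# A more direct check, assuming torch is installed with the model dependencies:
#   import torch
#   DEVICE = "cuda" if torch.cuda.is_available() else "cpu"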
def run_background_process(cmd, name):
"""Run a background process and return the process object."""
print(f"Starting {name}...")
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into stdout for a single stream
        text=True,                 # decode bytes to str (supersedes universal_newlines)
        bufsize=1,                 # line-buffered so output arrives line by line
        shell=True,
    )
return process
def read_process_output(process, name):
    """Stream a process's output, yielding the accumulated text after each line.

    Written as a generator so it can back a streaming Gradio event handler;
    calling .update() on a component object does not push changes to the UI.
    """
    full_output = f"### {name} Output:\n\n"
    for line in process.stdout:
        full_output += line
        yield full_output
    # Process ended
    return_code = process.wait()
    full_output += f"\n\nProcess exited with code {return_code}"
    yield full_output
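# A minimal sketch (not wired up below; the helper name is illustrative) of how
# the streaming reader above could run without blocking the Gradio event loop:
# drain it on a daemon thread and let the UI poll the latest snapshot.
def tail_process(process, name):
    """Drain a process's output on a daemon thread so the Popen pipe never fills."""
    snapshots = []

    def _reader():
        for text in read_process_output(process, name):
            snapshots.append(text)  # each item is the full accumulated log so far

    thread = threading.Thread(target=_reader, daemon=True, name=f"{name}-tail")
    thread.start()
    return thread, snapshots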
def setup_environment():
"""Set up the environment by installing dependencies and downloading models."""
# Create necessary directories
os.makedirs("models/speech_encoder", exist_ok=True)
os.makedirs("vocoder", exist_ok=True)
output = "Setting up environment...\n"
# Install dependencies only if not in HF Space (they're pre-installed there)
if not HF_SPACES:
output += "Installing dependencies...\n"
subprocess.run("pip install openai-whisper>=20231117", shell=True)
subprocess.run("pip install fairseq==0.12.2", shell=True)
# Download vocoder if needed
if not os.path.exists("vocoder/g_00500000"):
output += "Downloading vocoder...\n"
subprocess.run(
"wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/",
shell=True
)
subprocess.run(
"wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/",
shell=True
)
# Initialize Whisper (it will be downloaded automatically)
output += "Initializing Whisper model (this may take a while)...\n"
try:
import whisper
whisper.load_model("tiny", download_root="models/speech_encoder/")
output += "βœ… Whisper model initialized successfully!\n"
except Exception as e:
output += f"❌ Error initializing Whisper model: {str(e)}\n"
return output + "βœ… Environment setup complete!"
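# A small sanity check callers could run after setup_environment(); the exact
# filenames are an assumption taken from the download commands above.
def vocoder_ready():
    """Return True if both expected vocoder files exist and are non-empty."""
    paths = ("vocoder/g_00500000", "vocoder/config.json")
    return all(os.path.exists(p) and os.path.getsize(p) > 0 for p in paths)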
def start_services():
"""Start the controller, model worker, and web server."""
output = "Starting LLaMA-Omni services...\n"
# Start the controller
controller_cmd = "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000"
controller_process = run_background_process(controller_cmd, "Controller")
output += "βœ… Controller started\n"
# Wait for controller to start
time.sleep(5)
# Start the model worker
worker_cmd = f"python -m omni_speech.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path {MODEL_PATH} --model-name Llama-3.1-8B-Omni --s2s"
model_worker_process = run_background_process(worker_cmd, "Model Worker")
output += f"βœ… Model worker started with model: {MODEL_PATH}\n"
# Wait for model worker to start
time.sleep(10)
    # No separate web server is launched: this Gradio app itself serves the UI.
    output += "βœ… All services started successfully!\n"
# Keep references to processes to prevent garbage collection
global controller_proc, worker_proc
controller_proc = controller_process
worker_proc = model_worker_process
return output
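# The fixed sleeps in start_services() are guesses at startup latency. A more
# robust alternative (a sketch, assuming only that the services listen on the
# ports passed above) is to poll until each port accepts TCP connections:
import socket

def wait_for_port(host, port, timeout=60.0):
    """Block until host:port accepts a TCP connection or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            time.sleep(1)
    return False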
def create_chat_ui(setup_status="Not started", services_status="Not started"):
"""Create the chat interface for LLaMA-Omni."""
with gr.Blocks() as demo:
gr.Markdown("# πŸ¦™πŸŽ§ LLaMA-Omni: Seamless Speech Interaction")
# Setup and status
with gr.Row():
with gr.Column(scale=1):
setup_btn = gr.Button("1️⃣ Setup Environment")
services_btn = gr.Button("2️⃣ Start LLaMA-Omni Services", interactive=False)
with gr.Column(scale=2):
setup_output = gr.Textbox(label="Setup Status", value=setup_status, lines=5)
services_output = gr.Textbox(label="Services Status", value=services_status, lines=5)
# Chat interface
with gr.Tabs():
with gr.TabItem("Speech Input"):
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload audio")
transcription_output = gr.Textbox(label="Transcription", interactive=False)
submit_audio_btn = gr.Button("Submit Audio", interactive=False)
with gr.TabItem("Text Input"):
text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
submit_text_btn = gr.Button("Submit Text", interactive=False)
# Output area
with gr.Row():
with gr.Column():
chatbot = gr.Chatbot(label="Conversation", height=400, type="messages")
audio_output = gr.Audio(label="Generated Speech", interactive=False)
# Function to handle setup button
def on_setup_click():
output_message = setup_environment()
return {
setup_output: gr.update(value=output_message),
services_btn: gr.update(interactive=True)
}
# Function to handle services button
def on_services_click():
output_message = start_services()
return {
services_output: gr.update(value=output_message),
submit_audio_btn: gr.update(interactive=True),
submit_text_btn: gr.update(interactive=True)
}
# Placeholder functions for API calls (to be implemented)
def on_audio_input(audio):
if audio:
# This would use Whisper to transcribe
return "Transcription will appear here when services are running."
return ""
def on_audio_submit(audio, chat_history):
if not audio:
# Ensure chat_history is returned even if no audio
return chat_history if chat_history is not None else [], None
# Placeholder for actual transcription logic
transcribed_text = "Audio input (transcription pending)"
# Create new messages in the "messages" format
new_messages = [
{"role": "user", "content": transcribed_text},
{"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
]
# Append new messages to existing history (or initialize if history is None)
updated_history = (chat_history if chat_history is not None else []) + new_messages
return updated_history, None
def on_text_submit(text, chat_history):
if not text:
# Ensure chat_history is returned even if no text
return chat_history if chat_history is not None else [], None
# Create new messages in the "messages" format
new_messages = [
{"role": "user", "content": text},
{"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
]
# Append new messages to existing history (or initialize if history is None)
updated_history = (chat_history if chat_history is not None else []) + new_messages
return updated_history, None
# Connect events
setup_btn.click(on_setup_click, outputs=[setup_output, services_btn])
services_btn.click(on_services_click, outputs=[services_output, submit_audio_btn, submit_text_btn])
audio_input.change(on_audio_input, [audio_input], [transcription_output])
submit_audio_btn.click(on_audio_submit, [audio_input, chatbot], [chatbot, audio_output])
submit_text_btn.click(on_text_submit, [text_input, chatbot], [chatbot, audio_output])
# Auto-setup on HF Spaces
if HF_SPACES:
def perform_auto_setup_on_load():
# Update UI to show setup is starting
yield {
setup_output: gr.update(value="Auto-starting setup process...")
}
# Actual setup call
final_setup_status_message = setup_environment()
# Update UI with final status and enable next button
yield {
setup_output: gr.update(value=final_setup_status_message),
services_btn: gr.update(interactive=True)
}
demo.load(
perform_auto_setup_on_load,
None,
[setup_output, services_btn]
)
return demo
if __name__ == "__main__":
# Global references to background processes
controller_proc = None
worker_proc = None
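    # Best-effort cleanup so the controller and worker do not outlive the app.
    # A defensive sketch: Popen.terminate() sends a polite SIGTERM; a stubborn
    # process may additionally need .kill().
    import atexit

    def _shutdown_services():
        for proc in (controller_proc, worker_proc):
            if proc is not None and proc.poll() is None:
                proc.terminate()

    atexit.register(_shutdown_services)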
# Build the UI
demo = create_chat_ui()
# Launch with appropriate parameters for HF Spaces
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        share=False,
        # Note: Gradio's favicon_path expects a local file path, not a URL, so
        # an icon would need to be bundled with the Space to be used here.
    )