import os
import sys
import subprocess
import threading
import time

import gradio as gr


def run_background_process(cmd, name):
    """Spawn *cmd* in a shell and return the Popen handle.

    stdout and stderr are merged into a single line-buffered text stream so
    a reader thread (see read_process_output) can stream the output.

    Args:
        cmd: Shell command string to execute.
        name: Human-readable service name, used only for the startup message.

    Returns:
        The subprocess.Popen object for the running process.
    """
    print(f"Starting {name}...")
    # NOTE(review): shell=True is acceptable here because every cmd in this
    # file is a fixed literal (no untrusted input); a list + shell=False
    # would still be the safer default.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,   # merge stderr into stdout for one stream
        text=True,
        bufsize=1,                  # line-buffered so output appears promptly
        universal_newlines=True,
        shell=True,
    )
    return process


def read_process_output(process, output_box, name):
    """Stream a process's merged output into *output_box* until it exits.

    Intended to run in a daemon thread. Appends each line to an accumulated
    Markdown string and pushes it to the component, then records the exit
    code once the process terminates.

    Args:
        process: A Popen object created by run_background_process.
        output_box: Gradio component receiving the accumulated log text.
        name: Service name used in the log header.
    """
    full_output = f"### {name} Output:\n\n"
    for line in process.stdout:
        full_output += line
        # NOTE(review): calling component.update() from a worker thread does
        # not refresh the UI in recent Gradio releases (updates must be
        # returned from event handlers) — confirm the targeted Gradio
        # version supports this pattern.
        output_box.update(value=full_output)

    # Process ended — record the exit status in the log.
    return_code = process.wait()
    full_output += f"\n\nProcess exited with code {return_code}"
    output_box.update(value=full_output)


def setup_environment():
    """Install Python dependencies and download model/vocoder assets.

    Creates the local directory layout, installs whisper and fairseq,
    fetches the HiFi-GAN vocoder checkpoint + config (once), and triggers
    the Whisper large-v3 download.

    Returns:
        A status string for the Gradio UI.
    """
    # Create necessary directories.
    os.makedirs("models/speech_encoder", exist_ok=True)
    os.makedirs("vocoder", exist_ok=True)

    # BUG FIX: the version specifier must be quoted — an unquoted
    # `>=20231117` is interpreted by the shell as an output redirection to a
    # file named `=20231117`, so pip silently installed an unpinned package.
    os.system('pip install "openai-whisper>=20231117"')
    os.system("pip install fairseq==0.12.2")

    # Download the vocoder checkpoint and its config only on first run.
    if not os.path.exists("vocoder/g_00500000"):
        os.system("wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/")
        os.system("wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/")

    # Initialize Whisper (the checkpoint is downloaded automatically).
    os.system("python -c \"import whisper; whisper.load_model('large-v3', download_root='models/speech_encoder/')\"")

    return "✅ Environment setup complete!"
def _launch_service(cmd, name, output_box, startup_delay):
    """Spawn one service, stream its log from a daemon thread, then wait.

    Helper extracted from start_services: the spawn / reader-thread / sleep
    sequence was repeated verbatim for all three services.

    Args:
        cmd: Shell command string for the service.
        name: Human-readable service name (log header + startup message).
        output_box: Gradio component that receives the streamed log.
        startup_delay: Seconds to block after launch so the service can
            come up before its dependents start.

    Returns:
        The Popen object for the launched service.
    """
    process = run_background_process(cmd, name)
    # Daemon thread: dies with the main process, so it never blocks exit.
    reader = threading.Thread(
        target=read_process_output,
        args=(process, output_box, name),
        daemon=True,
    )
    reader.start()
    time.sleep(startup_delay)
    return process


def start_services(controller_output, model_worker_output, web_server_output):
    """Start the controller, model worker, and web server, in that order.

    Each service's output is streamed into the corresponding Gradio
    component. Fixed sleeps give each service time to start before the
    next one (which depends on it) is launched.

    Args:
        controller_output: Component for the controller's log.
        model_worker_output: Component for the model worker's log.
        web_server_output: Component for the web server's log.

    Returns:
        A status string for the Gradio UI.
    """
    # 1) Controller — the registry the worker and web server connect to.
    _launch_service(
        "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000",
        "Controller",
        controller_output,
        5,
    )

    # 2) Model worker — loads the model, so it gets the longest delay.
    _launch_service(
        "python -m omni_speech.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path Llama-3.1-8B-Omni --model-name Llama-3.1-8B-Omni --s2s",
        "Model Worker",
        model_worker_output,
        10,
    )

    # 3) Web server — the user-facing Gradio interface on port 8001.
    _launch_service(
        "python -m omni_speech.serve.gradio_web_server --controller http://localhost:10000 --port 8001 --model-list-mode reload --vocoder vocoder/g_00500000 --vocoder-cfg vocoder/config.json",
        "Web Server",
        web_server_output,
        5,
    )

    return "✅ All services started! Click the 'Open Interface' button below."
def build_ui():
    """Build and return the Gradio Blocks UI for deploying LLaMA-Omni.

    Three tabs: environment setup, service management (with collapsible
    per-service logs), and an About page.

    Returns:
        The gr.Blocks demo object (launched by the __main__ guard).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 🦙🎧 LLaMA-Omni Deployment")

        with gr.Tab("Setup"):
            setup_btn = gr.Button("Setup Environment")
            setup_output = gr.Textbox(label="Setup Output", value="Click 'Setup Environment' to start.")
            setup_btn.click(setup_environment, outputs=setup_output)

        with gr.Tab("Services"):
            start_btn = gr.Button("Start LLaMA-Omni Services")
            status_output = gr.Textbox(label="Status", value="Click 'Start LLaMA-Omni Services' to begin.")

            with gr.Accordion("Service Logs", open=False):
                controller_output = gr.Markdown("Controller not started")
                model_worker_output = gr.Markdown("Model Worker not started")
                web_server_output = gr.Markdown("Web Server not started")

            # BUG FIX: start_services takes the three log components as
            # positional arguments, but the original wiring used inputs=[]
            # (so Gradio called it with zero args -> TypeError) and mapped
            # its single return value onto four outputs. Close over the
            # component objects instead and wire the one status string to
            # the one status box.
            start_btn.click(
                fn=lambda: start_services(controller_output, model_worker_output, web_server_output),
                inputs=[],
                outputs=[status_output],
            )

            # BUG FIX: the original handler returned gr.update(...) but
            # declared no output component, so clicking did nothing. Surface
            # the interface URL in the status box instead.
            interface_btn = gr.Button("Open Interface")
            interface_btn.click(
                fn=lambda: "Interface running at http://localhost:8001",
                inputs=None,
                outputs=status_output,
            )

        with gr.Tab("About"):
            gr.Markdown("""
            # About LLaMA-Omni

            LLaMA-Omni is a speech-language model built upon Llama-3.1-8B-Instruct.
            It supports low-latency and high-quality speech interactions,
            simultaneously generating both text and speech responses based on
            speech instructions.

            ## Features

            * Built on Llama-3.1-8B-Instruct, ensuring high-quality responses
            * Low-latency speech interaction with a latency as low as 226ms
            * Simultaneous generation of both text and speech responses

            ## License

            This code is released under the Apache-2.0 License. The model is
            intended for academic research purposes only and may NOT be used
            for commercial purposes.

            Original work by Qingkai Fang, Shoutao Guo, Yan Zhou, Zhengrui Ma,
            Shaolei Zhang, Yang Feng.
            """)

    return demo


if __name__ == "__main__":
    demo = build_ui()
    demo.launch(server_port=7860)