# Gradio app for running LLaMA-Omni (speech interaction) on Hugging Face Spaces.
# (Removed: HF Spaces file-viewer residue — build-status lines, commit-hash
# gutter, and line-number gutter that were scraped along with the source.)
import os
import sys
import subprocess
import threading
import time
import gradio as gr
# Configure environment for HF Spaces
HF_SPACES = os.environ.get("SPACE_ID") is not None  # SPACE_ID is injected by the HF Spaces runtime
MODEL_PATH = os.environ.get("MODEL_PATH", "ICTNLP/Llama-3.1-8B-Omni")  # HF repo id or local path of the model
DEVICE = "cuda" if os.environ.get("SYSTEM_CUDA_VISIBLE_DEVICES") else "cpu"  # NOTE(review): unusual env var name — confirm it is actually set on GPU Spaces
def run_background_process(cmd, name):
    """Launch *cmd* through the shell and return its Popen handle.

    stdout and stderr are merged into one line-buffered text stream so a
    single reader (see read_process_output) can follow the combined output.
    """
    print(f"Starting {name}...")
    return subprocess.Popen(
        cmd,
        shell=True,
        text=True,
        bufsize=1,
        universal_newlines=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
def read_process_output(process, output_box, name):
    """Stream a process's combined output into *output_box* until it exits.

    Pushes the accumulated text to the box after every line, then appends
    the final exit code once the stream is exhausted.
    """
    accumulated = f"### {name} Output:\n\n"
    for chunk in process.stdout:
        accumulated += chunk
        output_box.update(value=accumulated)
    # The stdout stream is exhausted, so the process has finished.
    exit_code = process.wait()
    accumulated += f"\n\nProcess exited with code {exit_code}"
    output_box.update(value=accumulated)
def setup_environment():
    """Set up the runtime environment: directories, deps, vocoder, Whisper.

    Creates the model/vocoder directories, installs pip dependencies when not
    running on an HF Space (they are pre-installed there), downloads the
    HiFi-GAN vocoder checkpoint and config if missing, and pre-downloads the
    Whisper "tiny" speech-encoder model.

    Returns:
        str: a human-readable progress log of the setup steps.
    """
    # Create necessary directories
    os.makedirs("models/speech_encoder", exist_ok=True)
    os.makedirs("vocoder", exist_ok=True)
    output = "Setting up environment...\n"
    # Install dependencies only if not in HF Space (they're pre-installed there)
    if not HF_SPACES:
        output += "Installing dependencies...\n"
        # Quote the requirement: unquoted, the shell parses '>=20231117' as an
        # output redirection to a file named '=20231117' instead of passing a
        # version specifier to pip.
        subprocess.run('pip install "openai-whisper>=20231117"', shell=True)
        subprocess.run("pip install fairseq==0.12.2", shell=True)
    # Download vocoder if needed
    if not os.path.exists("vocoder/g_00500000"):
        output += "Downloading vocoder...\n"
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/",
            shell=True
        )
        subprocess.run(
            "wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/",
            shell=True
        )
    # Initialize Whisper (it will be downloaded automatically)
    output += "Initializing Whisper model (this may take a while)...\n"
    try:
        import whisper
        whisper.load_model("tiny", download_root="models/speech_encoder/")
        # Status markers below restore the emoji that were mojibake-garbled
        # in the scraped source (broken string literals).
        output += "✅ Whisper model initialized successfully!\n"
    except Exception as e:
        output += f"❌ Error initializing Whisper model: {str(e)}\n"
    return output + "✅ Environment setup complete!"
def start_services():
    """Start the LLaMA-Omni controller and model worker in the background.

    The web UI itself is this Gradio app, so only the controller and model
    worker processes are spawned here. Handles are stashed in module globals
    so the processes are not garbage collected while the app runs.

    Returns:
        str: a human-readable progress log of the startup steps.
    """
    output = "Starting LLaMA-Omni services...\n"
    # Start the controller
    controller_cmd = "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000"
    controller_process = run_background_process(controller_cmd, "Controller")
    # Status markers restore emoji that were mojibake-garbled in the scraped
    # source (broken string literals).
    output += "✅ Controller started\n"
    # Wait for controller to start
    time.sleep(5)
    # Start the model worker
    worker_cmd = f"python -m omni_speech.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path {MODEL_PATH} --model-name Llama-3.1-8B-Omni --s2s"
    model_worker_process = run_background_process(worker_cmd, "Model Worker")
    output += f"✅ Model worker started with model: {MODEL_PATH}\n"
    # Wait for model worker to start
    time.sleep(10)
    # Start the web server (this is handled separately since we're using the Gradio UI directly)
    output += "✅ All services started successfully!\n"
    # Keep references to processes to prevent garbage collection
    global controller_proc, worker_proc
    controller_proc = controller_process
    worker_proc = model_worker_process
    return output
def create_chat_ui(setup_status="Not started", services_status="Not started"):
    """Create the chat interface for LLaMA-Omni.

    Builds a Gradio Blocks app with setup/start buttons, speech and text
    input tabs, and a chat/audio output area. The submit handlers are
    placeholders until the backend services have been started.

    Args:
        setup_status: initial text for the "Setup Status" box.
        services_status: initial text for the "Services Status" box.

    Returns:
        gr.Blocks: the assembled (unlaunched) demo.
    """
    with gr.Blocks() as demo:
        # Header emoji below restore the characters that were mojibake-garbled
        # in the scraped source.
        gr.Markdown("# 🦙🎧 LLaMA-Omni: Seamless Speech Interaction")
        # Setup and status
        with gr.Row():
            with gr.Column(scale=1):
                setup_btn = gr.Button("1️⃣ Setup Environment")
                services_btn = gr.Button("2️⃣ Start LLaMA-Omni Services", interactive=False)
            with gr.Column(scale=2):
                setup_output = gr.Textbox(label="Setup Status", value=setup_status, lines=5)
                services_output = gr.Textbox(label="Services Status", value=services_status, lines=5)
        # Chat interface
        with gr.Tabs():
            with gr.TabItem("Speech Input"):
                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload audio")
                transcription_output = gr.Textbox(label="Transcription", interactive=False)
                submit_audio_btn = gr.Button("Submit Audio", interactive=False)
            with gr.TabItem("Text Input"):
                text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
                submit_text_btn = gr.Button("Submit Text", interactive=False)
        # Output area
        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(label="Conversation", height=400, type="messages")
                audio_output = gr.Audio(label="Generated Speech", interactive=False)

        # Function to handle setup button: runs setup, then unlocks step 2.
        def on_setup_click():
            output_message = setup_environment()
            return {
                setup_output: gr.update(value=output_message),
                services_btn: gr.update(interactive=True)
            }

        # Function to handle services button: starts backends, unlocks submits.
        def on_services_click():
            output_message = start_services()
            return {
                services_output: gr.update(value=output_message),
                submit_audio_btn: gr.update(interactive=True),
                submit_text_btn: gr.update(interactive=True)
            }

        # Placeholder functions for API calls (to be implemented)
        def on_audio_input(audio):
            if audio:
                # This would use Whisper to transcribe
                return "Transcription will appear here when services are running."
            return ""

        def on_audio_submit(audio, chat_history):
            if not audio:
                # Ensure chat_history is returned even if no audio
                return chat_history if chat_history is not None else [], None
            # Placeholder for actual transcription logic
            transcribed_text = "Audio input (transcription pending)"
            # Create new messages in the "messages" format
            new_messages = [
                {"role": "user", "content": transcribed_text},
                {"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
            ]
            # Append new messages to existing history (or initialize if history is None)
            updated_history = (chat_history if chat_history is not None else []) + new_messages
            return updated_history, None

        def on_text_submit(text, chat_history):
            if not text:
                # Ensure chat_history is returned even if no text
                return chat_history if chat_history is not None else [], None
            # Create new messages in the "messages" format
            new_messages = [
                {"role": "user", "content": text},
                {"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
            ]
            # Append new messages to existing history (or initialize if history is None)
            updated_history = (chat_history if chat_history is not None else []) + new_messages
            return updated_history, None

        # Connect events
        setup_btn.click(on_setup_click, outputs=[setup_output, services_btn])
        services_btn.click(on_services_click, outputs=[services_output, submit_audio_btn, submit_text_btn])
        audio_input.change(on_audio_input, [audio_input], [transcription_output])
        submit_audio_btn.click(on_audio_submit, [audio_input, chatbot], [chatbot, audio_output])
        submit_text_btn.click(on_text_submit, [text_input, chatbot], [chatbot, audio_output])

        # Auto-setup on HF Spaces: run the environment setup as soon as the
        # page loads, streaming a "starting" message then the final status.
        if HF_SPACES:
            def perform_auto_setup_on_load():
                # Update UI to show setup is starting
                yield {
                    setup_output: gr.update(value="Auto-starting setup process...")
                }
                # Actual setup call
                final_setup_status_message = setup_environment()
                # Update UI with final status and enable next button
                yield {
                    setup_output: gr.update(value=final_setup_status_message),
                    services_btn: gr.update(interactive=True)
                }
            demo.load(
                perform_auto_setup_on_load,
                None,
                [setup_output, services_btn]
            )
    return demo
if __name__ == "__main__":
    # Global references to background processes so they survive for the
    # lifetime of the app (assigned by start_services()).
    controller_proc = None
    worker_proc = None
    # Build the UI
    demo = create_chat_ui()
    # Launch with appropriate parameters for HF Spaces.
    # (Fixed: removed a stray trailing '|' residue character that made the
    # scraped file a syntax error.)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces — required inside the Spaces container
        server_port=int(os.environ.get("PORT", 7860)),
        share=False,
        # NOTE(review): gradio's favicon_path normally takes a local file
        # path — confirm a remote URL is honored here.
        favicon_path="https://huggingface.co/front/assets/huggingface_logo-noborder.ico"
    )