# llama-omni / app_gradio_spaces.py
import os
import sys
import subprocess
import threading
import time
import gradio as gr
# Configure environment for HF Spaces
HF_SPACES = os.environ.get("SPACE_ID") is not None
MODEL_PATH = os.environ.get("MODEL_PATH", "ICTNLP/Llama-3.1-8B-Omni")
DEVICE = "cuda" if os.environ.get("SYSTEM_CUDA_VISIBLE_DEVICES") else "cpu"
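# Note: DEVICE is not referenced elsewhere in this file (the model worker picks
# its own device), and SYSTEM_CUDA_VISIBLE_DEVICES is this app's own convention.
# A more direct check, assuming torch is installed with the model dependencies:
#   import torch
#   DEVICE = "cuda" if torch.cuda.is_available() else "cpu"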
def run_background_process(cmd, name):
"""Run a background process and return the process object."""
print(f"Starting {name}...")
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into stdout for a single stream
        text=True,                 # decode bytes to str (supersedes universal_newlines)
        bufsize=1,                 # line-buffered so output arrives line by line
        shell=True,
    )
return process
def read_process_output(process, name):
    """Stream a process's output, yielding the accumulated text after each line.

    Written as a generator so it can back a streaming Gradio event handler;
    calling .update() on a component object does not push changes to the UI.
    """
    full_output = f"### {name} Output:\n\n"
    for line in process.stdout:
        full_output += line
        yield full_output
    # Process ended
    return_code = process.wait()
    full_output += f"\n\nProcess exited with code {return_code}"
    yield full_output
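# A minimal sketch (not wired up below; the helper name is illustrative) of how
# the streaming reader above could run without blocking the Gradio event loop:
# drain it on a daemon thread and let the UI poll the latest snapshot.
def tail_process(process, name):
    """Drain a process's output on a daemon thread so the Popen pipe never fills."""
    snapshots = []

    def _reader():
        for text in read_process_output(process, name):
            snapshots.append(text)  # each item is the full accumulated log so far

    thread = threading.Thread(target=_reader, daemon=True, name=f"{name}-tail")
    thread.start()
    return thread, snapshots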
def setup_environment():
"""Set up the environment by installing dependencies and downloading models."""
# Create necessary directories
os.makedirs("models/speech_encoder", exist_ok=True)
os.makedirs("vocoder", exist_ok=True)
output = "Setting up environment...\n"
# Install dependencies only if not in HF Space (they're pre-installed there)
if not HF_SPACES:
output += "Installing dependencies...\n"
subprocess.run("pip install openai-whisper>=20231117", shell=True)
subprocess.run("pip install fairseq==0.12.2", shell=True)
# Download vocoder if needed
if not os.path.exists("vocoder/g_00500000"):
output += "Downloading vocoder...\n"
subprocess.run(
"wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000 -P vocoder/",
shell=True
)
subprocess.run(
"wget https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/config.json -P vocoder/",
shell=True
)
# Initialize Whisper (it will be downloaded automatically)
output += "Initializing Whisper model (this may take a while)...\n"
try:
import whisper
whisper.load_model("tiny", download_root="models/speech_encoder/")
output += "βœ… Whisper model initialized successfully!\n"
except Exception as e:
output += f"❌ Error initializing Whisper model: {str(e)}\n"
return output + "βœ… Environment setup complete!"
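# A small sanity check callers could run after setup_environment(); the exact
# filenames are an assumption taken from the download commands above.
def vocoder_ready():
    """Return True if both expected vocoder files exist and are non-empty."""
    paths = ("vocoder/g_00500000", "vocoder/config.json")
    return all(os.path.exists(p) and os.path.getsize(p) > 0 for p in paths)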
def start_services():
"""Start the controller, model worker, and web server."""
output = "Starting LLaMA-Omni services...\n"
# Start the controller
controller_cmd = "python -m omni_speech.serve.controller --host 0.0.0.0 --port 10000"
controller_process = run_background_process(controller_cmd, "Controller")
output += "βœ… Controller started\n"
# Wait for controller to start
time.sleep(5)
# Start the model worker
worker_cmd = f"python -m omni_speech.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path {MODEL_PATH} --model-name Llama-3.1-8B-Omni --s2s"
model_worker_process = run_background_process(worker_cmd, "Model Worker")
output += f"βœ… Model worker started with model: {MODEL_PATH}\n"
# Wait for model worker to start
time.sleep(10)
    # No separate web server is launched: this Gradio app itself serves the UI.
    output += "βœ… All services started successfully!\n"
# Keep references to processes to prevent garbage collection
global controller_proc, worker_proc
controller_proc = controller_process
worker_proc = model_worker_process
return output
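# The fixed sleeps in start_services() are guesses at startup latency. A more
# robust alternative (a sketch, assuming only that the services listen on the
# ports passed above) is to poll until each port accepts TCP connections:
import socket

def wait_for_port(host, port, timeout=60.0):
    """Block until host:port accepts a TCP connection or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            time.sleep(1)
    return False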
def create_chat_ui(setup_status="Not started", services_status="Not started"):
"""Create the chat interface for LLaMA-Omni."""
with gr.Blocks() as demo:
gr.Markdown("# πŸ¦™πŸŽ§ LLaMA-Omni: Seamless Speech Interaction")
# Setup and status
with gr.Row():
with gr.Column(scale=1):
setup_btn = gr.Button("1️⃣ Setup Environment")
services_btn = gr.Button("2️⃣ Start LLaMA-Omni Services", interactive=False)
with gr.Column(scale=2):
setup_output = gr.Textbox(label="Setup Status", value=setup_status, lines=5)
services_output = gr.Textbox(label="Services Status", value=services_status, lines=5)
# Chat interface
with gr.Tabs():
with gr.TabItem("Speech Input"):
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload audio")
transcription_output = gr.Textbox(label="Transcription", interactive=False)
submit_audio_btn = gr.Button("Submit Audio", interactive=False)
with gr.TabItem("Text Input"):
text_input = gr.Textbox(label="Text Input", placeholder="Type your message here...")
submit_text_btn = gr.Button("Submit Text", interactive=False)
# Output area
with gr.Row():
with gr.Column():
chatbot = gr.Chatbot(label="Conversation", height=400, type="messages")
audio_output = gr.Audio(label="Generated Speech", interactive=False)
# Function to handle setup button
def on_setup_click():
output_message = setup_environment()
return {
setup_output: gr.update(value=output_message),
services_btn: gr.update(interactive=True)
}
# Function to handle services button
def on_services_click():
output_message = start_services()
return {
services_output: gr.update(value=output_message),
submit_audio_btn: gr.update(interactive=True),
submit_text_btn: gr.update(interactive=True)
}
# Placeholder functions for API calls (to be implemented)
def on_audio_input(audio):
if audio:
# This would use Whisper to transcribe
return "Transcription will appear here when services are running."
return ""
def on_audio_submit(audio, chat_history):
if not audio:
# Ensure chat_history is returned even if no audio
return chat_history if chat_history is not None else [], None
# Placeholder for actual transcription logic
transcribed_text = "Audio input (transcription pending)"
# Create new messages in the "messages" format
new_messages = [
{"role": "user", "content": transcribed_text},
{"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
]
# Append new messages to existing history (or initialize if history is None)
updated_history = (chat_history if chat_history is not None else []) + new_messages
return updated_history, None
def on_text_submit(text, chat_history):
if not text:
# Ensure chat_history is returned even if no text
return chat_history if chat_history is not None else [], None
# Create new messages in the "messages" format
new_messages = [
{"role": "user", "content": text},
{"role": "assistant", "content": "This is a placeholder response. The full model will be running after starting the services."}
]
# Append new messages to existing history (or initialize if history is None)
updated_history = (chat_history if chat_history is not None else []) + new_messages
return updated_history, None
# Connect events
setup_btn.click(on_setup_click, outputs=[setup_output, services_btn])
services_btn.click(on_services_click, outputs=[services_output, submit_audio_btn, submit_text_btn])
audio_input.change(on_audio_input, [audio_input], [transcription_output])
submit_audio_btn.click(on_audio_submit, [audio_input, chatbot], [chatbot, audio_output])
submit_text_btn.click(on_text_submit, [text_input, chatbot], [chatbot, audio_output])
# Auto-setup on HF Spaces
if HF_SPACES:
def perform_auto_setup_on_load():
# Update UI to show setup is starting
yield {
setup_output: gr.update(value="Auto-starting setup process...")
}
# Actual setup call
final_setup_status_message = setup_environment()
# Update UI with final status and enable next button
yield {
setup_output: gr.update(value=final_setup_status_message),
services_btn: gr.update(interactive=True)
}
demo.load(
perform_auto_setup_on_load,
None,
[setup_output, services_btn]
)
return demo
if __name__ == "__main__":
# Global references to background processes
controller_proc = None
worker_proc = None
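    # Best-effort cleanup so the controller and worker do not outlive the app.
    # A defensive sketch: Popen.terminate() sends a polite SIGTERM; a stubborn
    # process may additionally need .kill().
    import atexit

    def _shutdown_services():
        for proc in (controller_proc, worker_proc):
            if proc is not None and proc.poll() is None:
                proc.terminate()

    atexit.register(_shutdown_services)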
# Build the UI
demo = create_chat_ui()
# Launch with appropriate parameters for HF Spaces
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        share=False,
        # Note: Gradio's favicon_path expects a local file path, not a URL, so
        # an icon would need to be bundled with the Space to be used here.
    )