Spaces:

bravedims
/

AI_Avatar_Chat

Running

AI_Avatar_Chat / elevenlabs_integration.py

bravedims

Deploy OmniAvatar-14B with ElevenLabs TTS integration to Hugging Face Spaces

bd1f2b1 3 months ago

6.94 kB

	#!/usr/bin/env python3
	"""
	ElevenLabs + OmniAvatar Integration Example
	"""

	import json
	import os
	from typing import Optional
	from urllib.parse import quote, urlencode

	import requests

	class ElevenLabsOmniAvatarClient:
	    """Client that chains ElevenLabs text-to-speech with an OmniAvatar server.

	    Attributes:
	        elevenlabs_api_key: Key sent in the ``xi-api-key`` header.
	        omni_avatar_base_url: Base URL of the OmniAvatar API, stored without
	            a trailing slash so endpoint paths can be appended safely.
	        elevenlabs_base_url: Base URL of the ElevenLabs v1 API.
	        timeout: Timeout (seconds) handed to every ``requests`` call.
	    """

	    def __init__(self,
	                 elevenlabs_api_key: str,
	                 omni_avatar_base_url: str = "http://localhost:7860",
	                 timeout: Optional[float] = None):
	        """
	        Args:
	            elevenlabs_api_key: ElevenLabs API key
	            omni_avatar_base_url: Base URL of the OmniAvatar API
	            timeout: Seconds to wait on each HTTP request. ``None`` (the
	                default) waits indefinitely, which is what this client
	                always did; pass a number to bound a hung connection.
	        """
	        self.elevenlabs_api_key = elevenlabs_api_key
	        # Strip trailing slashes so f"{base}/generate" never yields "//generate".
	        self.omni_avatar_base_url = omni_avatar_base_url.rstrip("/")
	        self.elevenlabs_base_url = "https://api.elevenlabs.io/v1"
	        self.timeout = timeout

	    def _tts_endpoint(self, voice_id: str) -> str:
	        """Return the text-to-speech endpoint for *voice_id*, path-escaped."""
	        return f"{self.elevenlabs_base_url}/text-to-speech/{quote(voice_id, safe='')}"

	    def _build_audio_url(self, text: str, voice_id: str, model_id: str) -> str:
	        """Return the placeholder audio URL with a properly encoded query string."""
	        query = urlencode({"text": text, "model_id": model_id})
	        return f"{self._tts_endpoint(voice_id)}?{query}"

	    def text_to_speech_url(self,
	                           text: str,
	                           voice_id: str,
	                           model_id: str = "eleven_monolingual_v1") -> str:
	        """
	        Generate speech from text using ElevenLabs and return an audio URL

	        The ElevenLabs request is made (so bad credentials or voice IDs fail
	        here), but the returned audio bytes are NOT saved or uploaded. The
	        returned URL is a placeholder pointing back at the ElevenLabs
	        endpoint; a real deployment would save the audio, upload it to a
	        file server or CDN, and return that public URL instead.

	        Args:
	            text: Text to convert to speech
	            voice_id: ElevenLabs voice ID
	            model_id: ElevenLabs model ID

	        Returns:
	            Placeholder URL for the generated audio, with ``text`` and
	            ``model_id`` URL-encoded in the query string

	        Raises:
	            RuntimeError: If ElevenLabs answers with a non-200 status
	        """
	        headers = {
	            "Accept": "audio/mpeg",
	            "Content-Type": "application/json",
	            "xi-api-key": self.elevenlabs_api_key,
	        }
	        payload = {
	            "text": text,
	            "model_id": model_id,
	            "voice_settings": {
	                "stability": 0.5,
	                "similarity_boost": 0.5,
	            },
	        }

	        # Generate audio
	        response = requests.post(
	            self._tts_endpoint(voice_id),
	            json=payload,
	            headers=headers,
	            timeout=self.timeout,
	        )
	        if response.status_code != 200:
	            raise RuntimeError(f"ElevenLabs API error: {response.status_code} - {response.text}")

	        # Placeholder: the text is percent-encoded so spaces, '&', '?' and '#'
	        # cannot break the query string.
	        return self._build_audio_url(text, voice_id, model_id)

	    def generate_avatar(self,
	                        prompt: str,
	                        speech_text: str,
	                        voice_id: str,
	                        image_url: Optional[str] = None,
	                        guidance_scale: float = 5.0,
	                        audio_scale: float = 3.5,
	                        num_steps: int = 30) -> dict:
	        """
	        Generate avatar video using ElevenLabs audio and OmniAvatar

	        Args:
	            prompt: Description of character behavior
	            speech_text: Text to be spoken (sent to ElevenLabs)
	            voice_id: ElevenLabs voice ID
	            image_url: Optional reference image URL
	            guidance_scale: Prompt guidance scale
	            audio_scale: Audio guidance scale
	            num_steps: Number of inference steps

	        Returns:
	            The JSON body returned by the OmniAvatar ``/generate`` endpoint

	        Raises:
	            RuntimeError: If either API answers with a non-200 status
	            Exception: Any other failure is printed and re-raised unchanged
	        """
	        try:
	            # Step 1: Generate audio URL from ElevenLabs
	            print("🎤 Generating speech with ElevenLabs...")
	            print(f"Text: {speech_text}")
	            print(f"Voice ID: {voice_id}")

	            elevenlabs_audio_url = self.text_to_speech_url(speech_text, voice_id)

	            # Step 2: Generate avatar with OmniAvatar
	            print("🎭 Generating avatar with OmniAvatar...")
	            print(f"Prompt: {prompt}")

	            avatar_data = {
	                "prompt": prompt,
	                "elevenlabs_audio_url": elevenlabs_audio_url,
	                "guidance_scale": guidance_scale,
	                "audio_scale": audio_scale,
	                "num_steps": num_steps,
	            }
	            if image_url:
	                avatar_data["image_url"] = image_url
	                print(f"Image URL: {image_url}")

	            response = requests.post(
	                f"{self.omni_avatar_base_url}/generate",
	                json=avatar_data,
	                timeout=self.timeout,
	            )
	            if response.status_code != 200:
	                raise RuntimeError(f"OmniAvatar API error: {response.status_code} - {response.text}")

	            result = response.json()

	            print("✅ Avatar generated successfully!")
	            # Progress output must not turn a successful generation into a
	            # KeyError/format error when a field is absent from the response.
	            print(f"Output: {result.get('output_path', '<unknown>')}")
	            processing_time = result.get("processing_time")
	            if isinstance(processing_time, (int, float)):
	                print(f"Processing time: {processing_time:.2f}s")

	            return result

	        except Exception as e:
	            print(f"❌ Error generating avatar: {e}")
	            raise

	def main():
	    """Run the two example avatar generations.

	    Reads ``ELEVENLABS_API_KEY`` and ``OMNI_AVATAR_URL`` from the
	    environment. If the API key is unset, blank, or still the placeholder
	    value, prints setup instructions and returns without contacting any
	    service. Failures in an individual example are printed and do not stop
	    the remaining example from running.
	    """
	    placeholder_key = "your-elevenlabs-api-key"

	    # Configuration. A blank/whitespace-only variable is treated as unset so
	    # the script never proceeds with unusable credentials or an empty URL.
	    api_key = os.getenv("ELEVENLABS_API_KEY", placeholder_key).strip()
	    avatar_url = os.getenv("OMNI_AVATAR_URL") or "http://localhost:7860"

	    if not api_key or api_key == placeholder_key:
	        print("⚠️ Please set your ELEVENLABS_API_KEY environment variable")
	        print("Example: export ELEVENLABS_API_KEY='your-actual-api-key'")
	        return

	    # Initialize client
	    client = ElevenLabsOmniAvatarClient(api_key, avatar_url)

	    # Example 1: Basic avatar generation
	    print("=== Example 1: Basic Avatar Generation ===")
	    try:
	        result = client.generate_avatar(
	            prompt="A friendly teacher explaining a concept with clear hand gestures",
	            speech_text="Hello! Today we're going to learn about artificial intelligence and how it works.",
	            voice_id="21m00Tcm4TlvDq8ikWAM",  # Replace with your voice ID
	            guidance_scale=5.0,
	            audio_scale=4.0,
	            num_steps=30,
	        )
	        print(f"Video saved to: {result['output_path']}")
	    except Exception as e:
	        # Top-level demo boundary: report and move on to the next example.
	        print(f"Example 1 failed: {e}")

	    # Example 2: Avatar with reference image
	    print("\n=== Example 2: Avatar with Reference Image ===")
	    try:
	        result = client.generate_avatar(
	            prompt="A professional presenter speaking confidently to an audience",
	            speech_text="Welcome to our presentation on the future of technology.",
	            voice_id="21m00Tcm4TlvDq8ikWAM",  # Replace with your voice ID
	            image_url="https://example.com/professional-headshot.jpg",  # Replace with actual image
	            guidance_scale=5.5,
	            audio_scale=3.5,
	            num_steps=35,
	        )
	        print(f"Video with reference image saved to: {result['output_path']}")
	    except Exception as e:
	        print(f"Example 2 failed: {e}")

	    print("\n🎉 Integration examples completed!")
	    print("\nTo use this script:")
	    print("1. Set your ElevenLabs API key: export ELEVENLABS_API_KEY='your-key'")
	    print("2. Start OmniAvatar API: python app.py")
	    print("3. Run this script: python elevenlabs_integration.py")


	# Run the examples only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()