import asyncio
import logging
import os
import tempfile
from typing import Optional

import numpy as np
import soundfile as sf
import torch
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

logger = logging.getLogger(__name__)


class MinimalTTSClient:
    """Placeholder text-to-speech client.

    No speech model is loaded yet: ``text_to_speech`` writes a short
    amplitude-modulated sine tone to a temporary WAV file so that callers can
    be exercised end to end while real synthesis is still being worked on.
    """

    # Sample rate (Hz) of the generated placeholder audio.
    SAMPLE_RATE = 16000
    # Seconds of audio produced per character of input text.
    SECONDS_PER_CHAR = 0.1
    # Upper bound on the length of the generated clip, in seconds.
    MAX_DURATION_SECONDS = 10.0

    def __init__(self):
        """Record the preferred torch device and mark the client as not loaded."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_loaded = False
        logger.info("Minimal TTS Client initialized on device: %s", self.device)

    async def load_model(self):
        """Mark the client as ready.

        No model is actually loaded at the moment; the method only flips
        ``model_loaded`` so that ``text_to_speech`` can proceed.

        Returns:
            True on success, False if setup raised an exception.
        """
        try:
            logger.info("Setting up minimal TTS...")
            # Mock setup: real model loading is intentionally skipped for now.
            # The try/except is kept so a future loader reports failure the
            # same way (False return) that callers already handle.
            self.model_loaded = True
            logger.info("✅ Minimal TTS ready")
            return True
        except Exception as e:
            logger.error("❌ Failed to load TTS: %s", e)
            return False

    def _synthesize_placeholder(self, text: str) -> np.ndarray:
        """Build the placeholder tone whose length and pitch depend on len(text)."""
        # 0.1 s per character, capped; an empty text yields a zero-length signal.
        duration = min(
            len(text) * self.SECONDS_PER_CHAR, self.MAX_DURATION_SECONDS
        )
        t = np.linspace(0, duration, int(self.SAMPLE_RATE * duration), False)

        # Base pitch of 440 Hz shifted by up to ~200 Hz depending on text length.
        frequency = 440 + (len(text) % 100) * 2
        carrier = 0.1 * np.sin(2 * np.pi * frequency * t)

        # Slow 2 Hz amplitude modulation makes the tone less monotonous.
        return carrier * (1 + 0.3 * np.sin(2 * np.pi * 2 * t))

    async def text_to_speech(self, text: str, voice_id: Optional[str] = None) -> str:
        """Write placeholder audio for ``text`` to a temporary WAV file.

        Args:
            text: Input text; only its length influences the generated tone.
            voice_id: Accepted for interface compatibility; currently unused.

        Returns:
            Path of the generated ``.wav`` file. The file is created with
            ``delete=False``, so the caller is responsible for removing it.

        Raises:
            Exception: If the client could not be set up, or if generating or
                writing the audio failed (the original error is chained as
                ``__cause__``).
        """
        if not self.model_loaded:
            logger.info("TTS not loaded, loading now...")
            if not await self.load_model():
                raise Exception("Failed to load TTS")

        output_path = None
        try:
            logger.info("Generating minimal audio for text: %s...", text[:50])
            audio_data = self._synthesize_placeholder(text)

            # Reserve a unique path and close the handle *before* soundfile
            # opens it: reopening a still-open NamedTemporaryFile by name is
            # not portable (it fails on Windows), and the `with` guarantees
            # the handle is not leaked if anything below raises.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
                output_path = temp_file.name

            sf.write(output_path, audio_data, samplerate=self.SAMPLE_RATE)

            logger.info("✅ Generated placeholder audio: %s", output_path)
            logger.warning("📢 Using placeholder audio - TTS will be improved in next update")
            return output_path
        except Exception as e:
            logger.error("❌ Error generating audio: %s", e)
            # Do not leave a partial/empty temp file behind on failure.
            if output_path is not None:
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise Exception(f"Audio generation failed: {e}") from e