Spaces:

Musabbirkm
/

ContentVoiceGen

Running

File size: 6,935 Bytes

import gradio as gr
import asyncio
import tempfile
import logging
import requests
from VOCALIS import Agent, ContentGenerator
from edgeTTsLang import languages


logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

def generate_the_content(content_type, language,output_style,content_length, theme, expectations):
    try:
        temperature_map = {
            "Precise (Deterministic)": 0.1,
            "Very Focused (Low Randomness)": 0.3,
            "Moderately Focused (Slight Randomness)": 0.4,
            "Balanced (Moderate Creativity)": 0.5,
            "Slightly Creative (Moderate Randomness)": 0.6,
            "Creative (High Randomness)": 0.7,
            "Highly Creative (Very High Randomness)": 0.8,
            "Experimental (Maximum Randomness)": 0.95,
        }
        temperature = temperature_map.get(output_style, 0.6)
        agent = Agent(model="gemini-2.0-flash", temperature=temperature)
        generator = ContentGenerator(agent, content_type, language, content_length, theme, expectations)
        output = generator.generate_content()

        return output

    except ValueError as ve:
        return f"Input Error: {ve}"
    except requests.exceptions.ConnectionError:
        return "Network Error: Could not connect to API. Please check your internet connection."
    except Exception as e:
        return f"General Error: {e}"

async def text_to_speech(text, voice, rate, pitch):
    import edge_tts
    if not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)
    return tmp_path, None

async def tts_interface(content_type, language, voice, output_style, content_length, theme, Customization, rate, pitch):
    text_output = generate_the_content(content_type, language, output_style, content_length, theme, Customization)
    if text_output.startswith("Error:"):
        return None, None, gr.Markdown(text_output)

    audio_file, warning = await text_to_speech(text_output, languages[language][voice], rate, pitch)

    if warning:
        return text_output, gr.Markdown(warning)

    return text_output, audio_file, None

def create_demo():
    language_choices = list(languages.keys())

    custom_theme = gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Montserrat"), "Arial", "sans-serif"],
    )

    demo = gr.Interface(
        fn=tts_interface,
        theme=custom_theme,
        inputs=[
            gr.Dropdown(label="Content Type", choices=[
                "story", "social", "news", "motivational", "explainer", "advertisement", "interview", "podcast",
                "testimonial", "comedy", "audiobook", "documentary", "meditation", "education", "poem", "recipe",
                "script", "summary", "email", "blog"
            ], value="story"),
            gr.Dropdown(label="Language", choices=language_choices, value=language_choices[0] if language_choices else ""),
            gr.Dropdown(label="Voice", choices=["Female", "Male"], value="Female"),
            gr.Dropdown(label="Output Style", choices=[
                "Precise (Deterministic)", "Very Focused (Low Randomness)", "Moderately Focused (Slight Randomness)",
                "Balanced (Moderate Creativity)", "Slightly Creative (Moderate Randomness)",
                "Creative (High Randomness)", "Highly Creative (Very High Randomness)",
                "Experimental (Maximum Randomness)"
            ], value="Balanced (Moderate Creativity)"),
            gr.Slider(label="Content Length (Words)", minimum=100, maximum=1000, value=200, step=10),
            gr.Dropdown(label="Theme/Nature (Optional)", choices=[
                "General/None", "Narrative/Storytelling", "Informative/Educational", "Descriptive/Atmospheric",
                "Persuasive/Argumentative", "Humorous/Comedic", "Emotional/Inspirational", "Technical/Scientific",
                "Historical/Cultural", "Modern/Contemporary", "Futuristic/Sci-Fi", "Fantasy/Mythical",
                "Mystery/Suspense", "Adventure/Exploration", "Realistic/Documentary", "Philosophical/Reflective",
                "Social/Relational", "Environmental/Nature", "Personal/Anecdotal"
            ], value="General/None"),
            gr.Textbox(label="Customization", placeholder="Add any extra information to help customize the generated content"),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
        ],
        outputs=[
            gr.Textbox(label="Generated Text"),
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Error/Warning", visible=True)
        ],
        title="✨ AI VoiceCraft: Text-to-Speech Studio 🎙️",
        description="""
        **Crafted by MusabirKm**
        
        🚀 Transform your text into captivating audio! 🚀  

        This tool generates AI-powered content and converts it into lifelike speech using Microsoft Edge TTS.  

        🔹 **Features at a Glance:**  
        🌍 Supports multiple languages and voices  
        🎚️ Adjust speech rate and pitch for natural delivery  
        📝 Generate dynamic content: stories, news, podcasts & more  
        🎭 Customize tone, length, and style to fit your needs  

        """,
        article="""  
        # 🌟 Welcome to AI VoiceCraft! 🌟  

        **Unleash the power of AI-driven text-to-speech.**  

        This advanced application blends **cutting-edge AI content generation** with high-quality speech synthesis to create immersive audio experiences.  

        ## 🎤 Key Highlights:  
        🔊 Natural and expressive voice output  
        📖 AI-powered script generation tailored for speech  
        ⚙️ Fine-tune pitch, rate, and delivery style  

        🔗 [Discover more AI tools@github/musabbirkm](https://github.com/musabbirkm) 
        🔗 [Follow MusabirKm on Hugging Face](https://huggingface.com/musabbirkm)   
        """,

        allow_flagging="never",
        api_name=None,
    )
    return demo

async def main():
    demo = create_demo()
    demo.queue(default_concurrency_limit=5)
    demo.launch(show_api=False)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except RuntimeError:
        import nest_asyncio
        nest_asyncio.apply()
        asyncio.run(main())