File size: 6,935 Bytes
9d1f8e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
093a373
 
 
9d1f8e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
093a373
 
9d1f8e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import gradio as gr
import asyncio
import tempfile
import logging
import requests
from VOCALIS import Agent, ContentGenerator
from edgeTTsLang import languages


# Root logging configuration: INFO and above, each record prefixed with
# its timestamp and severity.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

def generate_the_content(content_type, language,output_style,content_length, theme, expectations):
    """Generate content through ``ContentGenerator`` and return it.

    Args:
        content_type: Kind of content to produce; forwarded to ``ContentGenerator``.
        language: Target language; forwarded to ``ContentGenerator``.
        output_style: Human-readable creativity label. It is mapped to a
            sampling temperature; unknown labels fall back to 0.6.
        content_length: Requested length; forwarded to ``ContentGenerator``.
        theme: Theme/nature of the content; forwarded to ``ContentGenerator``.
        expectations: Free-form customization; forwarded to ``ContentGenerator``.

    Returns:
        Whatever ``generator.generate_content()`` returns on success
        (presumably the generated text -- confirm against VOCALIS). On
        failure, a message string starting with ``"Input Error:"``,
        ``"Network Error:"`` or ``"General Error:"``; no exception is raised
        for failures inside the generator.
    """
    temperature_map = {
        "Precise (Deterministic)": 0.1,
        "Very Focused (Low Randomness)": 0.3,
        "Moderately Focused (Slight Randomness)": 0.4,
        "Balanced (Moderate Creativity)": 0.5,
        "Slightly Creative (Moderate Randomness)": 0.6,
        "Creative (High Randomness)": 0.7,
        "Highly Creative (Very High Randomness)": 0.8,
        "Experimental (Maximum Randomness)": 0.95,
    }
    temperature = temperature_map.get(output_style, 0.6)

    try:
        agent = Agent(model="gemini-2.0-flash", temperature=temperature)
        generator = ContentGenerator(agent, content_type, language, content_length, theme, expectations)
        return generator.generate_content()
    except ValueError as ve:
        # Bad input is an expected condition, so no traceback is needed.
        logger.warning("Content generation rejected the input: %s", ve)
        return f"Input Error: {ve}"
    except requests.exceptions.ConnectionError:
        logger.exception("Could not reach the content generation API")
        return "Network Error: Could not connect to API. Please check your internet connection."
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # handler, but keep the traceback in the logs.
        logger.exception("Unexpected failure during content generation")
        return f"General Error: {e}"

async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` to an MP3 file with edge-tts.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only text is rejected.
        voice: Voice identifier passed to ``edge_tts.Communicate``. A falsy
            value is rejected.
        rate: Speech-rate adjustment in percent. Rounded to an integer and
            sent as a signed string such as ``"+10%"``.
        pitch: Pitch adjustment in Hz. Rounded to an integer and sent as a
            signed string such as ``"-5Hz"``.

    Returns:
        ``(path, None)`` with the path of the written ``.mp3`` temp file on
        success, or ``(None, message)`` when the input is rejected.

    Raises:
        Any exception raised by ``edge_tts`` while synthesizing or saving. The
        temporary file is removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    # Imported after validation so rejecting bad input needs no dependency.
    import os
    import edge_tts

    # Sliders may deliver floats; the ``+d`` format only accepts integers, so
    # coerce before formatting.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close the handle before writing: an open
    # NamedTemporaryFile cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except Exception:
        # delete=False means nobody else cleans up a failed synthesis.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None

async def tts_interface(content_type, language, voice, output_style, content_length, theme, Customization, rate, pitch):
    """Gradio handler: generate text, then convert it to speech.

    Args:
        content_type, language, output_style, content_length, theme,
        Customization: Forwarded to ``generate_the_content``.
        voice: Key into ``languages[language]`` selecting the TTS voice.
        rate, pitch: Forwarded to ``text_to_speech``.

    Returns:
        Always a 3-tuple ``(generated_text, audio_path, message)`` matching
        the three output components of the interface. ``message`` is ``None``
        on success; ``audio_path`` is ``None`` whenever audio was not produced.
    """
    # The generator is synchronous (and presumably does network I/O -- it
    # handles connection errors), so run it in the default executor to keep
    # the event loop free for other requests.
    loop = asyncio.get_running_loop()
    text_output = await loop.run_in_executor(
        None,
        generate_the_content,
        content_type, language, output_style, content_length, theme, Customization,
    )

    if not isinstance(text_output, str) or not text_output.strip():
        return None, None, "Error: No content was generated."

    # generate_the_content reports failures as strings with these prefixes;
    # they must not be sent on to speech synthesis.
    error_prefixes = ("Error:", "Input Error:", "Network Error:", "General Error:")
    if text_output.startswith(error_prefixes):
        return None, None, text_output

    try:
        voice_id = languages[language][voice]
    except (KeyError, TypeError):
        return text_output, None, f"Error: No '{voice}' voice is available for '{language}'."

    try:
        audio_file, warning = await text_to_speech(text_output, voice_id, rate, pitch)
    except Exception as exc:
        # UI boundary: still show the generated text when only the audio
        # step failed.
        logger.exception("Speech synthesis failed")
        return text_output, None, f"Speech Error: {exc}"

    if warning:
        return text_output, None, warning

    return text_output, audio_file, None

def create_demo():
    """Build the Gradio interface for the text-to-speech studio.

    The interface calls ``tts_interface`` with nine inputs (content type,
    language, voice, output style, content length, theme, customization,
    speech rate, pitch) and shows three outputs (generated text, generated
    audio, error/warning message). Flagging is disabled and no API name is
    registered.

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).
    """
    language_choices = list(languages.keys())

    custom_theme = gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Montserrat"), "Arial", "sans-serif"],
    )

    demo = gr.Interface(
        fn=tts_interface,
        theme=custom_theme,
        inputs=[
            gr.Dropdown(label="Content Type", choices=[
                "story", "social", "news", "motivational", "explainer", "advertisement", "interview", "podcast",
                "testimonial", "comedy", "audiobook", "documentary", "meditation", "education", "poem", "recipe",
                "script", "summary", "email", "blog"
            ], value="story"),
            # Fall back to an empty value when no languages are configured.
            gr.Dropdown(label="Language", choices=language_choices, value=language_choices[0] if language_choices else ""),
            gr.Dropdown(label="Voice", choices=["Female", "Male"], value="Female"),
            gr.Dropdown(label="Output Style", choices=[
                "Precise (Deterministic)", "Very Focused (Low Randomness)", "Moderately Focused (Slight Randomness)",
                "Balanced (Moderate Creativity)", "Slightly Creative (Moderate Randomness)",
                "Creative (High Randomness)", "Highly Creative (Very High Randomness)",
                "Experimental (Maximum Randomness)"
            ], value="Balanced (Moderate Creativity)"),
            gr.Slider(label="Content Length (Words)", minimum=100, maximum=1000, value=200, step=10),
            gr.Dropdown(label="Theme/Nature (Optional)", choices=[
                "General/None", "Narrative/Storytelling", "Informative/Educational", "Descriptive/Atmospheric",
                "Persuasive/Argumentative", "Humorous/Comedic", "Emotional/Inspirational", "Technical/Scientific",
                "Historical/Cultural", "Modern/Contemporary", "Futuristic/Sci-Fi", "Fantasy/Mythical",
                "Mystery/Suspense", "Adventure/Exploration", "Realistic/Documentary", "Philosophical/Reflective",
                "Social/Relational", "Environmental/Nature", "Personal/Anecdotal"
            ], value="General/None"),
            gr.Textbox(label="Customization", placeholder="Add any extra information to help customize the generated content"),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
        ],
        outputs=[
            gr.Textbox(label="Generated Text"),
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Error/Warning", visible=True)
        ],
        title="✨ AI VoiceCraft: Text-to-Speech Studio 🎙️",
        description="""
        **Crafted by MusabirKm**
        
        🚀 Transform your text into captivating audio! 🚀  

        This tool generates AI-powered content and converts it into lifelike speech using Microsoft Edge TTS.  

        🔹 **Features at a Glance:**  
        🌍 Supports multiple languages and voices  
        🎚️ Adjust speech rate and pitch for natural delivery  
        📝 Generate dynamic content: stories, news, podcasts & more  
        🎭 Customize tone, length, and style to fit your needs  

        """,
        article="""  
        # 🌟 Welcome to AI VoiceCraft! 🌟  

        **Unleash the power of AI-driven text-to-speech.**  

        This advanced application blends **cutting-edge AI content generation** with high-quality speech synthesis to create immersive audio experiences.  

        ## 🎀 Key Highlights:  
        🔊 Natural and expressive voice output  
        📖 AI-powered script generation tailored for speech  
        ⚙️ Fine-tune pitch, rate, and delivery style  

        🔗 [Discover more AI tools@github/musabbirkm](https://github.com/musabbirkm) 
        🔗 [Follow MusabirKm on Hugging Face](https://huggingface.com/musabbirkm)   
        """,

        allow_flagging="never",
        api_name=None,
    )
    return demo

async def main():
    """Create the interface, enable its request queue, and launch it.

    The queue is configured with a default concurrency limit of 5 and the
    app is launched with the API page hidden.
    """
    app = create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except RuntimeError as exc:
        # Only one RuntimeError is recoverable here: asyncio.run() refusing to
        # start because an event loop is already running (e.g. in a notebook).
        # Anything else is a real failure and must not relaunch the app.
        if "running event loop" not in str(exc):
            raise
        import nest_asyncio
        nest_asyncio.apply()
        asyncio.run(main())