# Spaces: Running
| import gradio as gr | |
| from google import genai | |
| from google.genai import types | |
| import wave | |
| import os | |
| from dotenv import load_dotenv | |
# Load API key
# load_dotenv() runs before the lookup so a key defined in a local .env file
# is visible in the process environment.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Single module-level Gemini client reused by every request handler below.
client = genai.Client(api_key=GOOGLE_API_KEY)
# Save audio from PCM to WAV
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to a WAV file.

    Args:
        filename: Destination as a ``str`` path, an ``os.PathLike`` object
            (e.g. ``pathlib.Path``), or a writable binary file object.
        pcm: Bytes-like object holding the raw PCM frames.
        channels: Number of audio channels.
        rate: Sampling rate in frames per second.
        sample_width: Size of one sample in bytes.
    """
    # Path objects are converted to a plain string path so they are handled
    # the same way as str destinations; str paths and file objects pass
    # through unchanged.
    if isinstance(filename, os.PathLike):
        filename = os.fspath(filename)

    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
# Gemini TTS generation function
def generate_speech(text, voice):
    """Synthesize ``text`` with the Gemini TTS model and save it as a WAV file.

    Args:
        text: The text to speak. Blank or missing text is rejected without
            calling the API.
        voice: Name of the prebuilt Gemini voice to use.

    Returns:
        A ``(audio_path, file_path, status)`` tuple. On success both paths
        point at the generated WAV file; on failure both are ``None`` and
        ``status`` is a message starting with ``"Error: "``.
    """
    import tempfile  # function-scope import keeps this block self-contained

    if not text or not text.strip():
        return None, None, "Error: Please enter some text to synthesize."

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=voice
                        )
                    )
                ),
            ),
        )
        audio_data = response.candidates[0].content.parts[0].inline_data.data
        if not audio_data:
            # Without this check an empty WAV would be written and reported
            # as a success.
            return None, None, "Error: The model returned no audio data."

        # Each request gets its own file: a fixed "output.wav" is shared by
        # all concurrent requests, which would overwrite each other's audio.
        # NOTE(review): these temp files are not deleted here; confirm the
        # host cleans its temp directory or add periodic cleanup.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        wave_file(output_path, audio_data)
        return output_path, output_path, "Speech generated successfully."
    except Exception as e:
        # UI boundary: report any failure in the status box instead of
        # raising into the Gradio event handler.
        return None, None, f"Error: {e}"
# Gradio app using Blocks
# NOTE(review): the original indentation was lost, so the nesting of
# components inside each Row below is a reconstruction - confirm the layout.
with gr.Blocks(title="Gemini TTS Demo") as demo:
    gr.Markdown("## Google Gemini Text-to-Speech")
    gr.Markdown("Enter text below, choose a voice, and listen to the generated speech.")

    with gr.Row():
        text_input = gr.Textbox(
            lines=3,
            label="Enter Text",
            placeholder="Example: Welcome to the world of AI.",
        )
        # The selected name is passed straight to the API as the prebuilt
        # voice name, so every choice must be a voice the API accepts.
        # "Wes" is not in the Gemini prebuilt TTS voice list, so it was
        # replaced with "Puck".
        voice_input = gr.Dropdown(
            choices=["Kore", "Puck"],
            value="Kore",
            label="Select Voice",
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Speech", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio")
        file_output = gr.File(label="Download Audio File")

    status_output = gr.Textbox(label="Status", interactive=False)

    # Clickable samples that fill in the text box and the voice dropdown.
    examples = gr.Examples(
        examples=[
            ["Good morning! Hope you have a great day ahead.", "Kore"],
            ["Welcome to the future of AI voice generation.", "Puck"],
            ["Your appointment is scheduled for 3 PM on Monday.", "Kore"],
            ["This is a demo of Google's Gemini text-to-speech feature.", "Puck"],
        ],
        inputs=[text_input, voice_input],
    )

    # generate_speech returns (audio path, file path, status message), which
    # map onto the three output components in order.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_input],
        outputs=[audio_output, file_output, status_output],
    )

demo.launch()