# Spaces: Sleeping
import requests | |
import base64 | |
import io | |
import json | |
import gradio as gr | |
from gradio import Text | |
import base64 | |
import numpy as np | |
from pydub import AudioSegment | |
# Text-to-speech endpoint that the app posts to.
_API_HOST = "https://ruslanmv-hf-llm-api-collection.hf.space"
url = f"{_API_HOST}/tts"

# The request body is sent as JSON and a JSON reply is requested.
# The "accept" value may need adjusting if the endpoint doesn't support JSON.
_JSON_MIME = "application/json"
headers = {
    "Content-Type": _JSON_MIME,
    "accept": _JSON_MIME,
}
def convert_text_to_base64(text, language="en", timeout=60):
    """Converts text to a base64 encoded audio string using the provided API.

    Posts the text to the module-level ``url`` and accepts two reply shapes:
    a JSON object carrying the audio in an ``"audio"`` field, or a non-JSON
    body that is treated as the raw audio bytes.

    Args:
        text (str): The text to convert to speech.
        language (str, optional): The language code for the speech,
            sent as ``from_language`` (default: "en").
        timeout (float, optional): Seconds to wait for the API before giving
            up (default: 60). Without a timeout the request could block
            indefinitely if the endpoint never answers.

    Returns:
        str: The base64 encoded audio string on success, None on error.
        Errors are printed rather than raised.
    """
    payload = {
        "input_text": text,
        "from_language": language,
    }
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        try:
            response_data = response.json()
        except ValueError:
            # Not JSON: assume the body is the raw binary audio.
            # ValueError is the common base of the JSON decode errors, so this
            # also covers backends other than the stdlib ``json`` module.
            audio_data = response.content
        else:
            if not isinstance(response_data, dict):
                print("Error: Missing audio data in response.")
                return None
            # The API reports failures in a "detail" field.
            if "detail" in response_data:
                print(f"Error: {response_data['detail']}")
                return None
            audio_data = response_data.get("audio", None)
            if not audio_data:
                print("Error: Missing audio data in response.")
                return None
            if isinstance(audio_data, str):
                # JSON cannot carry raw bytes, so a string payload is assumed
                # to be base64 text already -- TODO confirm against the API.
                # Validate it instead of encoding it a second time.
                try:
                    base64.b64decode(audio_data, validate=True)
                except ValueError:
                    print("Error: Audio data in response is not valid base64.")
                    return None
                return audio_data

        return base64.b64encode(audio_data).decode("utf-8")
    except Exception as e:
        # Boundary handler: callers rely on None (never an exception) on failure.
        print(f"Error: {e}")
        return None
def get_audio_properties(audio_data):
    """Decodes raw audio bytes and reports their basic properties.

    The data is tried as WAV first and then as MP3; the first format that
    decodes wins.

    Args:
        audio_data (bytes): The encoded audio file contents.

    Returns:
        dict: A dictionary with the keys
            "format" ("wav" or "mp3"),
            "duration" (in seconds),
            "bitrate" (the segment's ``frame_rate``; the key name is kept
            for backward compatibility),
            "channels", "sample_width" and
            "audio_segment" (the decoded ``AudioSegment``).

    Raises:
        ValueError: If the data can be decoded neither as WAV nor as MP3.
    """
    last_error = None
    for audio_format in ("wav", "mp3"):
        try:
            audio_segment = AudioSegment.from_file(
                io.BytesIO(audio_data), format=audio_format
            )
        except Exception as e:
            # Decoding failed for this format; remember why and try the next.
            last_error = e
        else:
            break
    else:
        # No candidate format decoded; report the last failure.
        raise ValueError(f"Unknown audio format: {last_error}") from last_error

    return {
        "format": audio_format,
        "duration": len(audio_segment) / 1000.0,  # duration in seconds
        "bitrate": audio_segment.frame_rate,
        "channels": audio_segment.channels,
        "sample_width": audio_segment.sample_width,
        "audio_segment": audio_segment,
    }
def play_audio(text):
    """Converts text to speech using the provided API and returns the audio.

    Args:
        text (str): The text to convert to speech.

    Returns:
        tuple: ``(sample_rate, samples)`` where ``sample_rate`` is the frame
        rate of the decoded audio and ``samples`` is a numpy array, reshaped
        to ``(n_frames, n_channels)`` when there is more than one channel.

    Raises:
        gr.Error: If the conversion fails or the returned audio cannot be
            decoded, so the problem is shown in the UI instead of handing a
            plain string to the audio output.
    """
    base64_encoded_audio = convert_text_to_base64(text)
    if not base64_encoded_audio:
        raise gr.Error("Error occurred during conversion.")

    # Decode the base64 string back into the encoded audio bytes.
    audio_data = base64.b64decode(base64_encoded_audio)

    try:
        properties = get_audio_properties(audio_data)
    except ValueError as e:
        raise gr.Error(str(e)) from e
    print("Audio Properties:", properties)

    # Convert the audio segment to a numpy array.
    audio_segment = properties["audio_segment"]
    samples = np.array(audio_segment.get_array_of_samples())
    if audio_segment.channels > 1:
        # Multi-channel samples are interleaved; give each channel a column.
        samples = samples.reshape((-1, audio_segment.channels))

    # Use the decoded frame rate rather than a fixed value so playback speed
    # is right whatever rate the API produced.
    return audio_segment.frame_rate, samples
# Build the UI: one labelled text box in, one audio player out.
_text_input = Text(label="Enter text to convert to speech")
_audio_output = gr.Audio(label="Generated audio", type="numpy")

interface = gr.Interface(
    fn=play_audio,
    inputs=_text_input,
    outputs=_audio_output,
    title="Text to Speech API",
    description="Developed by Ruslan Magana, visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information.",
)

# Start serving the interface.
interface.launch()