Spaces:

ruslanmv
/

Text-To-Speech

Sleeping

App Files Files Community

Text-To-Speech / app.py

ruslanmv

First commit

4177df5 about 1 year ago

raw

history blame contribute delete

4.71 kB

	import requests
	import base64
	import io
	import json
	import gradio as gr
	from gradio import Text
	import base64
	import numpy as np
	from pydub import AudioSegment
	# Text-to-speech endpoint that every request in this module is posted to.
	url = "https://ruslanmv-hf-llm-api-collection.hf.space/tts"

	# The request body is sent as JSON. A JSON reply is requested as well, but
	# the caller is prepared for a non-JSON (raw audio) response too.
	headers = {"Content-Type": "application/json", "accept": "application/json"}


	def convert_text_to_base64(text, language="en", timeout=120):
	    """Request speech audio for ``text`` from the TTS API and return it as base64.

	    Args:
	        text (str): The text to convert to speech.
	        language (str, optional): Language code, sent to the API as
	            ``from_language`` (default: "en").
	        timeout (float, optional): Seconds to wait for the server before
	            giving up (default: 120). Without a timeout a stalled endpoint
	            would block the caller indefinitely.

	    Returns:
	        str | None: The base64 encoded audio string on success. ``None`` when
	        the request fails, the status code is not 200, the JSON payload
	        carries a ``detail`` error, or no usable audio data is present.
	        Errors are reported with ``print`` rather than raised.
	    """
	    payload = {"input_text": text, "from_language": language}

	    try:
	        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
	    except requests.RequestException as e:
	        print(f"Error: {e}")
	        return None

	    if response.status_code != 200:
	        print(f"Error: {response.status_code}")
	        return None

	    try:
	        response_data = response.json()
	    except ValueError:
	        # Body is not JSON, so treat it as the raw audio bytes. ValueError is
	        # the common base of the JSON decode errors requests can raise.
	        audio_data = response.content
	    else:
	        if not isinstance(response_data, dict):
	            print("Error: Missing audio data in response.")
	            return None

	        if "detail" in response_data:
	            print(f"Error: {response_data['detail']}")
	            return None

	        audio_data = response_data.get("audio")
	        if isinstance(audio_data, str) and audio_data:
	            # JSON cannot carry raw bytes, so a string here is presumably
	            # already base64 text (TODO confirm against the API). Validate
	            # it and hand it back unchanged instead of encoding it twice.
	            try:
	                base64.b64decode(audio_data, validate=True)
	            except ValueError:
	                print("Error: Audio data in response is not valid base64.")
	                return None
	            return audio_data

	        if not isinstance(audio_data, (bytes, bytearray)):
	            audio_data = None

	    if not audio_data:
	        print("Error: Missing audio data in response.")
	        return None

	    return base64.b64encode(audio_data).decode("utf-8")




	def get_audio_properties(audio_data):
	    """Decode encoded audio bytes and report their basic properties.

	    Decoding is attempted as WAV first and as MP3 second; the first format
	    that loads wins.

	    Args:
	        audio_data (bytes): Content of an encoded audio file.

	    Returns:
	        dict: With the keys
	            "format" ("wav" or "mp3"),
	            "duration" (length in seconds),
	            "bitrate" (the segment's ``frame_rate``; the key name is kept
	            for existing callers even though this is a sample rate),
	            "channels", "sample_width" and
	            "audio_segment" (the decoded ``AudioSegment``).

	    Raises:
	        ValueError: If the data can be decoded neither as WAV nor as MP3.
	            The underlying decoder error is attached as ``__cause__``.
	    """
	    audio_segment = None
	    detected_format = None
	    last_error = None

	    for candidate in ("wav", "mp3"):
	        try:
	            audio_segment = AudioSegment.from_file(
	                io.BytesIO(audio_data), format=candidate
	            )
	        except Exception as e:
	            # Deliberately broad: decoding can fail with decoder-specific
	            # errors or OS errors. Unlike a bare ``except`` this still lets
	            # KeyboardInterrupt / SystemExit propagate.
	            last_error = e
	            continue
	        detected_format = candidate
	        break

	    if audio_segment is None:
	        raise ValueError(f"Unknown audio format: {last_error}") from last_error

	    return {
	        "format": detected_format,
	        "duration": len(audio_segment) / 1000.0,  # len() is in milliseconds
	        "bitrate": audio_segment.frame_rate,
	        "channels": audio_segment.channels,
	        "sample_width": audio_segment.sample_width,
	        "audio_segment": audio_segment,
	    }

	def play_audio(text):
	    """Convert ``text`` to speech and return it for a numpy ``gr.Audio`` output.

	    Args:
	        text (str): The text to synthesize.

	    Returns:
	        tuple: ``(sample_rate, samples)`` where ``sample_rate`` is the frame
	        rate of the decoded audio and ``samples`` is a numpy array, reshaped
	        to ``(n_frames, channels)`` when the audio has more than one channel.

	    Raises:
	        gr.Error: If the API call produced no audio or the audio could not be
	            decoded. Gradio shows the message to the user; returning a plain
	            string would not be valid data for the audio output.
	    """
	    base64_encoded_audio = convert_text_to_base64(text)
	    if not base64_encoded_audio:
	        raise gr.Error("Error occurred during conversion.")

	    audio_data = base64.b64decode(base64_encoded_audio)

	    try:
	        properties = get_audio_properties(audio_data)
	    except ValueError as e:
	        raise gr.Error(str(e)) from e
	    print("Audio Properties:", properties)

	    audio_segment = properties["audio_segment"]
	    samples = np.array(audio_segment.get_array_of_samples())

	    # Multi-channel samples arrive interleaved; give each channel a column.
	    channels = audio_segment.channels
	    if channels > 1:
	        samples = samples.reshape((-1, channels))

	    # Report the real sample rate of the decoded audio. A fixed 24000 would
	    # play back at the wrong speed whenever the API returns another rate.
	    return audio_segment.frame_rate, samples
	# Gradio UI: a single text box feeds play_audio, whose (sample_rate, samples)
	# result is rendered by a numpy-typed audio component.
	interface = gr.Interface(
	    fn=play_audio,
	    inputs=Text(label="Enter text to convert to speech"),
	    outputs=gr.Audio(label="Generated audio", type="numpy"),
	    title="Text to Speech API",
	    description=(
	        "Developed by Ruslan Magana, visit "
	        "<a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a>"
	        " for more information."
	    ),
	    # live=True would re-run on every edit; intentionally left disabled.
	)

	# Start serving the app.
	interface.launch()