Spaces:

ruslanmv
/

Text-To-Speech

Starting

App Files Files Community

ruslanmv committed on Jun 14, 2024

Commit

4177df5

1 Parent(s): a00a466

First commit

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +147 -0
requirements.txt +0 -0

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🚀
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 4.36.1
 app_file: app.py
 pinned: false
 ---

 colorFrom: gray
 colorTo: green
 sdk: gradio
+sdk_version: 4.31.2
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import requests
+import base64
+import io
+import json
+import gradio as gr
+from gradio import Text
+import base64
+import numpy as np
+from pydub import AudioSegment
+# Text-to-speech endpoint that convert_text_to_base64() sends its POST request to.
+url = "https://ruslanmv-hf-llm-api-collection.hf.space/tts"
+# Request headers: the body is sent as JSON and a JSON reply is requested.
+# convert_text_to_base64() also copes with a non-JSON (raw binary) reply.
+headers = {
+    "Content-Type": "application/json",
+    "accept": "application/json"  # May need adjustment if endpoint doesn't support JSON
+}
+def convert_text_to_base64(text, language="en"):
+    """Converts text to base64 encoded audio string using the provided API.
+    Args:
+        text (str): The text to convert to speech.
+        language (str, optional): The language code for the speech (default: "en").
+    Returns:
+        str: The base64 encoded audio string on success, None on error.
+    """
+    try:
+        # Prepare the data
+        data = {
+            "input_text": text,
+            "from_language": language
+        }
+        # Send the POST request
+        response = requests.post(url, headers=headers, json=data)
+        # Check for successful response
+        if response.status_code == 200:
+            try:
+                # Check for JSON response format first
+                response_data = response.json()
+                # Check for errors in the response (if JSON)
+                if "detail" in response_data:
+                    print(f"Error: {response_data['detail']}")
+                    return None
+                # Extract audio data from the response (assuming it's in a field)
+                audio_data = response_data.get("audio", None)
+                if not audio_data:
+                    print("Error: Missing audio data in response.")
+                    return None
+            except json.JSONDecodeError:
+                # If not JSON, assume raw binary data
+                audio_data = response.content
+            # Use an in-memory buffer
+            with io.BytesIO() as buffer:
+                # Write audio data to the buffer
+                buffer.write(audio_data)
+                # Encode audio data to base64 string
+                base64_encoded_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
+            return base64_encoded_str
+        else:
+            print(f"Error: {response.status_code}")
+            return None
+    except Exception as e:
+        print(f"Error: {e}")
+        return None
+def get_audio_properties(audio_data):
+    try:
+        # Try to read as WAV
+        audio_segment = AudioSegment.from_file(io.BytesIO(audio_data), format="wav")
+        format = "wav"
+    except:
+        try:
+            # Try to read as MP3
+            audio_segment = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
+            format = "mp3"
+        except Exception as e:
+            raise ValueError(f"Unknown audio format: {e}")
+    duration = len(audio_segment) / 1000.0  # duration in seconds
+    bitrate = audio_segment.frame_rate
+    channels = audio_segment.channels
+    sample_width = audio_segment.sample_width
+    return {
+        "format": format,
+        "duration": duration,
+        "bitrate": bitrate,
+        "channels": channels,
+        "sample_width": sample_width,
+        "audio_segment": audio_segment
+    }
+def play_audio(text):
+    """Converts text to speech using the provided API and plays the audio."""
+    base64_encoded_audio = convert_text_to_base64(text)
+    if base64_encoded_audio:
+        # Decode base64 string to bytes (assuming known format)
+        # Decode the base64 string
+        audio_data = base64.b64decode(base64_encoded_audio)
+        # Get audio properties
+        properties = get_audio_properties(audio_data)
+        print("Audio Properties:", properties)
+        # Convert audio segment to numpy array
+        audio_segment = properties["audio_segment"]
+        samples = np.array(audio_segment.get_array_of_samples())
+        if audio_segment.channels == 2:
+            samples = samples.reshape((-1, 2))
+        # Create the audio component with controls and optional download button
+        return 24000, samples
+    else:
+        return "Error occurred during conversion."
+# Define the Gradio interface with clear labels for user interaction
+interface = gr.Interface(
+    fn=play_audio,
+    title="Text to Speech API",  # Add a title to the interface
+    description="Developed by Ruslan Magana, visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information.",
+    inputs=Text(label="Enter text to convert to speech"),
+    outputs=gr.Audio(label="Generated audio", type="numpy"),
+    #live=True  # Enable live updates
+)
+# Launch the Gradio interface
+interface.launch()

requirements.txt ADDED Viewed

File without changes