ruslanmv committed on
Commit
4177df5
·
1 Parent(s): a00a466

First commit

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +147 -0
  3. requirements.txt +0 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
4
  colorFrom: gray
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: gray
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.31.2
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import io
4
+ import json
5
+ import gradio as gr
6
+ from gradio import Text
7
+ import base64
8
+ import numpy as np
9
+ from pydub import AudioSegment
10
# Request headers: the body is sent as JSON and a JSON reply is requested.
# The "accept" value may need adjusting if the endpoint does not return JSON.
headers = {"Content-Type": "application/json", "accept": "application/json"}

# Text-to-speech API endpoint URL.
url = "https://ruslanmv-hf-llm-api-collection.hf.space/tts"
18
+
19
+
20
def convert_text_to_base64(text, language="en", timeout=120):
    """Convert text to a base64 encoded audio string using the TTS API.

    Posts ``text`` and ``language`` as a JSON body to the module-level ``url``
    with the module-level ``headers``. The reply is accepted either as JSON
    carrying the audio in an ``"audio"`` field, or as a raw binary body.

    Args:
        text (str): The text to convert to speech.
        language (str, optional): The language code for the speech
            (default: "en"). Sent as ``from_language``.
        timeout (float, optional): Seconds to wait for the endpoint before
            giving up (default: 120).

    Returns:
        str: The base64 encoded audio string on success, None on error.
        Errors are printed, never raised.
    """
    data = {"input_text": text, "from_language": language}

    try:
        # Without a timeout a stalled endpoint would block the caller forever.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except Exception as e:
        # Boundary of the "never raises" contract: report and signal failure.
        print(f"Error: {e}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        response_data = response.json()
    except ValueError:
        # Not JSON (every JSON decode error is a ValueError): assume the body
        # is the raw binary audio itself.
        audio_data = response.content
    else:
        if not isinstance(response_data, dict):
            print("Error: Missing audio data in response.")
            return None

        # The API reports failures in a "detail" field.
        if "detail" in response_data:
            print(f"Error: {response_data['detail']}")
            return None

        audio_data = response_data.get("audio")

    if not audio_data:
        print("Error: Missing audio data in response.")
        return None

    if isinstance(audio_data, str):
        # JSON cannot carry raw bytes, so a string payload is presumably
        # already base64 encoded -- TODO confirm against the API schema.
        # Validate it so callers can rely on being able to decode the result.
        try:
            base64.b64decode(audio_data, validate=True)
        except ValueError as e:
            print(f"Error: {e}")
            return None
        return audio_data

    try:
        return base64.b64encode(audio_data).decode("utf-8")
    except TypeError as e:
        # The "audio" field held something that is neither text nor bytes.
        print(f"Error: {e}")
        return None
79
+
80
+
81
+
82
+
83
def get_audio_properties(audio_data):
    """Decode raw audio bytes and report basic properties of the result.

    The bytes are tried as WAV first and then as MP3; the first format that
    decodes wins.

    Args:
        audio_data (bytes): Encoded audio content.

    Returns:
        dict: With the keys
            "format" ("wav" or "mp3"),
            "duration" (length in seconds),
            "bitrate" (the segment's ``frame_rate``; the key name is kept for
            compatibility with existing callers),
            "channels", "sample_width" and
            "audio_segment" (the decoded ``AudioSegment``).

    Raises:
        ValueError: If the data cannot be decoded as WAV or MP3.
    """
    audio_segment = None
    audio_format = None
    last_error = None

    for candidate in ("wav", "mp3"):
        try:
            audio_segment = AudioSegment.from_file(
                io.BytesIO(audio_data), format=candidate
            )
        except Exception as e:
            # Decoder failures come in several exception types, so this stays
            # broad -- but, unlike a bare ``except``, it lets KeyboardInterrupt
            # and SystemExit propagate.
            last_error = e
        else:
            audio_format = candidate
            break

    if audio_segment is None:
        raise ValueError(f"Unknown audio format: {last_error}") from last_error

    return {
        "format": audio_format,
        "duration": len(audio_segment) / 1000.0,  # len() is in milliseconds
        "bitrate": audio_segment.frame_rate,
        "channels": audio_segment.channels,
        "sample_width": audio_segment.sample_width,
        "audio_segment": audio_segment,
    }
109
+
110
def play_audio(text):
    """Convert text to speech through the API and return playable audio.

    Args:
        text (str): The text to convert to speech.

    Returns:
        tuple: ``(sample_rate, samples)`` where ``samples`` is a numpy array of
        the decoded audio, reshaped to ``(frames, channels)`` when the audio
        has more than one channel.

    Raises:
        gr.Error: If the conversion fails or the returned audio cannot be
            decoded, so the interface shows an error message instead of
            receiving a value that is not audio.
    """
    base64_encoded_audio = convert_text_to_base64(text)
    if not base64_encoded_audio:
        # A plain string is not a valid value for a numpy audio output;
        # raising gr.Error surfaces the message in the UI instead.
        raise gr.Error("Error occurred during conversion.")

    audio_data = base64.b64decode(base64_encoded_audio)

    try:
        properties = get_audio_properties(audio_data)
    except ValueError as e:
        raise gr.Error(str(e)) from e
    print("Audio Properties:", properties)

    audio_segment = properties["audio_segment"]
    samples = np.array(audio_segment.get_array_of_samples())
    if audio_segment.channels > 1:
        # Multi-channel samples are interleaved; give each channel a column.
        samples = samples.reshape((-1, audio_segment.channels))

    # Report the decoded audio's real sample rate rather than a fixed value,
    # so playback speed and pitch stay correct whatever the API returns.
    return audio_segment.frame_rate, samples
135
# Build the Gradio UI: a labelled text box in, a labelled audio player out.
_text_input = Text(label="Enter text to convert to speech")
_audio_output = gr.Audio(label="Generated audio", type="numpy")

interface = gr.Interface(
    fn=play_audio,
    inputs=_text_input,
    outputs=_audio_output,
    title="Text to Speech API",  # Heading shown on the page
    description="Developed by Ruslan Magana, visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information.",
    # live=True  # Enable live updates
)

# Start serving the interface
interface.launch()
147
+
requirements.txt ADDED
File without changes