Spaces:
Running
Running
Commit
·
0d7faff
1
Parent(s):
753a150
[UPDATE] add API endpoints
Browse files- app.py +229 -69
- fongbe_dictionary.db +0 -0
app.py
CHANGED
@@ -3,6 +3,9 @@ import torch
|
|
3 |
import torchaudio
|
4 |
import librosa
|
5 |
import os
|
|
|
|
|
|
|
6 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
7 |
from huggingface_hub import login
|
8 |
import logging
|
@@ -41,17 +44,54 @@ def load_model():
|
|
41 |
return False
|
42 |
|
43 |
|
44 |
-
def
|
45 |
-
"""Fonction
|
46 |
-
|
47 |
if model is None or processor is None:
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
|
|
|
|
50 |
if audio is None:
|
51 |
return "❌ Aucun fichier audio fourni"
|
52 |
|
53 |
try:
|
54 |
logger.info(f"🎵 Traitement audio: {audio}")
|
|
|
55 |
try:
|
56 |
waveform, sample_rate = torchaudio.load(audio)
|
57 |
logger.info(f"✅ Audio chargé avec torchaudio: {sample_rate}Hz")
|
@@ -61,39 +101,84 @@ def transcribe(audio):
|
|
61 |
waveform = torch.tensor(waveform).unsqueeze(0)
|
62 |
logger.info(f"✅ Audio chargé avec librosa: {sample_rate}Hz")
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
inputs = processor(
|
74 |
-
waveform.squeeze(),
|
75 |
-
sampling_rate=16000,
|
76 |
-
return_tensors="pt"
|
77 |
-
)
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
max_length=500,
|
84 |
-
do_sample=False,
|
85 |
-
num_beams=1
|
86 |
-
)
|
87 |
|
88 |
-
|
|
|
|
|
|
|
89 |
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
except Exception as e:
|
94 |
-
error_msg = f"
|
95 |
logger.error(error_msg)
|
96 |
-
return error_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
|
99 |
print("🚀 DÉMARRAGE API STT FONGBÉ - RONALDODEV")
|
@@ -106,55 +191,130 @@ else:
|
|
106 |
print("❌ Erreur de chargement du modèle")
|
107 |
model_status = "❌ Erreur de chargement"
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
type="filepath",
|
114 |
-
label="🎤 Uploadez un fichier ou enregistrez directement"
|
115 |
-
),
|
116 |
-
outputs=gr.Textbox(
|
117 |
-
label="📝 Transcription en Fongbé",
|
118 |
-
placeholder="La transcription apparaîtra ici...",
|
119 |
-
lines=3
|
120 |
-
),
|
121 |
-
title="🎤 API STT Fongbé - Ronaldodev",
|
122 |
-
description=f"""
|
123 |
**Reconnaissance vocale pour la langue Fongbé**
|
124 |
-
|
125 |
-
Uploadez un fichier audio (WAV, MP3, M4A) ou enregistrez directement avec votre microphone.
|
126 |
-
|
127 |
**Statut:** {model_status}
|
128 |
-
|
129 |
**Modèle:** `{MODEL_NAME}`
|
130 |
-
"""
|
131 |
-
article="""
|
132 |
-
## 🔌 API pour développeurs
|
133 |
-
|
134 |
-
Cette interface expose automatiquement une API REST :
|
135 |
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
-
|
153 |
-
""
|
154 |
-
|
155 |
-
]
|
156 |
-
theme=gr.themes.Soft(),
|
157 |
-
allow_flagging="never"
|
158 |
)
|
159 |
|
160 |
demo.launch()
|
|
|
3 |
import torchaudio
|
4 |
import librosa
|
5 |
import os
|
6 |
+
import base64
|
7 |
+
import io
|
8 |
+
import tempfile
|
9 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
10 |
from huggingface_hub import login
|
11 |
import logging
|
|
|
44 |
return False
|
45 |
|
46 |
|
47 |
+
def process_audio_data(audio_data, sample_rate=None):
    """Shared pipeline: raw waveform -> Fongbé transcription.

    Parameters
    ----------
    audio_data : numpy.ndarray or torch.Tensor
        Mono or multi-channel waveform samples.
    sample_rate : int, optional
        Sample rate of ``audio_data``; resampled to 16 kHz when it differs.

    Returns
    -------
    str
        The decoded transcription, stripped of surrounding whitespace.

    Raises
    ------
    RuntimeError
        If the module-level model/processor failed to load.
    """
    if model is None or processor is None:
        # RuntimeError is a subclass of Exception, so callers that catch
        # the old generic Exception keep working.
        raise RuntimeError("Modèle non chargé")

    # Downmix to mono by averaging channels (axis= works for numpy and torch).
    if len(audio_data.shape) > 1:
        audio_data = audio_data.mean(axis=0)

    # Normalize to a float32 torch tensor shaped (1, samples).
    if not isinstance(audio_data, torch.Tensor):
        waveform = torch.tensor(audio_data, dtype=torch.float32).unsqueeze(0)
    else:
        waveform = audio_data.unsqueeze(0) if audio_data.dim() == 1 else audio_data

    # Resample to the 16 kHz rate the processor expects.
    if sample_rate and sample_rate != 16000:
        logger.info(f"🔄 Resampling {sample_rate}Hz → 16000Hz")
        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
        # Resample requires floating-point input; tensors passed in by
        # callers may still hold integer PCM, so cast defensively.
        waveform = resampler(waveform.float())

    inputs = processor(
        waveform.squeeze(),
        sampling_rate=16000,
        return_tensors="pt"
    )

    logger.info("🔄 Génération de la transcription...")
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        result = model.generate(
            **inputs,
            max_length=500,
            do_sample=False,
            num_beams=1
        )

    transcription = processor.batch_decode(result, skip_special_tokens=True)[0]
    return transcription.strip()
|
85 |
+
|
86 |
|
87 |
+
def transcribe(audio):
|
88 |
+
"""Fonction pour l'interface Gradio (fichier)"""
|
89 |
if audio is None:
|
90 |
return "❌ Aucun fichier audio fourni"
|
91 |
|
92 |
try:
|
93 |
logger.info(f"🎵 Traitement audio: {audio}")
|
94 |
+
|
95 |
try:
|
96 |
waveform, sample_rate = torchaudio.load(audio)
|
97 |
logger.info(f"✅ Audio chargé avec torchaudio: {sample_rate}Hz")
|
|
|
101 |
waveform = torch.tensor(waveform).unsqueeze(0)
|
102 |
logger.info(f"✅ Audio chargé avec librosa: {sample_rate}Hz")
|
103 |
|
104 |
+
transcription = process_audio_data(waveform, sample_rate)
|
105 |
+
logger.info(f"✅ Transcription réussie: '{transcription}'")
|
106 |
+
return transcription
|
107 |
|
108 |
+
except Exception as e:
|
109 |
+
error_msg = f"❌ Erreur de transcription: {str(e)}"
|
110 |
+
logger.error(error_msg)
|
111 |
+
return error_msg
|
112 |
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
+
def transcribe_api_base64(audio_base64):
    """API endpoint: transcribe base64-encoded audio.

    Accepts either a bare base64 string or a data URL
    ("data:audio/wav;base64,...").  Returns a JSON-style dict:
    ``{"success": True, "transcription": ...}`` on success,
    ``{"success": False, "error": ...}`` on failure.
    """
    try:
        logger.info("🔄 API: Traitement base64...")

        # A data URL carries the payload after the first comma.
        payload = audio_base64.split(',')[1] if audio_base64.startswith('data:') else audio_base64
        raw = base64.b64decode(payload)

        # librosa wants a file on disk, so spill the bytes to a temp .wav.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            tmp.write(raw)
            tmp_path = tmp.name

        try:
            samples, rate = librosa.load(tmp_path, sr=None)
            text = process_audio_data(torch.tensor(samples), rate)
            logger.info(f"✅ API Transcription: '{text}'")
            return {"success": True, "transcription": text}
        finally:
            # Always remove the temp file, even when transcription fails.
            os.unlink(tmp_path)

    except Exception as e:
        error_msg = f"Erreur API base64: {str(e)}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
|
149 |
+
|
150 |
+
|
151 |
+
def transcribe_api_file(audio_file):
    """API endpoint: transcribe an uploaded audio file object.

    ``audio_file`` must expose ``read()`` (multipart upload).  Returns a
    JSON-style dict: ``{"success": True, "transcription": ...}`` on
    success, ``{"success": False, "error": ...}`` on failure.
    """
    try:
        logger.info("🔄 API: Traitement fichier...")

        # Pull the whole upload into memory.
        data = audio_file.read()

        # librosa wants a path, so spill the bytes to a temp .wav.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            tmp.write(data)
            tmp_path = tmp.name

        try:
            samples, rate = librosa.load(tmp_path, sr=None)
            text = process_audio_data(torch.tensor(samples), rate)
            logger.info(f"✅ API Transcription: '{text}'")
            return {"success": True, "transcription": text}
        finally:
            # Always remove the temp file, even when transcription fails.
            os.unlink(tmp_path)

    except Exception as e:
        error_msg = f"Erreur API fichier: {str(e)}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
|
182 |
|
183 |
|
184 |
print("🚀 DÉMARRAGE API STT FONGBÉ - RONALDODEV")
|
|
|
191 |
print("❌ Erreur de chargement du modèle")
|
192 |
model_status = "❌ Erreur de chargement"
|
193 |
|
194 |
+
# Interface Gradio principale
with gr.Blocks(theme=gr.themes.Soft(), title="🎤 API STT Fongbé") as demo:
    gr.Markdown(f"""
    # 🎤 API STT Fongbé - Ronaldodev
    **Reconnaissance vocale pour la langue Fongbé**
    **Statut:** {model_status}
    **Modèle:** `{MODEL_NAME}`
    """)

    with gr.Tab("🎵 Interface Utilisateur"):
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="🎤 Uploadez un fichier ou enregistrez directement"
        )
        transcription_output = gr.Textbox(
            label="📝 Transcription en Fongbé",
            placeholder="La transcription apparaîtra ici...",
            lines=3
        )
        transcribe_btn = gr.Button("🔄 Transcrire", variant="primary")

        transcribe_btn.click(
            fn=transcribe,
            inputs=audio_input,
            outputs=transcription_output
        )

    with gr.Tab("🔌 API Documentation"):
        gr.Markdown("""
        ## 📡 Endpoints API Disponibles

        ### 1. **POST** `/api/transcribe_base64`
        Pour envoyer de l'audio en base64

        **Headers:**
        ```
        Content-Type: application/json
        ```

        **Body:**
        ```json
        {
            "audio_base64": "data:audio/wav;base64,UklGRnoAAABXQVZF..."
        }
        ```

        **Réponse:**
        ```json
        {
            "success": true,
            "transcription": "votre transcription ici"
        }
        ```

        ### 2. **POST** `/api/transcribe_file`
        Pour envoyer un fichier audio directement

        **Headers:**
        ```
        Content-Type: multipart/form-data
        ```

        **Body:**
        - `audio_file`: votre fichier audio (WAV, MP3, M4A...)

        **Réponse:**
        ```json
        {
            "success": true,
            "transcription": "votre transcription ici"
        }
        ```

        ### 📱 Exemple d'utilisation

        **Python:**
        ```python
        import requests
        import base64

        # Méthode 1: Base64
        with open("audio.wav", "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode()

        response = requests.post(
            "https://ronaldodev-stt-fongbe.hf.space/api/transcribe_base64",
            json={"audio_base64": f"data:audio/wav;base64,{audio_b64}"}
        )

        # Méthode 2: Fichier direct
        with open("audio.wav", "rb") as f:
            response = requests.post(
                "https://ronaldodev-stt-fongbe.hf.space/api/transcribe_file",
                files={"audio_file": f}
            )

        result = response.json()
        print(result["transcription"])
        ```

        **Flutter:**
        ```dart
        // Fichier direct
        var request = http.MultipartRequest(
            'POST',
            Uri.parse('https://ronaldodev-stt-fongbe.hf.space/api/transcribe_file')
        );
        request.files.add(await http.MultipartFile.fromPath('audio_file', audioPath));
        var response = await request.send();
        ```
        """)

    # BUG FIX: gr.Blocks has no add_api_route() method (that is FastAPI's
    # API) — calling it raised AttributeError at startup.  Gradio's supported
    # mechanism for named API endpoints is to register event listeners with
    # api_name=..., reachable via gradio_client or the HTTP /call/<api_name>
    # protocol.  Hidden components keep them out of the visible UI.
    # NOTE(review): the documentation tab above still shows /api/... URLs —
    # verify the exact endpoint paths exposed by the deployed Gradio version.
    b64_input = gr.Textbox(visible=False)
    b64_output = gr.JSON(visible=False)
    b64_trigger = gr.Button(visible=False)
    b64_trigger.click(
        fn=transcribe_api_base64,
        inputs=b64_input,
        outputs=b64_output,
        api_name="transcribe_base64"
    )

    file_input = gr.File(visible=False)
    file_output = gr.JSON(visible=False)
    file_trigger = gr.Button(visible=False)
    file_trigger.click(
        fn=transcribe_api_file,
        inputs=file_input,
        outputs=file_output,
        api_name="transcribe_file"
    )

demo.launch()
|
fongbe_dictionary.db
ADDED
Binary file (24.6 kB). View file
|
|