Spaces:

Ronaldodev
/

stt-fongbe

Running

App Files Files Community

Ronaldodev commited on 12 days ago

Commit

0d7faff

1 Parent(s): 753a150

[UPDATE] add api endpoint

Browse files

Files changed (2) hide show

app.py +229 -69
fongbe_dictionary.db +0 -0

app.py CHANGED Viewed

@@ -3,6 +3,9 @@ import torch
 import torchaudio
 import librosa
 import os
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 from huggingface_hub import login
 import logging
@@ -41,17 +44,54 @@ def load_model():
         return False
-def transcribe(audio):
-    """Fonction principale de transcription"""
     if model is None or processor is None:
-        return "❌ Erreur: Modèle non chargé. Vérifiez les logs."
     if audio is None:
         return "❌ Aucun fichier audio fourni"
     try:
         logger.info(f"🎵 Traitement audio: {audio}")
         try:
             waveform, sample_rate = torchaudio.load(audio)
             logger.info(f"✅ Audio chargé avec torchaudio: {sample_rate}Hz")
@@ -61,39 +101,84 @@ def transcribe(audio):
             waveform = torch.tensor(waveform).unsqueeze(0)
             logger.info(f"✅ Audio chargé avec librosa: {sample_rate}Hz")
-        if waveform.shape[0] > 1:
-            waveform = waveform.mean(dim=0, keepdim=True)
-            logger.info("🔄 Conversion stéréo → mono")
-        if sample_rate != 16000:
-            logger.info(f"🔄 Resampling {sample_rate}Hz → 16000Hz")
-            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
-            waveform = resampler(waveform)
-        inputs = processor(
-            waveform.squeeze(),
-            sampling_rate=16000,
-            return_tensors="pt"
-        )
-        logger.info("🔄 Génération de la transcription...")
-        with torch.no_grad():
-            result = model.generate(
-                **inputs,
-                max_length=500,
-                do_sample=False,
-                num_beams=1
-            )
-        transcription = processor.batch_decode(result, skip_special_tokens=True)[0]
-        logger.info(f"✅ Transcription réussie: '{transcription}'")
-        return transcription.strip()
     except Exception as e:
-        error_msg = f"❌ Erreur de transcription: {str(e)}"
         logger.error(error_msg)
-        return error_msg
 print("🚀 DÉMARRAGE API STT FONGBÉ - RONALDODEV")
@@ -106,55 +191,130 @@ else:
     print("❌ Erreur de chargement du modèle")
     model_status = "❌ Erreur de chargement"
-demo = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(
-        sources=["upload", "microphone"],
-        type="filepath",
-        label="🎤 Uploadez un fichier ou enregistrez directement"
-    ),
-    outputs=gr.Textbox(
-        label="📝 Transcription en Fongbé",
-        placeholder="La transcription apparaîtra ici...",
-        lines=3
-    ),
-    title="🎤 API STT Fongbé - Ronaldodev",
-    description=f"""
     **Reconnaissance vocale pour la langue Fongbé**
-    Uploadez un fichier audio (WAV, MP3, M4A) ou enregistrez directement avec votre microphone.
     **Statut:** {model_status}
     **Modèle:** `{MODEL_NAME}`
-    """,
-    article="""
-    ## 🔌 API pour développeurs
-    Cette interface expose automatiquement une API REST :
-    **Endpoint:** `POST /api/predict`
-    **Exemple d'utilisation:**
-    ```python
-    import requests
-    response = requests.post(
-        "https://ronaldodev-stt-fongbe.hf.space/api/predict",
-        json={"data": [audio_file_path]}
-    )
-    transcription = response.json()["data"][0]
-    ```
-    **Pour Flutter:** Utilisez MultipartRequest avec l'endpoint ci-dessus.
-    **L'utilisation de l'api est gratuite pour l'instant**
-    """,
-    examples=[
-    ],
-    theme=gr.themes.Soft(),
-    allow_flagging="never"
 )
 demo.launch()

 import torchaudio
 import librosa
 import os
+import base64
+import io
+import tempfile
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 from huggingface_hub import login
 import logging
         return False
+def process_audio_data(audio_data, sample_rate=None):
+    """Fonction commune pour traiter les données audio"""
     if model is None or processor is None:
+        raise Exception("Modèle non chargé")
+    # Convertir en mono si nécessaire
+    if len(audio_data.shape) > 1:
+        audio_data = audio_data.mean(axis=0)
+    # Convertir en tensor PyTorch
+    if not isinstance(audio_data, torch.Tensor):
+        waveform = torch.tensor(audio_data, dtype=torch.float32).unsqueeze(0)
+    else:
+        waveform = audio_data.unsqueeze(0) if audio_data.dim() == 1 else audio_data
+    # Resampling si nécessaire
+    if sample_rate and sample_rate != 16000:
+        logger.info(f"🔄 Resampling {sample_rate}Hz → 16000Hz")
+        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+        waveform = resampler(waveform)
+    inputs = processor(
+        waveform.squeeze(),
+        sampling_rate=16000,
+        return_tensors="pt"
+    )
+    logger.info("🔄 Génération de la transcription...")
+    with torch.no_grad():
+        result = model.generate(
+            **inputs,
+            max_length=500,
+            do_sample=False,
+            num_beams=1
+        )
+    transcription = processor.batch_decode(result, skip_special_tokens=True)[0]
+    return transcription.strip()
+def transcribe(audio):
+    """Fonction pour l'interface Gradio (fichier)"""
     if audio is None:
         return "❌ Aucun fichier audio fourni"
     try:
         logger.info(f"🎵 Traitement audio: {audio}")
         try:
             waveform, sample_rate = torchaudio.load(audio)
             logger.info(f"✅ Audio chargé avec torchaudio: {sample_rate}Hz")
             waveform = torch.tensor(waveform).unsqueeze(0)
             logger.info(f"✅ Audio chargé avec librosa: {sample_rate}Hz")
+        transcription = process_audio_data(waveform, sample_rate)
+        logger.info(f"✅ Transcription réussie: '{transcription}'")
+        return transcription
+    except Exception as e:
+        error_msg = f"❌ Erreur de transcription: {str(e)}"
+        logger.error(error_msg)
+        return error_msg
+def transcribe_api_base64(audio_base64):
+    """API pour données base64"""
+    try:
+        logger.info("🔄 API: Traitement base64...")
+        # Décoder le base64
+        if audio_base64.startswith('data:'):
+            # Format: data:audio/wav;base64,XXXXX
+            audio_base64 = audio_base64.split(',')[1]
+        audio_bytes = base64.b64decode(audio_base64)
+        # Créer un fichier temporaire
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+            temp_file.write(audio_bytes)
+            temp_path = temp_file.name
+        try:
+            # Charger avec librosa
+            waveform, sample_rate = librosa.load(temp_path, sr=None)
+            waveform = torch.tensor(waveform)
+            transcription = process_audio_data(waveform, sample_rate)
+            logger.info(f"✅ API Transcription: '{transcription}'")
+            return {"success": True, "transcription": transcription}
+        finally:
+            # Nettoyer le fichier temporaire
+            os.unlink(temp_path)
     except Exception as e:
+        error_msg = f"Erreur API base64: {str(e)}"
         logger.error(error_msg)
+        return {"success": False, "error": error_msg}
+def transcribe_api_file(audio_file):
+    """API pour fichier audio direct"""
+    try:
+        logger.info("🔄 API: Traitement fichier...")
+        # Lire le fichier
+        audio_bytes = audio_file.read()
+        # Créer un fichier temporaire
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+            temp_file.write(audio_bytes)
+            temp_path = temp_file.name
+        try:
+            # Charger avec librosa
+            waveform, sample_rate = librosa.load(temp_path, sr=None)
+            waveform = torch.tensor(waveform)
+            transcription = process_audio_data(waveform, sample_rate)
+            logger.info(f"✅ API Transcription: '{transcription}'")
+            return {"success": True, "transcription": transcription}
+        finally:
+            # Nettoyer le fichier temporaire
+            os.unlink(temp_path)
+    except Exception as e:
+        error_msg = f"Erreur API fichier: {str(e)}"
+        logger.error(error_msg)
+        return {"success": False, "error": error_msg}
 print("🚀 DÉMARRAGE API STT FONGBÉ - RONALDODEV")
     print("❌ Erreur de chargement du modèle")
     model_status = "❌ Erreur de chargement"
+# Interface Gradio principale
+with gr.Blocks(theme=gr.themes.Soft(), title="🎤 API STT Fongbé") as demo:
+    gr.Markdown(f"""
+    # 🎤 API STT Fongbé - Ronaldodev
     **Reconnaissance vocale pour la langue Fongbé**
     **Statut:** {model_status}
     **Modèle:** `{MODEL_NAME}`
+    """)
+    with gr.Tab("🎵 Interface Utilisateur"):
+        audio_input = gr.Audio(
+            sources=["upload", "microphone"],
+            type="filepath",
+            label="🎤 Uploadez un fichier ou enregistrez directement"
+        )
+        transcription_output = gr.Textbox(
+            label="📝 Transcription en Fongbé",
+            placeholder="La transcription apparaîtra ici...",
+            lines=3
+        )
+        transcribe_btn = gr.Button("🔄 Transcrire", variant="primary")
+        transcribe_btn.click(
+            fn=transcribe,
+            inputs=audio_input,
+            outputs=transcription_output
+        )
+    with gr.Tab("🔌 API Documentation"):
+        gr.Markdown("""
+        ## 📡 Endpoints API Disponibles
+        ### 1. **POST** `/api/transcribe_base64`
+        Pour envoyer de l'audio en base64
+        **Headers:**
+        ```
+        Content-Type: application/json
+        ```
+        **Body:**
+        ```json
+        {
+          "audio_base64": "data:audio/wav;base64,UklGRnoAAABXQVZF..."
+        }
+        ```
+        **Réponse:**
+        ```json
+        {
+          "success": true,
+          "transcription": "votre transcription ici"
+        }
+        ```
+        ### 2. **POST** `/api/transcribe_file`
+        Pour envoyer un fichier audio directement
+        **Headers:**
+        ```
+        Content-Type: multipart/form-data
+        ```
+        **Body:**
+        - `audio_file`: votre fichier audio (WAV, MP3, M4A...)
+        **Réponse:**
+        ```json
+        {
+          "success": true,
+          "transcription": "votre transcription ici"
+        }
+        ```
+        ### 📱 Exemple d'utilisation
+        **Python:**
+        ```python
+        import requests
+        import base64
+        # Méthode 1: Base64
+        with open("audio.wav", "rb") as f:
+            audio_b64 = base64.b64encode(f.read()).decode()
+        response = requests.post(
+            "https://ronaldodev-stt-fongbe.hf.space/api/transcribe_base64",
+            json={"audio_base64": f"data:audio/wav;base64,{audio_b64}"}
+        )
+        # Méthode 2: Fichier direct
+        with open("audio.wav", "rb") as f:
+            response = requests.post(
+                "https://ronaldodev-stt-fongbe.hf.space/api/transcribe_file",
+                files={"audio_file": f}
+            )
+        result = response.json()
+        print(result["transcription"])
+        ```
+        **Flutter:**
+        ```dart
+        // Fichier direct
+        var request = http.MultipartRequest(
+          'POST',
+          Uri.parse('https://ronaldodev-stt-fongbe.hf.space/api/transcribe_file')
+        );
+        request.files.add(await http.MultipartFile.fromPath('audio_file', audioPath));
+        var response = await request.send();
+        ```
+        """)
+# Ajouter les endpoints API personnalisés
+demo.add_api_route(
+    "/api/transcribe_base64",
+    transcribe_api_base64,
+    methods=["POST"]
+)
+demo.add_api_route(
+    "/api/transcribe_file",
+    transcribe_api_file,
+    methods=["POST"]
 )
 demo.launch()

fongbe_dictionary.db ADDED Viewed

Binary file (24.6 kB). View file