Spaces:

bilalfaye
/

Wax_ak_Bind_Wolof

Sleeping

App Files Files Community

bilalfaye committed on Jan 17

Commit

960a3ac

verified ·

1 Parent(s): 1ea7b59

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import torchaudio
+from transformers import pipeline
+import torch
+from datasets import load_dataset
+# Modèle 1 : Transcription audio Wolof -> texte Wolof
+pipe_wolof = pipeline(
+    task="automatic-speech-recognition",
+    model="bilalfaye/wav2vec2-large-mms-1b-wolof",
+    processor="bilalfaye/wav2vec2-large-mms-1b-wolof",
+    device="cuda" if torch.cuda.is_available() else "cpu"
+)
+# Fonction 1 : Transcription audio Wolof -> texte Wolof
+def transcribe_audio_wolof(audio):
+    # Charger l'audio avec torchaudio
+    waveform, sample_rate = torchaudio.load(audio)
+    # Convertir stéréo en mono
+    if waveform.shape[0] > 1:
+        mono_audio = waveform.mean(dim=0, keepdim=True)
+    else:
+        mono_audio = waveform
+    # Rééchantillonner à 16 kHz si nécessaire
+    if sample_rate != 16000:
+        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+        mono_audio = resampler(mono_audio)
+        sample_rate = 16000
+    # Convertir en tableau numpy
+    mono_audio = mono_audio.squeeze(0).numpy()
+    # Transcrire l'audio
+    result = pipe_wolof({"array": mono_audio, "sampling_rate": sample_rate})
+    return result['text']
+# Modèle 2 : Texte Wolof -> audio Wolof
+synthesiser_wolof = pipeline("text-to-speech", "bilalfaye/speecht5_tts-wolof")
+# Charger les embeddings pour les voix masculine et féminine
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embedding_male = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
+speaker_embedding_female = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+# Fonction 2 : Texte Wolof -> audio Wolof
+def text_to_speech_wolof(text, voice_type):
+    embedding = speaker_embedding_male if voice_type == "Male" else speaker_embedding_female
+    speech = synthesiser_wolof(text, forward_params={"speaker_embeddings": embedding})
+    return speech["sampling_rate"], speech["audio"]
+# Gradio interface: two tabs, one per direction (speech -> text, text -> speech)
+with gr.Blocks() as app:
+    with gr.Tab("Transcription Audio -> Texte"):  # tab 1: Wolof audio -> text
+        gr.Markdown("### Transcription audio Wolof vers texte")
+        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Enregistrer ou importer un fichier audio")  # handler receives a file path
+        transcription_output = gr.Textbox(label="Texte transcrit")
+        transcribe_button = gr.Button("Transcrire")
+        transcribe_button.click(transcribe_audio_wolof, inputs=audio_input, outputs=transcription_output)
+    with gr.Tab("Texte -> Synthèse Vocale"):  # tab 2: Wolof text -> audio
+        gr.Markdown("### Conversion de texte Wolof en audio")
+        text_input = gr.Textbox(label="Entrez du texte en Wolof")
+        voice_selector = gr.Radio(["Male", "Female"], label="Type de voix", value="Male")  # values are compared against "Male" in text_to_speech_wolof
+        audio_output = gr.Audio(label="Synthèse vocale")
+        synthesize_button = gr.Button("Synthétiser")
+        synthesize_button.click(text_to_speech_wolof, inputs=[text_input, voice_selector], outputs=audio_output)
+# Launch the application. NOTE(review): share=True asks for a public share link; confirm it is needed where this runs.
+app.launch(debug=True, share=True)