Spaces:

EnDevSols
/

ASR-Arabic

Sleeping

App Files

xet

Community

Mudassir-75 committed on Sep 2, 2024

Commit

5487b58

verified ·

1 Parent(s): fd42012

Shifted to Inference API

Browse files

Files changed (1) hide show

app.py +13 -28

app.py CHANGED Viewed

@@ -1,47 +1,34 @@
 import streamlit as st
-import torch
-import librosa
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import Levenshtein
 from io import BytesIO
 from audio_recorder_streamlit import audio_recorder
-# Load the processor and model for Wav2Vec2 once
 @st.cache_resource
-def load_model():
-    MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
-    processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
-    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
-    return processor, model
-processor, model = load_model()
-def transcribe_audio(audio_bytes):
     """
-    Transcribes speech from an audio file using a pretrained Wav2Vec2 model.
     Args:
         audio_bytes (bytes): Audio data in bytes.
     Returns:
         str: The transcription of the speech in the audio file.
     """
-    speech_array, sampling_rate = librosa.load(BytesIO(audio_bytes), sr=16000)
-    input_values = processor(speech_array, sampling_rate=sampling_rate, return_tensors="pt", padding=True).input_values
-    with torch.no_grad():
-        logits = model(input_values).logits
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0].strip()
-    return transcription
 def levenshtein_similarity(transcription1, transcription2):
     """
     Calculate the Levenshtein similarity between two transcriptions.
     Args:
         transcription1 (str): The first transcription.
         transcription2 (str): The second transcription.
     Returns:
         float: A normalized similarity score between 0 and 1, where 1 indicates identical transcriptions.
     """
@@ -52,16 +39,14 @@ def levenshtein_similarity(transcription1, transcription2):
 def evaluate_audio_similarity(original_audio_bytes, user_audio_bytes):
     """
     Compares the similarity between the transcription of an original audio file and a user's audio file.
     Args:
         original_audio_bytes (bytes): Bytes of the original audio file.
         user_audio_bytes (bytes): Bytes of the user's audio file.
     Returns:
         tuple: Transcriptions and Levenshtein similarity score.
     """
-    transcription_original = transcribe_audio(original_audio_bytes)
-    transcription_user = transcribe_audio(user_audio_bytes)
     similarity_score_levenshtein = levenshtein_similarity(transcription_original, transcription_user)
     return transcription_original, transcription_user, similarity_score_levenshtein

 import streamlit as st
+import requests
 import Levenshtein
 from io import BytesIO
 from audio_recorder_streamlit import audio_recorder
+# Function to securely load the Hugging Face API token
 @st.cache_resource
+def load_hf_token():
+    return st.secrets["HF_API_KEY"]
+# Function to query the Hugging Face Inference API
+def transcribe_audio_hf(audio_bytes):
     """
+    Transcribes speech from an audio file using the Hugging Face Inference API.
     Args:
         audio_bytes (bytes): Audio data in bytes.
     Returns:
         str: The transcription of the speech in the audio file.
     """
+    API_URL = "https://api-inference.huggingface.co/models/jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+    headers = {"Authorization": f"Bearer {load_hf_token()}"}
+    response = requests.post(API_URL, headers=headers, data=audio_bytes)
+    return response.json().get("text", "").strip()
 def levenshtein_similarity(transcription1, transcription2):
     """
     Calculate the Levenshtein similarity between two transcriptions.
     Args:
         transcription1 (str): The first transcription.
         transcription2 (str): The second transcription.
     Returns:
         float: A normalized similarity score between 0 and 1, where 1 indicates identical transcriptions.
     """
 def evaluate_audio_similarity(original_audio_bytes, user_audio_bytes):
     """
     Compares the similarity between the transcription of an original audio file and a user's audio file.
     Args:
         original_audio_bytes (bytes): Bytes of the original audio file.
         user_audio_bytes (bytes): Bytes of the user's audio file.
     Returns:
         tuple: Transcriptions and Levenshtein similarity score.
     """
+    transcription_original = transcribe_audio_hf(original_audio_bytes)
+    transcription_user = transcribe_audio_hf(user_audio_bytes)
     similarity_score_levenshtein = levenshtein_similarity(transcription_original, transcription_user)
     return transcription_original, transcription_user, similarity_score_levenshtein