Spaces:

romas-458
/

acr

Sleeping

App Files Files Community

roman committed on May 27, 2024

Commit

62e68d5

1 Parent(s): 8dd0371

update requirements.txt, add whisper small ukr

Browse files

Files changed (3) hide show

app.py +39 -11
app2.py +41 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,22 +1,39 @@
 import streamlit as st
-import whisper
 import tempfile
 from pydub import AudioSegment
 # Define available models
-available_models = ["tiny", "base", "small", "medium", "large"]
-st.title("Voice Recognition App")
-st.write("Upload an audio file and choose a Whisper model to transcribe it to text.")
 # Model selection dropdown
-model_choice = st.selectbox("Choose a Whisper model", available_models)
-# Load the selected Whisper model
 st.write(f"Loading {model_choice} model...")
-model = whisper.load_model(model_choice)
 st.write(f"{model_choice} model loaded successfully.")
 # File uploader for audio file
 uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])
@@ -26,7 +43,7 @@ if uploaded_file is not None:
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
-    # Convert audio file to a format supported by Whisper (if necessary)
     audio = AudioSegment.from_file(temp_file_path)
     temp_wav_path = tempfile.mktemp(suffix=".wav")
     audio.export(temp_wav_path, format="wav")
@@ -35,7 +52,18 @@ if uploaded_file is not None:
     st.write("Transcribing audio...")
-    # Transcribe audio using Whisper model
-    result = model.transcribe(temp_wav_path)
     st.write("Transcription:")
-    st.write(result["text"])

 import streamlit as st
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, Wav2Vec2Processor
+import torch
 import tempfile
 from pydub import AudioSegment
+import numpy as np
 # Define available models
+# available_models = [
+#     "facebook/s2t-small-mustc-en-fr-st",
+#     "facebook/s2t-medium-mustc-en-fr-st",
+#     "facebook/s2t-large-mustc-en-fr-st"
+# ]
+available_models = ["Yehor/whisper-small-ukrainian"]
+st.title("Voice Recognition App using SpeechSeq2Seq")
+st.write("Upload an audio file and choose a model to transcribe it to text.")
 # Model selection dropdown
+model_choice = st.selectbox("Choose a SpeechSeq2Seq model", available_models)
+# Load the selected model and processor
+@st.cache_resource
+def load_model_and_processor(model_name):
+    """Load a speech seq2seq model together with its matching processor.
+
+    The processor is resolved with AutoProcessor so that the feature
+    extractor and tokenizer are the ones the checkpoint itself declares
+    (a Whisper processor for a Whisper checkpoint) instead of a hard-coded
+    Wav2Vec2 processor that does not fit the model.
+
+    Args:
+        model_name: Hugging Face Hub id or local path of the checkpoint.
+
+    Returns:
+        A ``(model, processor)`` tuple; cached by Streamlit per model name.
+    """
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
+    processor = AutoProcessor.from_pretrained(model_name)
+    return model, processor
 st.write(f"Loading {model_choice} model...")
+model, processor = load_model_and_processor(model_choice)
 st.write(f"{model_choice} model loaded successfully.")
 # File uploader for audio file
 uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
+    # Convert audio file to a format supported by the processor (if necessary)
     audio = AudioSegment.from_file(temp_file_path)
     temp_wav_path = tempfile.mktemp(suffix=".wav")
     audio.export(temp_wav_path, format="wav")
     st.write("Transcribing audio...")
+    # Load audio as 16 kHz mono to match the sampling rate given to the processor
+    audio_input = AudioSegment.from_file(temp_wav_path).set_frame_rate(16000).set_channels(1)
+    # pydub yields integer PCM samples; scale them to float32 in [-1.0, 1.0)
+    # because the feature extractor expects a normalized float waveform
+    samples = np.array(audio_input.get_array_of_samples(), dtype=np.float32)
+    samples /= audio_input.max_possible_amplitude
+    # Process the audio (seq2seq speech processors return `input_features`)
+    input_features = processor(samples, return_tensors="pt", sampling_rate=16000).input_features
+    # Generate transcription
+    with torch.no_grad():
+        predicted_ids = model.generate(input_features)
+    # Drop special tokens so they do not leak into the text
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
     st.write("Transcription:")
+    st.write(transcription)

app2.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import os
+import tempfile
+
+import streamlit as st
+import whisper
+from pydub import AudioSegment
+
+# Define available models
+available_models = ["tiny", "base", "small", "medium", "large"]
+
+
+@st.cache_resource
+def load_whisper_model(model_name):
+    """Load a Whisper checkpoint once per name and reuse it across reruns.
+
+    Streamlit re-executes the whole script on every interaction; caching
+    keeps the model from being reloaded each time.
+    """
+    return whisper.load_model(model_name)
+
+
+st.title("Voice Recognition App")
+st.write("Upload an audio file and choose a Whisper model to transcribe it to text.")
+
+# Model selection dropdown
+model_choice = st.selectbox("Choose a Whisper model", available_models)
+
+# Load the selected Whisper model
+st.write(f"Loading {model_choice} model...")
+model = load_whisper_model(model_choice)
+st.write(f"{model_choice} model loaded successfully.")
+
+# File uploader for audio file
+uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])
+
+if uploaded_file is not None:
+    # Keep both intermediate files in a private temporary directory: the paths
+    # are not guessable (unlike tempfile.mktemp) and everything is removed
+    # automatically once transcription has finished.
+    with tempfile.TemporaryDirectory() as work_dir:
+        # Save the uploaded file temporarily
+        upload_path = os.path.join(work_dir, "upload")
+        with open(upload_path, "wb") as upload_copy:
+            # getvalue() returns the full payload regardless of stream position
+            upload_copy.write(uploaded_file.getvalue())
+
+        # Convert audio file to a format supported by Whisper (if necessary)
+        wav_path = os.path.join(work_dir, "audio.wav")
+        audio = AudioSegment.from_file(upload_path)
+        # export() returns an open file handle; close it so the file can be
+        # cleaned up with the directory
+        audio.export(wav_path, format="wav").close()
+
+        st.audio(uploaded_file, format="audio/wav")
+        st.write("Transcribing audio...")
+
+        # Transcribe audio using Whisper model
+        result = model.transcribe(wav_path)
+
+    st.write("Transcription:")
+    st.write(result["text"])

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 streamlit
 transformers
 pydub
-openai-whisper

 streamlit
 transformers
 pydub
+openai-whisper
+torch