# Author: ZephaniahQ
# fixed syntax error in app.py
# commit 8115b30
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import gradio as gr
import os
import time
import tempfile
from scipy.io.wavfile import write
from pydub import AudioSegment
# Configuration
MODEL_ID = "Crystalcareai/Whisper-Medicalv1"  # medical-domain Whisper fine-tune on the HF Hub
CHUNK_LENGTH_S = 30      # pipeline-internal chunking window, in seconds
RETURN_TIMESTAMPS = True  # ask the ASR pipeline to also return timestamps
device = "cpu"            # CPU-only deployment
torch_dtype = torch.float32  # full precision; half precision gains nothing on CPU

# Load model and processor once at import time so every request reuses them.
print("Loading model...")
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Build the ASR pipeline shared by both transcription entry points below.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    chunk_length_s=CHUNK_LENGTH_S,
    return_timestamps=RETURN_TIMESTAMPS
)
def split_audio(path, chunk_len_ms=30_000):
    """Slice the audio file at *path* into consecutive chunk_len_ms-millisecond pieces.

    Returns a list of pydub AudioSegment slices; the final piece may be shorter.
    """
    segment = AudioSegment.from_file(path)
    pieces = []
    for start in range(0, len(segment), chunk_len_ms):
        pieces.append(segment[start:start + chunk_len_ms])
    return pieces
# transcribe chunkwise
def transcribe_chunkwise(audio, transcript_state):
    """Transcribe audio in 30 s chunks, streaming partial results to the UI.

    Args:
        audio: Gradio audio value — a (sample_rate, numpy_array) tuple, or None
            when the user pressed the button without recording/uploading.
        transcript_state: list of per-chunk transcripts accumulated so far
            (backed by gr.State).

    Yields:
        (joined_transcript, transcript_state) after each chunk is transcribed.
    """
    if audio is None:
        # Guard missing input like transcribe() does instead of crashing on unpack.
        yield "No audio input detected", transcript_state
        return
    sr, audio_data = audio
    # Save the whole recording to a temp WAV so pydub can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        write(tmp.name, sr, audio_data)
        path = tmp.name
    try:
        # Split audio into 30s chunks and transcribe each one.
        for chunk in split_audio(path):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ctmp:
                chunk.export(ctmp.name, format="wav")
                chunk_path = ctmp.name
            try:
                text = pipe(chunk_path)["text"]
            finally:
                os.remove(chunk_path)  # fix: chunk temp files were never deleted
            transcript_state.append(text)
            yield "\n".join(transcript_state), transcript_state
    finally:
        os.remove(path)  # fix: whole-file temp WAV was never deleted
# transcribe func (one-shot, whole file at once)
def transcribe(audio):
    """Transcribe a complete Gradio audio value in one pipeline call.

    Args:
        audio: Gradio audio value — a (sample_rate, numpy_array) tuple, or None.

    Returns:
        The transcript with each sentence on its own line, or a message when
        no audio was provided.
    """
    if audio is None:
        return "No audio input detected"
    sr, audio_data = audio
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        write(tmp.name, sr, audio_data)
        tmp_path = tmp.name
    try:
        text = pipe(tmp_path)["text"]
    finally:
        os.remove(tmp_path)  # fix: temp file leaked if pipe() raised
    # Break the transcript into one sentence per line for readability.
    text = text.replace(". ", ".\n").replace("! ", "!\n").replace("? ", "?\n")
    return text
with gr.Blocks() as app:
    # Shared transcript accumulator passed into transcribe_chunkwise.
    state = gr.State([])
    # NOTE(review): original emoji labels were mojibake; restored as mic/folder icons — confirm intent.
    mic_input = gr.Audio(sources=["microphone"], label="🎙️ speak")
    mic_run_btn = gr.Button("Transcribe")  # fix: "Trasncribe" typo
    file_input = gr.Audio(sources=["upload"], label="📁 Upload Audio")
    file_run_btn = gr.Button("Transcribe")  # fix: "Trasncribe" typo
    output_textbox = gr.Textbox(label="Transcription")
    # transcribe_chunkwise yields (text, state); both components must appear in
    # outputs — the original listed only the textbox, mismatching the 2-tuple.
    mic_run_btn.click(
        fn=transcribe_chunkwise,
        inputs=[mic_input, state],
        outputs=[output_textbox, state],
        api_name="transcribe",
        show_progress=True,
    )
    file_run_btn.click(
        fn=transcribe_chunkwise,
        inputs=[file_input, state],
        outputs=[output_textbox, state],
        api_name="transcribe_file",  # fix: duplicate api_name "transcribe" clashed
        show_progress=True,
    )
app.launch()