Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,768 Bytes
5d52c32 6c226f9 d790c0b 88183ad 1e8d252 6cd6646 8c4d38d 6c226f9 17f14b2 f696e7e 6c226f9 f696e7e 6c226f9 5d52c32 3da85d4 1e8d252 6cd6646 2bf1d0a 1e8d252 15b00fb 3da85d4 3df1d51 72be79d 46704ba 4731eae 1e8d252 15b00fb 3da85d4 3df1d51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import spaces
import torch
import gradio as gr
from transformers import pipeline
import tempfile
import os
import uuid
import scipy.io.wavfile
import numpy as np
# Whisper checkpoint to load; also interpolated into the UI header below.
MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
# Batch size forwarded to the ASR pipeline call in transcribe().
BATCH_SIZE = 8
# transformers accepts a CUDA device index (int) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"
# Speech-recognition pipeline shared by all requests; chunk_length_s=30
# lets it process audio longer than the model's context window.
pipe = pipeline(
task="automatic-speech-recognition",
model=MODEL_NAME,
chunk_length_s=30,
device=device,
)
@spaces.GPU
def transcribe(inputs, previous_transcription):
    """Transcribe one streamed microphone chunk and append it to the transcript.

    Parameters
    ----------
    inputs : tuple[int, numpy.ndarray]
        ``(sample_rate, audio_data)`` as emitted by the streaming
        ``gr.Audio`` component; ``audio_data`` is int16 PCM samples.
    previous_transcription : str | None
        Transcript accumulated so far (current contents of the output
        textbox; may be None/empty on the first chunk).

    Returns
    -------
    str
        The transcript with this chunk's text appended.  On any error the
        previous transcript is returned unchanged so the stream keeps going.
    """
    try:
        sample_rate, audio_data = inputs
        # Normalize int16 PCM to float32 in [-1, 1), the range the ASR
        # pipeline expects for raw waveform input.
        audio_data = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
        # Pass the chunk's true sample rate so the pipeline can resample.
        # Feeding the bare array would make it assume the model's default
        # rate (16 kHz), garbling output for 44.1/48 kHz browser microphones.
        transcription = pipe(
            {"sampling_rate": sample_rate, "raw": audio_data},
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": "transcribe"},
            return_timestamps=True,
        )
        # Guard against None from an initially-empty textbox value.
        return (previous_transcription or "") + transcription["text"]
    except Exception as e:
        # Best-effort streaming: log and keep what we have rather than
        # crashing the live session.
        print(f"Error during transcription: {e}")
        return previous_transcription
# UI: a single column with a title, a streaming microphone input, and a
# textbox that accumulates the live transcription.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown(f"# Realtime Whisper Large V3 Turbo: Transcribe Audio\n Transcribe inputs in Realtime. This Demo uses the checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.")
        input_audio_microphone = gr.Audio(streaming=True)
        output = gr.Textbox(label="Transcription", value="")
        # Re-run transcribe() on each 2 s audio chunk, feeding the current
        # textbox contents back in so the transcript accumulates; each
        # streaming session is capped at 45 s of processing time.
        input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output], time_limit=45, stream_every=2, concurrency_limit=None)

# queue() is required for streaming events; fix: removed a stray trailing
# "|" scrape artifact that made this line a syntax error.
demo.queue().launch()