from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from transformers.pipelines.audio_utils import ffmpeg_read
from huggingface_hub import login
import gradio as gr
import spaces
import torch
import os
# Access token and model repository ID are read from environment variables (e.g. Space secrets).
HF_TOKEN = os.getenv("HF")
MODEL_ID_HF = os.getenv("MODEL_ID_HF")

# Prefer GPU + float16 when available; otherwise fall back to CPU + float32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = MODEL_ID_HF

# Load the fine-tuned Whisper checkpoint and build a chunked long-form ASR pipeline.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, token=HF_TOKEN, torch_dtype=torch_dtype, use_safetensors=True).to(device)
processor = AutoProcessor.from_pretrained(model_id, token=HF_TOKEN)
pipe = pipeline(
    "automatic-speech-recognition", model=model, tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor, token=HF_TOKEN, max_new_tokens=128,
    chunk_length_s=15, batch_size=8, torch_dtype=torch_dtype, device=device)
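
# Quick local sanity check of the pipeline (a commented-out sketch; "sample.wav" is a
# hypothetical local file, not part of this Space):
# print(pipe("sample.wav", generate_kwargs={"task": "transcribe", "language": "sq"})["text"])
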
@spaces.GPU
def transcribe(inputs, task="transcribe"):
    # The Gradio interfaces below only pass the audio path, so `task` needs a default
    # value; otherwise the call would fail with a missing-argument error.
    if inputs is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file before submitting your request.")
    text = pipe(inputs, generate_kwargs={"task": task, "language": "sq"},
                return_timestamps=True)["text"]
    return text
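
# Example call (a sketch; "recording.wav" is a hypothetical local file):
# transcribe("recording.wav")  # -> transcribed Albanian text
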
demo = gr.Blocks()

# Both tabs share the same title and description, so define the description once.
DESCRIPTION = (
    "This fine-tuned Whisper model provides reliable transcription for Albanian audio, "
    "whether from a microphone or an uploaded file. Key details about this project:"
    "\n\n- Fine-tuned on more than 400 hours of carefully curated Albanian audio data."
    f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id})."
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload"], type="filepath", label="Audio file"),
    ],
    outputs=gr.Textbox(label="Transcription", lines=8),
    title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
    description=DESCRIPTION,
    allow_flagging="never",
)

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs=gr.Textbox(label="Transcription", lines=8),
    title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
    description=DESCRIPTION,
    allow_flagging="never",
)
with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])

demo.launch()
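
# For local testing outside a Hugging Face Space, one could instead expose a public link
# (a sketch, not part of the deployed app): demo.launch(share=True)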