Spaces:

Ivan000
/

whisper-large-v3-turbo

Sleeping

whisper-large-v3-turbo / app.py

Create app.py

ff83bcc verified 12 months ago

1.61 kB

	# app.py
	# =============
	# This is a complete app.py file for an automatic speech recognition app using the openai/whisper-large-v3-turbo model.
	# The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.

	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
	import gradio as gr

	# Inference runs on the CPU in float32, so no GPU memory is used.
	device = "cpu"
	torch_dtype = torch.float32

	# Hub identifier shared by the model weights and the processor.
	model_id = "openai/whisper-large-v3-turbo"

	# The processor bundles the tokenizer and the feature extractor that the
	# pipeline below needs.
	processor = AutoProcessor.from_pretrained(model_id)

	# Load the weights from safetensors with reduced peak CPU memory, then
	# place the model on the selected device.
	model = AutoModelForSpeechSeq2Seq.from_pretrained(
	    model_id,
	    torch_dtype=torch_dtype,
	    low_cpu_mem_usage=True,
	    use_safetensors=True,
	).to(device)

	# Wire model, tokenizer and feature extractor into a single ASR pipeline.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    torch_dtype=torch_dtype,
	    device=device,
	)

	def transcribe_audio(audio_file) -> str:
	    """
	    Transcribe the given audio file using the Whisper model.

	    Parameters:
	    audio_file (str or None): Path to the audio file. Gradio passes None
	    when the form is submitted without an uploaded file.

	    Returns:
	    str: Transcribed text, or an empty string when no audio was provided.
	    """
	    # Nothing was uploaded: return early instead of handing None to the
	    # pipeline, which would raise.
	    if not audio_file:
	        return ""

	    # return_timestamps=True lets Whisper use long-form generation, which is
	    # required for recordings longer than 30 seconds; the plain transcript
	    # is still available under the "text" key.
	    result = pipe(audio_file, return_timestamps=True)
	    return result["text"]

	# Define the Gradio interface.
	# The input uses gr.Audio with `sources=[...]`: the older gr.inputs.Audio
	# namespace and its `source=` keyword no longer exist in Gradio 4 and later.
	iface = gr.Interface(
	    fn=transcribe_audio,
	    # Upload-only input; the callback receives the path of the uploaded file.
	    inputs=gr.Audio(sources=["upload"], type="filepath"),
	    outputs="text",
	    title="Whisper ASR Demo",
	    description="Upload an audio file and get the transcribed text using the openai/whisper-large-v3-turbo model.",
	)

	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()