Spaces:

Somalitts
/

speech-to-text

Running

speech-to-text / app.py

Create app.py

bece762 verified 24 days ago

1.26 kB

	import gradio as gr
	import torchaudio
	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

	# The processor and the CTC model are both loaded from the same Hugging Face
	# Hub checkpoint, so the identifier is spelled out once and shared.
	MODEL_ID = "Mustafaa4a/ASR-Somali"
	processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
	model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)

	def transcribe(audio):
	    """Transcribe an audio file with the module-level processor and model.

	    Args:
	        audio: Path to the audio file, as handed over by the Gradio audio
	            input (``type="filepath"``). May be ``None`` or empty when the
	            user submits without providing a file.

	    Returns:
	        The decoded transcription string, or ``""`` when no audio was given.
	    """
	    # Gradio passes None when nothing was uploaded; loading it would crash.
	    if not audio:
	        return ""

	    target_rate = 16000  # sampling rate passed to the processor below

	    # torchaudio.load returns a (channels, frames) tensor by default.
	    waveform, sample_rate = torchaudio.load(audio)

	    # Downmix multi-channel recordings to mono. Without this, a stereo file
	    # would reach the processor as a 2-D array instead of a single signal.
	    if waveform.dim() > 1 and waveform.size(0) > 1:
	        waveform = waveform.mean(dim=0, keepdim=True)

	    if sample_rate != target_rate:
	        resampler = torchaudio.transforms.Resample(
	            orig_freq=sample_rate, new_freq=target_rate
	        )
	        waveform = resampler(waveform)

	    # Drop only the channel axis (a bare squeeze() could also collapse the
	    # time axis of a one-frame clip) and hand the processor a NumPy array.
	    if waveform.dim() > 1:
	        waveform = waveform.squeeze(0)
	    inputs = processor(
	        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
	    )

	    # Inference only: no gradients needed.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Greedy CTC decoding: pick the highest-scoring token id per frame and
	    # let the processor turn the id sequence into text.
	    predicted_ids = torch.argmax(logits, dim=-1)
	    return processor.decode(predicted_ids[0])

	# Web UI: a single audio-file input wired to transcribe(), with the result
	# shown in a text box.
	audio_input = gr.Audio(type="filepath", label="Upload Somali Audio (.wav)")
	text_output = gr.Textbox(label="Transcription")

	interface = gr.Interface(
	    fn=transcribe,
	    inputs=audio_input,
	    outputs=text_output,
	    title="Somali-speech_to_text",
	    description="Upload a Somali speech audio file (mono WAV, 16kHz) and get the text transcription.",
	)

	# Start the app with default launch settings. share=True is deliberately
	# not passed: it should not be used on Hugging Face Spaces.
	interface.launch()