Spaces:

luluw
/

Conformer-CTC-Small

Sleeping

Conformer-CTC-Small / decoder.py

Conformer Space

dcae561 4 months ago

1.44 kB

	import torch
	import torchaudio

	from torchaudio.models.decoder import ctc_decoder, download_pretrained_files


	class SpeechRecognitionEngine:
	    """
	    ASR engine to transcribe recorded audio.

	    Holds a TorchScript acoustic model and a lexicon-based ``ctc_decoder``
	    built from the pretrained "librispeech-4-gram" lexicon and language
	    model files.
	    """

	    def __init__(self, model_file, token_path):
	        """
	        Load the TorchScript model and build the beam-search decoder.

	        Args:
	            model_file: Model location handed to ``torch.jit.load``.
	            token_path: Path to a text file listing one token per line.
	        """
	        self.model = torch.jit.load(model_file)
	        self.model.eval().to('cpu')

	        # Load decoder files and tokens
	        files = download_pretrained_files("librispeech-4-gram")

	        # Read with an explicit encoding so the token list does not depend
	        # on the platform's default locale.
	        with open(token_path, 'r', encoding='utf-8') as f:
	            tokens = f.read().splitlines()

	        # Beam-search hyperparameters are fixed here; nbest=1 means only the
	        # best hypothesis per utterance is returned by the decoder.
	        self.decoder = ctc_decoder(
	            lexicon=files.lexicon,
	            tokens=tokens,
	            lm=files.lm,
	            nbest=1,
	            beam_size=50,
	            beam_threshold=25,
	            beam_size_token=20,
	            lm_weight=1.23,
	            word_score=-0.26,
	        )

	        print("Loaded beam search with Ken LM")

	    def transcribe(self, model, featurizer, filename):
	        """
	        Transcribe audio from a file using the ASR model.

	        Args:
	            model: Callable acoustic model applied to the features. When
	                ``None``, the model loaded in ``__init__`` (``self.model``)
	                is used instead.
	            featurizer: Callable that turns the loaded waveform into
	                features; its output must be a 3-D tensor.
	            filename: Audio file passed to ``torchaudio.load``.

	        Returns:
	            The words of the best hypothesis for the first batch item,
	            joined with single spaces.

	        Raises:
	            RuntimeError: If any step fails; the original exception is
	                attached as ``__cause__``.
	        """
	        try:
	            # The sample rate returned by torchaudio.load is discarded.
	            # NOTE(review): no resampling happens here, so the file
	            # presumably must already match the rate the featurizer/model
	            # expects -- confirm with the caller.
	            waveform, _ = torchaudio.load(filename)

	            # Swap the last two axes of the featurizer output; presumably
	            # (channel, n_mels, time) -> (channel, time, n_mels) -- confirm
	            # against the featurizer in use.
	            mel = featurizer(waveform).permute(0, 2, 1)  # Prepare mel features

	            # Fall back to the engine's own model when none is supplied.
	            if model is None:
	                model = self.model

	            with torch.inference_mode():
	                out = model(mel)

	            results = self.decoder(out)
	            # results[0] is the hypothesis list for the first batch item;
	            # [0] selects its top-ranked hypothesis.
	            return " ".join(results[0][0].words).strip()
	        except Exception as e:
	            # Chain explicitly so the root cause stays visible to callers.
	            raise RuntimeError(f"Error during transcription: {e}") from e