Error during ASR transcription

#1
by Mutturaj - opened

Traceback (most recent call last):
File "/appvol/Bhashani/NeMo/a.py", line 11, in <module>
ctc_text = model.transcribe(['Marinal-L_Tam_Lang-1.wav'], batch_size=1,logprobs=False, language_id='as')[0]
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/appvol/Bhashani/NeMo/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py", line 139, in transcribe
return super().transcribe(
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/appvol/Bhashani/NeMo/nemo/collections/asr/models/rnnt_models.py", line 276, in transcribe
return super().transcribe(
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/mixins/transcription.py", line 282, in transcribe
for processed_outputs in generator:
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/mixins/transcription.py", line 384, in transcribe_generator
for test_batch in tqdm(dataloader, desc="Transcribing", disable=not verbose):
File "/appvol/python310/lib/python3.10/site-packages/tqdm/std.py", line 1181, in __iter__
for obj in iterable:
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 708, in __next__
data = self._next_data()
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 764, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/appvol/python310/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/appvol/Bhashani/NeMo/nemo/collections/asr/data/audio_to_text.py", line 491, in __getitem__
features = self.featurizer.process(
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/preprocessing/features.py", line 186, in process
audio = AudioSegment.from_file(
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/preprocessing/segment.py", line 280, in from_file
return cls(
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/preprocessing/segment.py", line 88, in __init__
samples = self._convert_samples_to_float32(samples)
File "/appvol/Bhashani/NeMo/nemo/collections/asr/parts/preprocessing/segment.py", line 168, in _convert_samples_to_float32
if samples.dtype in np.sctypes['int']:
File "/appvol/python310/lib/python3.10/site-packages/numpy/__init__.py", line 400, in __getattr__
raise AttributeError(
AttributeError: np.sctypes was removed in the NumPy 2.0 release. Access dtypes explicitly instead.

This is the code:

import torch
import nemo.collections.asr as nemo_asr

model = nemo_asr.models.ASRModel.from_pretrained("ai4bharat/indicconformer_stt_as_hybrid_rnnt_large")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.freeze() # inference mode
model = model.to(device) # transfer model to device

model.cur_decoder = "ctc"
ctc_text = model.transcribe(['Marinal-L_Tam_Lang-1.wav'], batch_size=1,logprobs=False, language_id='as')[0]
print(ctc_text)

It works for RNNT, but not for CTC.

Sign up or log in to comment