AshwinSankar committed on
Commit
18edbb5
·
1 Parent(s): 54ec772

change to point vars

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -48,7 +48,7 @@ tokenizer = SeamlessM4TTokenizer.from_pretrained("ai4bharat/seamless-m4t-v2-larg
48
 
49
  CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
50
 
51
- AUDIO_SAMPLE_RATE = 16000.0
52
  MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
53
  DEFAULT_TARGET_LANGUAGE = "Hindi"
54
 
@@ -69,7 +69,7 @@ def run_s2tt(input_audio: str, source_language: str, target_language: str) -> st
69
 
70
  input_audio, orig_freq = torchaudio.load(input_audio)
71
  input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
72
- audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device="cuda", dtype=torch_dtype)
73
 
74
  text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
75
 
@@ -82,7 +82,7 @@ def run_asr(input_audio: str, target_language: str) -> str:
82
 
83
  input_audio, orig_freq = torchaudio.load(input_audio)
84
  input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
85
- audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device="cuda", dtype=torch_dtype)
86
 
87
  text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
88
 
 
48
 
49
  CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
50
 
51
+ AUDIO_SAMPLE_RATE = 16000
52
  MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
53
  DEFAULT_TARGET_LANGUAGE = "Hindi"
54
 
 
69
 
70
  input_audio, orig_freq = torchaudio.load(input_audio)
71
  input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
72
+ audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=device, dtype=torch_dtype)
73
 
74
  text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
75
 
 
82
 
83
  input_audio, orig_freq = torchaudio.load(input_audio)
84
  input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
85
+ audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=device, dtype=torch_dtype)
86
 
87
  text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
88