Spaces: Running on Zero
Commit
·
18edbb5
1
Parent(s):
54ec772
change to point vars
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ tokenizer = SeamlessM4TTokenizer.from_pretrained("ai4bharat/seamless-m4t-v2-larg
|
|
48 |
|
49 |
CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
|
50 |
|
51 |
-
AUDIO_SAMPLE_RATE = 16000
|
52 |
MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
|
53 |
DEFAULT_TARGET_LANGUAGE = "Hindi"
|
54 |
|
@@ -69,7 +69,7 @@ def run_s2tt(input_audio: str, source_language: str, target_language: str) -> st
|
|
69 |
|
70 |
input_audio, orig_freq = torchaudio.load(input_audio)
|
71 |
input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
|
72 |
-
audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=
|
73 |
|
74 |
text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
|
75 |
|
@@ -82,7 +82,7 @@ def run_asr(input_audio: str, target_language: str) -> str:
|
|
82 |
|
83 |
input_audio, orig_freq = torchaudio.load(input_audio)
|
84 |
input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
|
85 |
-
audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=
|
86 |
|
87 |
text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
|
88 |
|
|
|
48 |
|
49 |
CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
|
50 |
|
51 |
+
AUDIO_SAMPLE_RATE = 16000
|
52 |
MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
|
53 |
DEFAULT_TARGET_LANGUAGE = "Hindi"
|
54 |
|
|
|
69 |
|
70 |
input_audio, orig_freq = torchaudio.load(input_audio)
|
71 |
input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
|
72 |
+
audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=device, dtype=torch_dtype)
|
73 |
|
74 |
text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
|
75 |
|
|
|
82 |
|
83 |
input_audio, orig_freq = torchaudio.load(input_audio)
|
84 |
input_audio = torchaudio.functional.resample(input_audio, orig_freq=orig_freq, new_freq=16000)
|
85 |
+
audio_inputs= processor(input_audio, sampling_rate=16000, return_tensors="pt").to(device=device, dtype=torch_dtype)
|
86 |
|
87 |
text_out = model.generate(**audio_inputs, tgt_lang=target_language_code)[0].float().cpu().numpy().squeeze()
|
88 |
|