botarioAcc committed on
Commit
85eedc6
·
verified ·
1 Parent(s): 16ff511

update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -30
app.py CHANGED
@@ -21,46 +21,36 @@ if LANGUAGE not in SUPPORTED_LANGUAGES:
21
  else:
22
  MODEL_PATH = MODEL_PATHS[LANGUAGE]
23
 
24
- @spaces.GPU
25
- def init_pipeline():
26
- return pipeline(
27
- "automatic-speech-recognition",
28
- model=MODEL_PATH,
29
- device=0 if torch.cuda.is_available() else -1,
30
- chunk_length_s=30,
31
- stride_length_s=(4, 2),
32
- batch_size=8,
33
- token=os.getenv("HF_TOKEN"),
34
- )
35
-
36
- wave2vec_pipeline = init_pipeline()
37
-
38
 
 
39
  def transcribe_gradio(audio_path: str | None) -> str:
40
- """
41
- Transcribe an uploaded or recorded audio file and report inference time.
42
-
43
- Args:
44
- audio_path: Local filesystem path to the audio file provided by Gradio;
45
- None or empty if the user hasn't recorded/uploaded anything.
46
-
47
- Returns:
48
- A string containing either:
49
- - A warning if no file was provided,
50
- - An error message if loading/decoding failed,
51
- - Or the transcript followed by the elapsed inference time.
52
- """
53
  if not audio_path:
54
  return "⚠️ Please record something or choose a file first."
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  start = time.time()
57
  try:
58
- transcript = utils.transcribe_file(audio_path, wave2vec_pipeline)
59
- except ValueError as err:
 
60
  return f"❌ {err}"
61
  runtime = time.time() - start
62
- return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s"
63
 
 
64
 
65
  # gradio interface
66
  with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo:
 
21
  else:
22
  MODEL_PATH = MODEL_PATHS[LANGUAGE]
23
 
24
+ _asr_pipeline = None
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ @spaces.GPU
27
  def transcribe_gradio(audio_path: str | None) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  if not audio_path:
29
  return "⚠️ Please record something or choose a file first."
30
 
31
+ global _asr_pipeline
32
+
33
+ if _asr_pipeline is None:
34
+
35
+ _asr_pipeline = pipeline(
36
+ "automatic-speech-recognition",
37
+ model=MODEL_PATH,
38
+ device=0 if torch.cuda.is_available() else -1,
39
+ chunk_length_s=30,
40
+ stride_length_s=(4, 2),
41
+ batch_size=8,
42
+ token=os.getenv("HF_TOKEN"),
43
+ )
44
+
45
  start = time.time()
46
  try:
47
+ result = _asr_pipeline(audio_path)
48
+ transcript = result["text"] if isinstance(result, dict) else str(result)
49
+ except Exception as err:
50
  return f"❌ {err}"
51
  runtime = time.time() - start
 
52
 
53
+ return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s"
54
 
55
  # gradio interface
56
  with gr.Blocks(title="Wave2Vec (Luxembourgish) ", theme="soft", css=CUSTOM_CSS) as demo: