Yehor committed on
Commit 02575aa · 1 Parent(s): e697892
Files changed (3):
  1. README.md +0 -3
  2. app.py +91 -48
  3. requirements-dev.txt +0 -1
README.md CHANGED
@@ -17,9 +17,6 @@ uv venv --python 3.10
 source .venv/bin/activate
 
 uv pip install -r requirements.txt
-
-# in development mode
-uv pip install -r requirements-dev.txt
 ```
 
 ## Run
app.py CHANGED
@@ -1,26 +1,39 @@
 import sys
 import time
 
-from importlib.metadata import version
+from importlib.metadata import version, PackageNotFoundError
 
-import spaces
+try:
+    import spaces
+except ImportError:
+    print("ZeroGPU is not available, skipping...")
 
 import torch
 import torchaudio
 import torchaudio.transforms as T
 
 import gradio as gr
+from gradio.themes import Soft
+from gradio.utils import is_zero_gpu_space
 
 from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
 
+try:
+    spaces_version = version("spaces")
+    print("ZeroGPU is available, changing inference call.")
+except PackageNotFoundError:
+    spaces_version = "N/A"
+    print("ZeroGPU is not available, skipping...")
+
+use_zero_gpu = is_zero_gpu_space()
 use_cuda = torch.cuda.is_available()
 
 if use_cuda:
-    print('CUDA is available, setting correct inference_device variable.')
-    device = 'cuda'
+    print("CUDA is available, setting correct inference_device variable.")
+    device = "cuda"
     torch_dtype = torch.float16
 else:
-    device = 'cpu'
+    device = "cpu"
     torch_dtype = torch.float32
 
 # Config
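Note: the guarded `import spaces` plus the `version("spaces")` probe above is a standard pattern for optional dependencies. A minimal self-contained sketch of the same idea (the `soft_version` helper is illustrative, not part of the commit):

```python
# Probe an optional package's version without crashing when it is absent;
# `soft_version` is an illustrative helper, not part of app.py.
from importlib.metadata import PackageNotFoundError, version


def soft_version(package: str) -> str:
    try:
        return version(package)
    except PackageNotFoundError:
        return "N/A"


print(soft_version("torch"))   # a version string when installed
print(soft_version("spaces"))  # "N/A" outside a ZeroGPU Space
```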
@@ -33,7 +46,9 @@ concurrency_limit = 5
 use_torch_compile = False
 
 # Load the model
-asr_model = AutoModelForCTC.from_pretrained(model_name, torch_dtype=torch_dtype, device_map=device)
+asr_model = AutoModelForCTC.from_pretrained(
+    model_name, torch_dtype=torch_dtype, device_map=device
+)
 processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
 
 if use_torch_compile:
@@ -66,7 +81,7 @@ authors_table = """
 
 Follow them in social networks and **contact** if you need any help or have any questions:
 
-| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
+| **Yehor Smoliakov** |
 |-------------------------------------------------------------------------------------------------|
 | https://t.me/smlkw in Telegram |
 | https://x.com/yehor_smoliakov at X |
@@ -78,16 +93,11 @@ Follow them in social networks and **contact** if you need any help or have any
 description_head = f"""
 # Speech-to-Text for Ukrainian v2.1
 
-## Overview
-
 This space uses https://huggingface.co/{model_name} model to recognize audio files.
 
 > Due to resource limitations, audio duration **must not** exceed **{max_duration}** seconds.
 """.strip()
 
-description_foot = f"""
-{authors_table}
-""".strip()
 
 transcription_value = """
 Recognized text will appear here.
@@ -107,15 +117,14 @@ tech_env = f"""
 tech_libraries = f"""
 #### Libraries
 
-- torch: {version('torch')}
-- torchaudio: {version('torchaudio')}
-- transformers: {version('transformers')}
-- accelerate: {version('accelerate')}
-- gradio: {version('gradio')}
+- torch: {version("torch")}
+- torchaudio: {version("torchaudio")}
+- transformers: {version("transformers")}
+- accelerate: {version("accelerate")}
+- gradio: {version("gradio")}
 """.strip()
 
 
-@spaces.GPU
 def inference(audio_path, progress=gr.Progress()):
     if not audio_path:
         raise gr.Error("Please upload an audio file.")
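Note: the quote flip inside the template is safe on the Python 3.10 the README targets, because a triple-quoted f-string only terminates on the exact `"""` sequence; reusing the *same* single delimiter inside a replacement field needs Python 3.12+ (PEP 701). A quick stdlib-only check:

```python
# Double quotes inside a triple-double-quoted f-string parse fine on 3.10+,
# because only the exact closing `"""` sequence terminates the literal.
libs = {"torch": "2.x"}  # stand-in value, just to make the line runnable
print(f"""- torch: {libs["torch"]}""")
# f"{libs["torch"]}" (same quote reused) is a SyntaxError before Python 3.12.
```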
@@ -190,53 +199,87 @@ def inference(audio_path, progress=gr.Progress()):
     result_texts = []
 
     for result in results:
-        result_texts.append(f'**{result["path"]}**')
+        result_texts.append(f"**{result['path']}**")
         result_texts.append("\n\n")
-        result_texts.append(f'> {result["transcription"]}')
+        result_texts.append(f"> {result['transcription']}")
         result_texts.append("\n\n")
-        result_texts.append(f'**Audio duration**: {result["audio_duration"]}')
+        result_texts.append(f"**Audio duration**: {result['audio_duration']}")
         result_texts.append("\n")
-        result_texts.append(f'**Real-Time Factor**: {result["rtf"]}')
+        result_texts.append(f"**Real-Time Factor**: {result['rtf']}")
 
     return "\n".join(result_texts)
 
 
-demo = gr.Blocks(
-    title="Speech-to-Text for Ukrainian",
-    analytics_enabled=False,
-    theme=gr.themes.Base(),
-)
+inference_func = inference
+if use_zero_gpu:
+    inference_func = spaces.GPU(inference)
 
-with demo:
-    gr.Markdown(description_head)
 
-    gr.Markdown("## Usage")
+def create_app():
+    tab = gr.Blocks(
+        title="Speech-to-Text for Ukrainian",
+        analytics_enabled=False,
+        theme=Soft(),
+    )
+
+    with tab:
+        gr.Markdown(description_head)
+
+        gr.Markdown("## Usage")
 
-    with gr.Column():
-        audio_file = gr.Audio(label="Audio file", type="filepath")
-        transcription = gr.Markdown(
-            label="Transcription",
-            value=transcription_value,
+        with gr.Column():
+            audio_file = gr.Audio(label="Audio file", type="filepath")
+            transcription = gr.Markdown(
+                label="Transcription",
+                value=transcription_value,
+            )
+
+        gr.Button("Run").click(
+            inference_func,
+            concurrency_limit=concurrency_limit,
+            inputs=audio_file,
+            outputs=transcription,
         )
 
-    gr.Button("Run").click(
-        inference,
-        concurrency_limit=concurrency_limit,
-        inputs=audio_file,
-        outputs=transcription,
-    )
+        with gr.Row():
+            gr.Examples(label="Choose an example", inputs=audio_file, examples=examples)
 
-    with gr.Row():
-        gr.Examples(label="Choose an example", inputs=audio_file, examples=examples)
+        gr.Markdown(examples_table)
 
-    gr.Markdown(examples_table)
+    return tab
 
-    gr.Markdown(description_foot)
 
-    gr.Markdown("### Gradio app uses:")
-    gr.Markdown(tech_env)
-    gr.Markdown(tech_libraries)
+def create_env():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(tech_env)
+        gr.Markdown(tech_libraries)
+
+    return tab
+
+
+def create_authors():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(authors_table)
+
+    return tab
+
+
+def create_demo():
+    app_tab = create_app()
+    authors_tab = create_authors()
+    env_tab = create_env()
+
+    return gr.TabbedInterface(
+        [app_tab, authors_tab, env_tab],
+        tab_names=[
+            "🎙️ Recognition",
+            "👥 Authors",
+            "📦 Environment, Models, and Libraries",
+        ],
+    )
+
 
 if __name__ == "__main__":
+    demo = create_demo()
     demo.queue()
     demo.launch()
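Note: dropping the unconditional `@spaces.GPU` decorator in favor of wrapping at module level keeps the app importable on machines without the `spaces` package. A minimal sketch of that conditional-decoration pattern (names and the function body are illustrative; the real app gates on `is_zero_gpu_space()` rather than on the import itself):

```python
# Decorate only when the optional ZeroGPU scheduler is importable;
# otherwise fall back to a transparent no-op wrapper.
try:
    import spaces
    gpu_wrap = spaces.GPU      # real ZeroGPU decorator on Hugging Face Spaces
except ImportError:
    def gpu_wrap(fn):          # no-op fallback everywhere else
        return fn


def inference(audio_path: str) -> str:
    return f"would transcribe {audio_path}"


inference_func = gpu_wrap(inference)  # same shape as the app.py change
print(inference_func("sample.wav"))
```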
 
requirements-dev.txt DELETED
@@ -1 +0,0 @@
-ruff
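Note: with requirements-dev.txt gone, the one tool it pinned can still be run ad hoc when needed, e.g. `uvx ruff check .` via uv's ephemeral tool runner, or a plain `uv pip install ruff` into the active venv.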