Yehor committed on
Commit 02575aa · 1 Parent(s): e697892
Files changed (3):
  1. README.md +0 -3
  2. app.py +91 -48
  3. requirements-dev.txt +0 -1
README.md CHANGED
@@ -17,9 +17,6 @@ uv venv --python 3.10
 source .venv/bin/activate
 
 uv pip install -r requirements.txt
-
-# in development mode
-uv pip install -r requirements-dev.txt
 ```
 
 ## Run
app.py CHANGED
@@ -1,26 +1,39 @@
 import sys
 import time
 
-from importlib.metadata import version
+from importlib.metadata import version, PackageNotFoundError
 
-import spaces
+try:
+    import spaces
+except ImportError:
+    print("ZeroGPU is not available, skipping...")
 
 import torch
 import torchaudio
 import torchaudio.transforms as T
 
 import gradio as gr
+from gradio.themes import Soft
+from gradio.utils import is_zero_gpu_space
 
 from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
 
+try:
+    spaces_version = version("spaces")
+    print("ZeroGPU is available, changing inference call.")
+except PackageNotFoundError:
+    spaces_version = "N/A"
+    print("ZeroGPU is not available, skipping...")
+
+use_zero_gpu = is_zero_gpu_space()
 use_cuda = torch.cuda.is_available()
 
 if use_cuda:
-    print('CUDA is available, setting correct inference_device variable.')
-    device = 'cuda'
+    print("CUDA is available, setting correct inference_device variable.")
+    device = "cuda"
     torch_dtype = torch.float16
 else:
-    device = 'cpu'
+    device = "cpu"
     torch_dtype = torch.float32
 
 # Config
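Note: the guarded `import spaces` plus the `version("spaces")` probe above is a standard pattern for optional dependencies. A minimal self-contained sketch of the same idea (the `soft_version` helper is illustrative, not part of the commit):

```python
# Probe an optional package's version without crashing when it is absent;
# `soft_version` is an illustrative helper, not part of app.py.
from importlib.metadata import PackageNotFoundError, version


def soft_version(package: str) -> str:
    try:
        return version(package)
    except PackageNotFoundError:
        return "N/A"


print(soft_version("torch"))   # a version string when installed
print(soft_version("spaces"))  # "N/A" outside a ZeroGPU Space
```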
@@ -33,7 +46,9 @@ concurrency_limit = 5
 use_torch_compile = False
 
 # Load the model
-asr_model = AutoModelForCTC.from_pretrained(model_name, torch_dtype=torch_dtype, device_map=device)
+asr_model = AutoModelForCTC.from_pretrained(
+    model_name, torch_dtype=torch_dtype, device_map=device
+)
 processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
 
 if use_torch_compile:
@@ -66,7 +81,7 @@ authors_table = """
 
 Follow them in social networks and **contact** if you need any help or have any questions:
 
-| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
+| **Yehor Smoliakov** |
 |-------------------------------------------------------------------------------------------------|
 | https://t.me/smlkw in Telegram |
 | https://x.com/yehor_smoliakov at X |
@@ -78,16 +93,11 @@ Follow them in social networks and **contact** if you need any help or have any
 description_head = f"""
 # Speech-to-Text for Ukrainian v2.1
 
-## Overview
-
 This space uses https://huggingface.co/{model_name} model to recognize audio files.
 
 > Due to resource limitations, audio duration **must not** exceed **{max_duration}** seconds.
 """.strip()
 
-description_foot = f"""
-{authors_table}
-""".strip()
 
 transcription_value = """
 Recognized text will appear here.
@@ -107,15 +117,14 @@ tech_env = f"""
 tech_libraries = f"""
 #### Libraries
 
-- torch: {version('torch')}
-- torchaudio: {version('torchaudio')}
-- transformers: {version('transformers')}
-- accelerate: {version('accelerate')}
-- gradio: {version('gradio')}
+- torch: {version("torch")}
+- torchaudio: {version("torchaudio")}
+- transformers: {version("transformers")}
+- accelerate: {version("accelerate")}
+- gradio: {version("gradio")}
 """.strip()
 
 
-@spaces.GPU
 def inference(audio_path, progress=gr.Progress()):
     if not audio_path:
         raise gr.Error("Please upload an audio file.")
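Note: the quote flip inside the template is safe on the Python 3.10 the README targets, because a triple-quoted f-string only terminates on the exact `"""` sequence; reusing the *same* single delimiter inside a replacement field needs Python 3.12+ (PEP 701). A quick stdlib-only check:

```python
# Double quotes inside a triple-double-quoted f-string parse fine on 3.10+,
# because only the exact closing `"""` sequence terminates the literal.
libs = {"torch": "2.x"}  # stand-in value, just to make the line runnable
print(f"""- torch: {libs["torch"]}""")
# f"{libs["torch"]}" (same quote reused) is a SyntaxError before Python 3.12.
```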
@@ -190,53 +199,87 @@ def inference(audio_path, progress=gr.Progress()):
     result_texts = []
 
     for result in results:
-        result_texts.append(f'**{result["path"]}**')
+        result_texts.append(f"**{result['path']}**")
         result_texts.append("\n\n")
-        result_texts.append(f'> {result["transcription"]}')
+        result_texts.append(f"> {result['transcription']}")
         result_texts.append("\n\n")
-        result_texts.append(f'**Audio duration**: {result["audio_duration"]}')
+        result_texts.append(f"**Audio duration**: {result['audio_duration']}")
         result_texts.append("\n")
-        result_texts.append(f'**Real-Time Factor**: {result["rtf"]}')
+        result_texts.append(f"**Real-Time Factor**: {result['rtf']}")
 
     return "\n".join(result_texts)
 
 
-demo = gr.Blocks(
-    title="Speech-to-Text for Ukrainian",
-    analytics_enabled=False,
-    theme=gr.themes.Base(),
-)
+inference_func = inference
+if use_zero_gpu:
+    inference_func = spaces.GPU(inference)
 
-with demo:
-    gr.Markdown(description_head)
 
-    gr.Markdown("## Usage")
+def create_app():
+    tab = gr.Blocks(
+        title="Speech-to-Text for Ukrainian",
+        analytics_enabled=False,
+        theme=Soft(),
+    )
+
+    with tab:
+        gr.Markdown(description_head)
+
+        gr.Markdown("## Usage")
 
-    with gr.Column():
-        audio_file = gr.Audio(label="Audio file", type="filepath")
-        transcription = gr.Markdown(
-            label="Transcription",
-            value=transcription_value,
+        with gr.Column():
+            audio_file = gr.Audio(label="Audio file", type="filepath")
+            transcription = gr.Markdown(
+                label="Transcription",
+                value=transcription_value,
+            )
+
+        gr.Button("Run").click(
+            inference_func,
+            concurrency_limit=concurrency_limit,
+            inputs=audio_file,
+            outputs=transcription,
         )
 
-    gr.Button("Run").click(
-        inference,
-        concurrency_limit=concurrency_limit,
-        inputs=audio_file,
-        outputs=transcription,
-    )
+        with gr.Row():
+            gr.Examples(label="Choose an example", inputs=audio_file, examples=examples)
 
-    with gr.Row():
-        gr.Examples(label="Choose an example", inputs=audio_file, examples=examples)
+        gr.Markdown(examples_table)
 
-    gr.Markdown(examples_table)
+    return tab
 
-    gr.Markdown(description_foot)
 
-    gr.Markdown("### Gradio app uses:")
-    gr.Markdown(tech_env)
-    gr.Markdown(tech_libraries)
+def create_env():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(tech_env)
+        gr.Markdown(tech_libraries)
+
+    return tab
+
+
+def create_authors():
+    with gr.Blocks(theme=Soft()) as tab:
+        gr.Markdown(authors_table)
+
+    return tab
+
+
+def create_demo():
+    app_tab = create_app()
+    authors_tab = create_authors()
+    env_tab = create_env()
+
+    return gr.TabbedInterface(
+        [app_tab, authors_tab, env_tab],
+        tab_names=[
+            "🎙️ Recognition",
+            "👥 Authors",
+            "📦 Environment, Models, and Libraries",
+        ],
+    )
+
 
 if __name__ == "__main__":
+    demo = create_demo()
     demo.queue()
     demo.launch()
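Note: dropping the unconditional `@spaces.GPU` decorator in favor of wrapping at module level keeps the app importable on machines without the `spaces` package. A minimal sketch of that conditional-decoration pattern (names and the function body are illustrative; the real app gates on `is_zero_gpu_space()` rather than on the import itself):

```python
# Decorate only when the optional ZeroGPU scheduler is importable;
# otherwise fall back to a transparent no-op wrapper.
try:
    import spaces
    gpu_wrap = spaces.GPU      # real ZeroGPU decorator on Hugging Face Spaces
except ImportError:
    def gpu_wrap(fn):          # no-op fallback everywhere else
        return fn


def inference(audio_path: str) -> str:
    return f"would transcribe {audio_path}"


inference_func = gpu_wrap(inference)  # same shape as the app.py change
print(inference_func("sample.wav"))
```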
 
requirements-dev.txt DELETED
@@ -1 +0,0 @@
-ruff
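Note: with requirements-dev.txt gone, the one tool it pinned can still be run ad hoc when needed, e.g. `uvx ruff check .` via uv's ephemeral tool runner, or a plain `uv pip install ruff` into the active venv.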