freddyaboulton HF Staff committed
Commit 61e51ff · verified · 1 Parent(s): 5f3408d

Upload folder using huggingface_hub

Files changed (3)
  1. requirements.txt +2 -2
  2. run.ipynb +1 -1
  3. run.py +6 -1
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-gradio-client @ git+https://github.com/gradio-app/gradio@9f0fe392c9f2604b9f937b9414e67d9b71b69109#subdirectory=client/python
-https://gradio-pypi-previews.s3.amazonaws.com/9f0fe392c9f2604b9f937b9414e67d9b71b69109/gradio-4.44.0-py3-none-any.whl
+gradio-client @ git+https://github.com/gradio-app/gradio@b888db4a9af43a648f0d772c2c3dce429fb72cfa#subdirectory=client/python
+https://gradio-pypi-previews.s3.amazonaws.com/b888db4a9af43a648f0d772c2c3dce429fb72cfa/gradio-4.44.0-py3-none-any.whl
 torch
 torchaudio
 transformers
run.ipynb CHANGED
@@ -1 +1 @@
- {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(sources=[\"microphone\"]),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
+ {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " \n", " # Convert to mono if stereo\n", " if y.ndim > 1:\n", " y = y.mean(axis=1)\n", " \n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"] # type: ignore\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(sources=\"microphone\"),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
run.py CHANGED
@@ -6,6 +6,11 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-bas
 
 def transcribe(audio):
     sr, y = audio
+
+    # Convert to mono if stereo
+    if y.ndim > 1:
+        y = y.mean(axis=1)
+
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
 
@@ -13,7 +18,7 @@ def transcribe(audio):
 
 demo = gr.Interface(
     transcribe,
-    gr.Audio(sources=["microphone"]),
+    gr.Audio(sources="microphone"),
     "text",
 )
 
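For reference, applying the hunks above to the previous version leaves run.py reading roughly as follows (reconstructed from the diff and the notebook source, not copied from the file itself):

import gradio as gr
from transformers import pipeline
import numpy as np

transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe(audio):
    sr, y = audio

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    y /= np.max(np.abs(y))

    return transcriber({"sampling_rate": sr, "raw": y})["text"]  # type: ignore

demo = gr.Interface(
    transcribe,
    gr.Audio(sources="microphone"),
    "text",
)

if __name__ == "__main__":
    demo.launch()

Running the script launches the demo locally; sources="microphone" is now passed as a bare string rather than a single-element list, which the pinned preview build evidently accepts, and keeps the component restricted to microphone input.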