yanyaoer committed on
Commit
cf44123
·
verified ·
1 Parent(s): ef05dc7

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .gitignore +7 -0
  2. .gradio/certificate.pem +31 -0
  3. .python-version +1 -0
  4. README.md +2 -8
  5. main.py +120 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .venv
2
+ .gradio
3
+ *.wav
4
+ *.mp4
5
+ *.m4a
6
+ *.swp
7
+ outputs
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Video2text
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.29.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: video2text
3
+ app_file: main.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.29.1
 
 
6
  ---
 
 
main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/env -S uv run
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = ["yt-dlp", "whisper-cpp-pybind", "faster-whisper", "gradio"]
5
+ # ///
6
+
7
+ # dependencies = ["yt-dlp", "mlx-whisper", "sherpa-onnx"]
8
+ # dependencies = ["yt-dlp", "soundfile", "pywhispercpp", "mlx-whisper"]
9
+
10
+ from pathlib import Path
11
+
12
+ # import mlx_whisper
13
+ import yt_dlp
14
+ # from pywhispercpp.model import Model as whisper_cpp
15
+ from whispercpp import Whisper
16
+
17
def download(url):
    """Download the audio of *url* and convert it to a WAV file.

    yt-dlp fetches the best available audio stream and the
    ``FFmpegExtractAudio`` post-processor converts it to WAV; the extra
    ffmpeg arguments request a 16000 Hz sample rate and a single channel.

    Args:
        url: Address of a media page that yt-dlp can handle.

    Returns:
        The name of the resulting file, ``<id>.wav``, relative to the
        current working directory (see ``outtmpl``).

    Raises:
        Exception: If yt-dlp returns no metadata for the URL. Errors raised
            by yt-dlp itself propagate unchanged.
    """
    ydl_opts = {
        "format": "m4a/bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
            }
        ],
        "outtmpl": "%(id)s.%(ext)s",
        "postprocessor_args": ["-ar", "16000", "-ac", "1"],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # A single call downloads the media and returns its metadata, so the
        # page is not fetched a second time just to learn the id.
        info = ydl.extract_info(url, download=True)

    if not info:
        raise Exception("Download failed")

    return info["id"] + ".wav"
43
+
44
+
45
def asr_mlx(wav_file):
    """Transcribe *wav_file* with mlx-whisper and print each segment.

    Args:
        wav_file: Path of the audio file to transcribe.

    Returns:
        The text of all segments joined with newlines.
    """
    # Imported here because the module-level import is commented out;
    # without it this function fails with NameError on first use.
    import mlx_whisper

    result = mlx_whisper.transcribe(
        wav_file,
        path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
    )

    texts = [seg["text"] for seg in result["segments"]]
    for text in texts:
        print(text)
    return "\n".join(texts)
53
+
54
+
55
def asr_cpp(
    wav_file,
    model_path="/home/ubuntu/code/whisper.cpp/models/ggml-large-v3-turbo.bin",
):
    """Transcribe *wav_file* with the whisper.cpp Python binding.

    Args:
        wav_file: Path of the audio file to transcribe.
        model_path: Location of the ggml model file. The default is the
            development machine's path used by the original code.

    Returns:
        None. The binding is asked to emit CSV, JSON, LRC, SRT, TXT and VTT
        output; where that output is written is decided by the library.
    """
    # Same module name as the import at the top of this file; the previous
    # local import used a different spelling ("whisper_cpp").
    from whispercpp import Whisper

    whisper = Whisper(model_path)
    whisper.transcribe(wav_file, diarize=True)
    whisper.output(
        output_csv=True,
        output_jsn=True,
        output_lrc=True,
        output_srt=True,
        output_txt=True,
        output_vtt=True,
        log_score=True,
    )
61
+
62
# Loaded faster-whisper models keyed by model size, so repeated requests do
# not pay the model start-up cost again.
_FAST_MODELS = {}


def _get_fast_model(model_size):
    """Return the faster-whisper model for *model_size*, loading it once."""
    from faster_whisper import WhisperModel

    model = _FAST_MODELS.get(model_size)
    if model is None:
        model = WhisperModel(model_size, device="cpu", compute_type="float32")
        _FAST_MODELS[model_size] = model
    return model


def asr_fast(
    wav_file,
    language="zh",
    initial_prompt="以下是普通话的句子。",
    model_size="large-v3-turbo",
):
    """Transcribe *wav_file* with faster-whisper on the CPU.

    Args:
        wav_file: Path of the audio file to transcribe.
        language: Language code passed to the model.
        initial_prompt: Prompt text passed to the model.
        model_size: Name of the faster-whisper model to load.

    Returns:
        The text of all segments joined with newlines.
    """
    model = _get_fast_model(model_size)
    segments, _info = model.transcribe(
        wav_file, language=language, initial_prompt=initial_prompt
    )

    result = [s.text for s in segments]
    print(result)
    return "\n".join(result)
78
+
79
+
80
+ import gradio as gr
81
+
82
def handleURL(url):
    """Download the media at *url* and return its transcription.

    Used as the click handler of the Gradio UI.

    Args:
        url: Text entered by the user; surrounding whitespace is ignored.

    Returns:
        The transcription produced by ``asr_fast``.

    Raises:
        gr.Error: If no URL was entered.
    """
    url = (url or "").strip()
    if not url:
        # Fail early with a readable message instead of a downloader error.
        raise gr.Error("Please enter a video URL.")

    wav_file = download(url)
    return asr_fast(wav_file)
86
+
87
+ # demo = gr.Interface( fn=handleURL, inputs=["text"], outputs=["text"])
88
+
89
# Introductory Markdown shown at the top of the page. Built from joined
# string pieces so its content does not depend on source indentation.
_INTRO_MARKDOWN = (
    "# Fetch a video from a URL and transcribe it to text\n"
    "---\n"
    "\n"
    "This demo runs on a very low-powered server, so processing may take "
    "about 1.5x the length of the original video.\n"
    "Try it yourself locally if you have a MacBook with Apple silicon "
    "or a high-performance GPU.\n"
    "<https://gist.github.com/yanyaoer/5cc7b0dd6729f306ad3cb740d501cabd#file-0-video2text-py>\n"
)

# Page layout: intro text, URL input, output box, and a button that runs
# handleURL on the entered URL and shows the result in the output box.
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)
    name = gr.Textbox(label="Video URL to transcribe")
    output = gr.Textbox(label="Output")
    greet_btn = gr.Button("submit")
    greet_btn.click(fn=handleURL, inputs=name, outputs=output)
104
+
105
+
106
if __name__ == "__main__":
    import sys

    # Optional first command-line argument: the URL of a video. It is only
    # bound here; nothing below reads it. For a manual run without the UI:
    #     w = download(u); asr_fast(w)
    if len(sys.argv) > 1:
        u = sys.argv[1]
    else:
        u = "https://www.bilibili.com/video/BV1ZMNrejEnH/"

    # Start the web UI with a public share link.
    demo.launch(share=True)