Spaces:
Sleeping
Sleeping
Upload Whisper_音檔辨識.py
Browse files- Whisper_音檔辨識.py +59 -0
Whisper_音檔辨識.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import whisper
|
3 |
+
import tempfile
|
4 |
+
import librosa
|
5 |
+
import soundfile as sf
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Choose the compute device: CUDA when torch reports a usable GPU, else CPU.
if whisper.torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the Whisper model named "small" onto the chosen device; this runs once
# at module import and the resulting object is shared by every request.
model = whisper.load_model("small", device=device)
|
13 |
+
|
14 |
+
def transcribe_audio_and_save(audio):
    """Transcribe an audio file with Whisper and save the result as text.

    The input is decoded and resampled to 16 kHz with librosa, written to a
    temporary WAV file, and handed to the module-level ``model``. The
    transcription is returned in two forms and also written to a UTF-8 text
    file in the system temporary directory.

    Args:
        audio: Filesystem path of the audio file to transcribe. May be
            ``None`` or empty when the form is submitted without a file.

    Returns:
        A 3-tuple ``(original_text, text_with_spaces, output_path)``:
        Whisper's own ``result['text']``, the segment texts joined with a
        single space (outer whitespace stripped), and the path of the saved
        ``<input name>_transcription.txt`` file.

    Raises:
        gr.Error: If no audio file path was provided.
    """
    # Fail with a message the UI can show instead of an opaque TypeError
    # from os.path.basename(None).
    if not audio:
        raise gr.Error("請先上傳音效檔")

    # Base name of the upload (no directory, no extension) names the output.
    input_filename = os.path.splitext(os.path.basename(audio))[0]

    # Convert to WAV inside a private temporary directory. A plain file in a
    # temporary directory can be reopened by name on every platform, whereas
    # a still-open NamedTemporaryFile cannot be reopened on Windows.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "input.wav")

        # librosa handles many input formats and resamples to 16 kHz.
        audio_data, sample_rate = librosa.load(audio, sr=16000)
        sf.write(wav_path, audio_data, sample_rate)

        # Transcribe while the WAV still exists; the directory is removed
        # when this block exits.
        result = model.transcribe(wav_path, task='transcribe', verbose=True)

    original_text = result['text']  # Whisper's direct output
    segments = result['segments']   # per-segment information

    # Join the segment texts with one space between consecutive segments.
    text_with_spaces = " ".join(
        segment['text'] for segment in segments
    ).strip()

    # Save next to other temp files, named after the uploaded audio file.
    output_filename = f"{input_filename}_transcription.txt"
    output_path = os.path.join(tempfile.gettempdir(), output_filename)

    # Explicit UTF-8: the transcription may contain non-ASCII text, and the
    # platform default encoding is not guaranteed to be able to encode it.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("Original transcription:\n")
        f.write(original_text + "\n\n")
        f.write("Transcription with spaces:\n")
        f.write(text_with_spaces + "\n")

    return original_text, text_with_spaces, output_path
|
44 |
+
|
45 |
+
# Gradio UI: one audio upload as input; two text boxes and one downloadable
# file as outputs.
_audio_input = gr.Audio(type="filepath", label="上傳音效檔")  # passes a file path
_result_outputs = [
    gr.Textbox(label="原始辨識文字"),  # Whisper's direct transcription
    gr.Textbox(label="加入空格後的文字"),  # segments joined with spaces
    gr.File(label="下載辨識結果文檔"),  # saved transcription file
]

iface = gr.Interface(
    fn=transcribe_audio_and_save,
    inputs=_audio_input,
    outputs=_result_outputs,
    title="Whisper 音效辨識與儲存",
    description="上傳音效檔,Whisper 會自動辨識語音,並分別顯示原始輸出和加入空格的輸出,還可以下載結果文檔。",
)

# Start the interface on port 7862.
iface.launch(server_port=7862)
|