File size: 12,429 Bytes
0666a2d f5dc719 0666a2d 3215c20 cff5a01 0117db0 cff5a01 0666a2d 0117db0 0666a2d f5dc719 cff5a01 17a7d8d cff5a01 0666a2d 856102f fcb5546 0666a2d 82057f3 cff5a01 0666a2d 178706a 0666a2d cff5a01 0666a2d 178706a 0666a2d 178706a 0666a2d cff5a01 178706a cff5a01 922901f 0666a2d 922901f cff5a01 82057f3 178706a 0666a2d cff5a01 178706a 0666a2d 178706a 0666a2d 178706a 0666a2d cff5a01 0666a2d 82057f3 cff5a01 0666a2d 82057f3 0666a2d 3215c20 0666a2d 82057f3 178706a cff5a01 82057f3 856102f cff5a01 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f 82057f3 856102f cff5a01 0666a2d 178706a 0666a2d 82057f3 0666a2d f5dc719 0666a2d 7323fd3 cff5a01 856102f cff5a01 0666a2d 856102f 0666a2d f5dc719 0666a2d 178706a 0666a2d 178706a 0666a2d 82057f3 0666a2d f5dc719 0666a2d 178706a 0666a2d 178706a 0666a2d 856102f 178706a cff5a01 0666a2d 856102f cff5a01 856102f cff5a01 856102f cff5a01 856102f 82057f3 856102f 82057f3 856102f 178706a 0666a2d 178706a 0666a2d 82057f3 f5dc719 0666a2d 178706a 0117db0 82057f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 |
from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import torch
import time
import datetime
from threading import Semaphore
import os
from werkzeug.utils import secure_filename
import tempfile
from moviepy.editor import VideoFileClip
import logging
import torchaudio
import ffmpeg # ffmpeg-python
# ------------------------------------
# 日誌
# ------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
app = Flask(__name__)
# ------------------------------------
# 設定
# ------------------------------------
MAX_CONCURRENT_REQUESTS = 1
MAX_FILE_DURATION = 60 * 30 # 30 分鐘
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
API_KEY = os.environ.get("API_KEY") # 在 HF Space 的 Repo secrets 設定
MODEL_NAME = os.environ.get("WHISPER_MODEL", "guillaumekln/faster-whisper-large-v2")
# 預設提示(可用 ?prompt 覆蓋)
DEFAULT_INITIAL_PROMPT = "請使用繁體中文輸出"
# ------------------------------------
# 裝置與模型
# ------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
logging.info(f"使用設備: {device},計算類型: {compute_type}")
beamsize = 2
try:
wmodel = WhisperModel(
MODEL_NAME,
device=device,
compute_type=compute_type,
download_root="./model_cache"
)
logging.info(f"模型 {MODEL_NAME} 載入成功.")
except Exception as e:
logging.error(f"載入模型 {MODEL_NAME} 失敗: {e}")
wmodel = None
# ------------------------------------
# 併發控制
# ------------------------------------
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
active_requests = 0
# ------------------------------------
# 小工具
# ------------------------------------
def validate_api_key(req):
api_key = req.headers.get('X-API-Key')
return api_key == API_KEY if API_KEY else True # 若沒設定 API_KEY,預設放行(可依需求改)
def allowed_file(filename):
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def cleanup_temp_files(*file_paths):
for file_path in file_paths:
try:
if file_path and os.path.exists(file_path):
os.remove(file_path)
logging.info(f"刪除暫存檔案: {file_path}")
except Exception as e:
logging.error(f"刪除暫存檔案 {file_path} 出錯: {str(e)}")
def extract_audio_from_video(video_path, output_audio_path):
"""
使用 ffmpeg 從影片擷取 PCM WAV,並用 moviepy 檢查長度
"""
try:
# 先擷取音訊
ffmpeg.input(video_path).output(
output_audio_path,
acodec='pcm_s16le'
# 可加參數: ar=44100, ac=2
).run(capture_stdout=True, capture_stderr=True)
# 再檢查影片時長
video = VideoFileClip(video_path)
if video.duration > MAX_FILE_DURATION:
video.close()
raise ValueError(f"視頻時長超過 {MAX_FILE_DURATION} 秒")
video.close()
return output_audio_path
except Exception as e:
logging.exception("提取視頻中的音訊出錯")
raise Exception(f"提取視頻中的音訊出錯: {str(e)}")
def fmt_mmss_mmm(seconds: float) -> str:
"""
轉成 MM:SS.mmm(符合需求,如 00:01.000)
若未來需要小時欄位,可改為 HH:MM:SS.mmm。
"""
if seconds is None:
seconds = 0.0
total_ms = int(round(seconds * 1000))
minutes, ms = divmod(total_ms, 60_000)
sec, ms = divmod(ms, 1000)
return f"{minutes:02d}:{sec:02d}.{ms:03d}"
def read_lang_param_with_default_zh():
"""
讀取 ?lang= 參數;沒帶或為 auto 時預設繁體中文 (zh)
"""
lang_param = request.args.get("lang", "").strip()
if not lang_param or lang_param.lower() == "auto":
return "zh"
return lang_param
def read_initial_prompt():
"""
讀取 ?prompt= 參數;沒帶則使用 DEFAULT_INITIAL_PROMPT
"""
prompt = request.args.get("prompt", "").strip()
return prompt if prompt else DEFAULT_INITIAL_PROMPT
def run_transcribe_pipeline(uploaded_file_path: str, file_extension: str):
"""
共用的轉錄流程:處理影片/音訊、長度檢查、呼叫 Faster-Whisper。
回傳:(segments_iterable, is_video, temp_audio_path)
"""
is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS
temp_audio_path = None
if is_video:
temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
extract_audio_from_video(uploaded_file_path, temp_audio_path)
transcription_file = temp_audio_path
else:
transcription_file = uploaded_file_path
# 檢查音訊長度
try:
waveform, sample_rate = torchaudio.load(transcription_file, format=file_extension)
duration = waveform.size(1) / sample_rate
if duration > MAX_FILE_DURATION:
raise ValueError(f"音訊時長超過 {MAX_FILE_DURATION} 秒")
except Exception:
logging.exception(f"使用 torchaudio.load 載入音訊檔出錯: {transcription_file}")
try:
torchaudio.set_audio_backend("soundfile")
waveform, sample_rate = torchaudio.load(transcription_file)
duration = waveform.size(1) / sample_rate
if duration > MAX_FILE_DURATION:
raise ValueError(f"音訊時長超過 {MAX_FILE_DURATION} 秒")
except Exception as soundfile_err:
logging.exception(f"使用 soundfile 後端載入音訊檔出錯: {transcription_file}")
raise Exception(f'使用兩個後端載入音訊檔都出錯: {str(soundfile_err)}')
finally:
torchaudio.set_audio_backend("default")
# 預設語言 zh,並帶 initial_prompt(可被 ?lang / ?prompt 覆蓋)
language = read_lang_param_with_default_zh()
initial_prompt = read_initial_prompt()
# 轉錄(保留 segment 級時間)
segments, info = wmodel.transcribe(
transcription_file,
beam_size=beamsize,
vad_filter=True,
without_timestamps=False, # 要保留時間戳
compression_ratio_threshold=2.4,
word_timestamps=False, # 如需字級,設 True
language=language,
initial_prompt=initial_prompt
)
return segments, is_video, temp_audio_path
# ------------------------------------
# 健康檢查與狀態
# ------------------------------------
@app.route("/health", methods=["GET"])
def health_check():
return jsonify({
'status': 'API 正在運行',
'timestamp': datetime.datetime.now().isoformat(),
'device': device,
'compute_type': compute_type,
'active_requests': active_requests,
'max_duration_supported': MAX_FILE_DURATION,
'supported_formats': list(ALLOWED_EXTENSIONS),
'model': MODEL_NAME,
'default_language': 'zh',
'default_initial_prompt': DEFAULT_INITIAL_PROMPT
})
@app.route("/status/busy", methods=["GET"])
def server_busy():
is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
return jsonify({
'is_busy': is_busy,
'active_requests': active_requests,
'max_capacity': MAX_CONCURRENT_REQUESTS
})
# ------------------------------------
# 端點 1:JSON(start/end 為 "MM:SS.mmm" 字串)
# ------------------------------------
@app.route("/whisper_transcribe", methods=["POST"])
def transcribe_json():
global active_requests
if not validate_api_key(request):
return jsonify({'error': '無效的 API 金鑰'}), 401
if not request_semaphore.acquire(blocking=False):
return jsonify({'error': '伺服器繁忙'}), 503
active_requests += 1
t0 = time.time()
temp_file_path = None
temp_audio_path = None
try:
if wmodel is None:
return jsonify({'error': '模型載入失敗。請檢查伺服器日誌。'}), 500
if 'file' not in request.files:
return jsonify({'error': '未提供檔'}), 400
file = request.files['file']
if not (file and allowed_file(file.filename)):
return jsonify({'error': f'無效的檔案格式。支持:{", ".join(ALLOWED_EXTENSIONS)}'}), 400
# 儲存上傳檔
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
file.save(temp_file_path)
file_extension = file.filename.rsplit('.', 1)[1].lower()
# 執行轉錄流程
try:
segments_iter, is_video, temp_audio_path = run_transcribe_pipeline(temp_file_path, file_extension)
except Exception as e:
return jsonify({'error': str(e)}), 400
# 組 JSON:start/end 以 "MM:SS.mmm"
results = []
for seg in segments_iter:
start = seg.start or 0.0
end = seg.end or 0.0
text = (seg.text or "").strip()
results.append({
"start": fmt_mmss_mmm(start),
"end": fmt_mmss_mmm(end),
"text": text
})
return jsonify({
'file_type': 'video' if is_video else 'audio',
'segments': results
}), 200
except Exception as e:
logging.exception("轉錄過程中發生異常")
return jsonify({'error': str(e)}), 500
finally:
cleanup_temp_files(temp_file_path, temp_audio_path)
active_requests -= 1
request_semaphore.release()
logging.info(f"/whisper_transcribe 用時:{time.time() - t0:.2f}s (活動請求:{active_requests})")
# ------------------------------------
# 端點 2:純文字(整段合併,沒有時間戳)
# ------------------------------------
@app.route("/whisper_transcribe_text", methods=["POST"])
def transcribe_text_only():
global active_requests
if not validate_api_key(request):
return jsonify({'error': '無效的 API 金鑰'}), 401
if not request_semaphore.acquire(blocking=False):
return jsonify({'error': '伺服器繁忙'}), 503
active_requests += 1
t0 = time.time()
temp_file_path = None
temp_audio_path = None
try:
if wmodel is None:
return jsonify({'error': '模型載入失敗。請檢查伺服器日誌。'}), 500
if 'file' not in request.files:
return jsonify({'error': '未提供檔'}), 400
file = request.files['file']
if not (file and allowed_file(file.filename)):
return jsonify({'error': f'無效的檔案格式。支持:{", ".join(ALLOWED_EXTENSIONS)}'}), 400
# 儲存上傳檔
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
file.save(temp_file_path)
file_extension = file.filename.rsplit('.', 1)[1].lower()
# 執行轉錄流程(沿用同一流程,僅輸出不同)
try:
segments_iter, is_video, temp_audio_path = run_transcribe_pipeline(temp_file_path, file_extension)
except Exception as e:
return jsonify({'error': str(e)}), 400
# 合併純文字
full_text = " ".join((seg.text or "").strip() for seg in segments_iter if (seg.text or "").strip())
# 直接回「純文字」
return Response(full_text, mimetype="text/plain; charset=utf-8"), 200
except Exception as e:
logging.exception("轉錄過程中發生異常")
return jsonify({'error': str(e)}), 500
finally:
cleanup_temp_files(temp_file_path, temp_audio_path)
active_requests -= 1
request_semaphore.release()
logging.info(f"/whisper_transcribe_text 用時:{time.time() - t0:.2f}s (活動請求:{active_requests})")
if __name__ == "__main__":
if not os.path.exists(TEMPORARY_FOLDER):
os.makedirs(TEMPORARY_FOLDER)
logging.info(f"新建暫存檔案夾: {TEMPORARY_FOLDER}")
app.run(host="0.0.0.0", port=7860, threaded=True) |