DavidLanz committed on
Commit
82057f3
·
verified ·
1 Parent(s): 856102f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -27
app.py CHANGED
@@ -32,6 +32,9 @@ ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
32
  API_KEY = os.environ.get("API_KEY") # 在 HF Space 的 Repo secrets 設定
33
  MODEL_NAME = os.environ.get("WHISPER_MODEL", "guillaumekln/faster-whisper-large-v2")
34
 
 
 
 
35
  # ------------------------------------
36
  # 裝置與模型
37
  # ------------------------------------
@@ -64,7 +67,7 @@ active_requests = 0
64
  # ------------------------------------
65
  def validate_api_key(req):
66
  api_key = req.headers.get('X-API-Key')
67
- return api_key == API_KEY if API_KEY else False
68
 
69
  def allowed_file(filename):
70
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
@@ -83,7 +86,7 @@ def extract_audio_from_video(video_path, output_audio_path):
83
  使用 ffmpeg 從影片擷取 PCM WAV,並用 moviepy 檢查長度
84
  """
85
  try:
86
- # 先擷取聲音
87
  ffmpeg.input(video_path).output(
88
  output_audio_path,
89
  acodec='pcm_s16le'
@@ -94,17 +97,17 @@ def extract_audio_from_video(video_path, output_audio_path):
94
  video = VideoFileClip(video_path)
95
  if video.duration > MAX_FILE_DURATION:
96
  video.close()
97
- raise ValueError(f"影音時長超過 {MAX_FILE_DURATION} 秒")
98
  video.close()
99
 
100
  return output_audio_path
101
  except Exception as e:
102
- logging.exception("提取影音中的聲音出錯")
103
- raise Exception(f"提取影音中的聲音出錯: {str(e)}")
104
 
105
  def fmt_mmss_mmm(seconds: float) -> str:
106
  """
107
- 轉成 MM:SS.mmm(符合你給的 JSON 範例,如 00:01.000)
108
  若未來需要小時欄位,可改為 HH:MM:SS.mmm。
109
  """
110
  if seconds is None:
@@ -114,17 +117,26 @@ def fmt_mmss_mmm(seconds: float) -> str:
114
  sec, ms = divmod(ms, 1000)
115
  return f"{minutes:02d}:{sec:02d}.{ms:03d}"
116
 
117
- def read_lang_param():
118
  """
119
- 讀取 ?lang= 參數;'auto' 或空字串時回傳 None(自動偵測)
120
  """
121
  lang_param = request.args.get("lang", "").strip()
122
- return None if (not lang_param or lang_param.lower() == "auto") else lang_param
 
 
 
 
 
 
 
 
 
123
 
124
  def run_transcribe_pipeline(uploaded_file_path: str, file_extension: str):
125
  """
126
- 共用的轉錄流程:處理影片/聲音、長度檢查、呼叫 Faster-Whisper。
127
- 回傳:(segments_iterable, is_video)
128
  """
129
  is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS
130
  temp_audio_path = None
@@ -135,36 +147,40 @@ def run_transcribe_pipeline(uploaded_file_path: str, file_extension: str):
135
  transcription_file = temp_audio_path
136
  else:
137
  transcription_file = uploaded_file_path
138
- # 檢查聲音長度
139
  try:
140
  waveform, sample_rate = torchaudio.load(transcription_file, format=file_extension)
141
  duration = waveform.size(1) / sample_rate
142
  if duration > MAX_FILE_DURATION:
143
- raise ValueError(f"聲音時長超過 {MAX_FILE_DURATION} 秒")
144
  except Exception:
145
- logging.exception(f"使用 torchaudio.load 載入聲音檔出錯: {transcription_file}")
146
  try:
147
  torchaudio.set_audio_backend("soundfile")
148
  waveform, sample_rate = torchaudio.load(transcription_file)
149
  duration = waveform.size(1) / sample_rate
150
  if duration > MAX_FILE_DURATION:
151
- raise ValueError(f"聲音時長超過 {MAX_FILE_DURATION} 秒")
152
  except Exception as soundfile_err:
153
- logging.exception(f"使用 soundfile 後端載入聲音檔出錯: {transcription_file}")
154
- raise Exception(f'使用兩個後端載入聲音檔都出錯: {str(soundfile_err)}')
155
  finally:
156
  torchaudio.set_audio_backend("default")
157
 
 
 
 
 
158
  # 轉錄(保留 segment 級時間)
159
- language = read_lang_param()
160
  segments, info = wmodel.transcribe(
161
  transcription_file,
162
  beam_size=beamsize,
163
  vad_filter=True,
164
- without_timestamps=False, # 要保留時間戳
165
  compression_ratio_threshold=2.4,
166
- word_timestamps=False, # 如需字級,設 True
167
- language=language
 
168
  )
169
 
170
  return segments, is_video, temp_audio_path
@@ -182,7 +198,9 @@ def health_check():
182
  'active_requests': active_requests,
183
  'max_duration_supported': MAX_FILE_DURATION,
184
  'supported_formats': list(ALLOWED_EXTENSIONS),
185
- 'model': MODEL_NAME
 
 
186
  })
187
 
188
  @app.route("/status/busy", methods=["GET"])
@@ -208,7 +226,7 @@ def transcribe_json():
208
  return jsonify({'error': '伺服器繁忙'}), 503
209
 
210
  active_requests += 1
211
- start_time = time.time()
212
  temp_file_path = None
213
  temp_audio_path = None
214
 
@@ -260,7 +278,7 @@ def transcribe_json():
260
  cleanup_temp_files(temp_file_path, temp_audio_path)
261
  active_requests -= 1
262
  request_semaphore.release()
263
- logging.info(f"處理時間:{time.time() - start_time:.2f}s (活動請求:{active_requests})")
264
 
265
  # ------------------------------------
266
  # 端點 2:純文字(整段合併,沒有時間戳)
@@ -276,7 +294,7 @@ def transcribe_text_only():
276
  return jsonify({'error': '伺服器繁忙'}), 503
277
 
278
  active_requests += 1
279
- start_time = time.time()
280
  temp_file_path = None
281
  temp_audio_path = None
282
 
@@ -317,7 +335,7 @@ def transcribe_text_only():
317
  cleanup_temp_files(temp_file_path, temp_audio_path)
318
  active_requests -= 1
319
  request_semaphore.release()
320
- logging.info(f"處理時間:{time.time() - start_time:.2f}s (活動請求:{active_requests})")
321
 
322
 
323
  if __name__ == "__main__":
@@ -325,4 +343,4 @@ if __name__ == "__main__":
325
  os.makedirs(TEMPORARY_FOLDER)
326
  logging.info(f"新建暫存檔案夾: {TEMPORARY_FOLDER}")
327
 
328
- app.run(host="0.0.0.0", port=7860, threaded=True)
 
32
  API_KEY = os.environ.get("API_KEY") # 在 HF Space 的 Repo secrets 設定
33
  MODEL_NAME = os.environ.get("WHISPER_MODEL", "guillaumekln/faster-whisper-large-v2")
34
 
35
+ # 預設提示(可用 ?prompt 覆蓋)
36
+ DEFAULT_INITIAL_PROMPT = "請使用繁體中文輸出"
37
+
38
  # ------------------------------------
39
  # 裝置與模型
40
  # ------------------------------------
 
67
  # ------------------------------------
68
  def validate_api_key(req):
69
  api_key = req.headers.get('X-API-Key')
70
+ return api_key == API_KEY if API_KEY else True # 若沒設定 API_KEY,預設放行(可依需求改)
71
 
72
  def allowed_file(filename):
73
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
86
  使用 ffmpeg 從影片擷取 PCM WAV,並用 moviepy 檢查長度
87
  """
88
  try:
89
+ # 先擷取音訊
90
  ffmpeg.input(video_path).output(
91
  output_audio_path,
92
  acodec='pcm_s16le'
 
97
  video = VideoFileClip(video_path)
98
  if video.duration > MAX_FILE_DURATION:
99
  video.close()
100
+ raise ValueError(f"視頻時長超過 {MAX_FILE_DURATION} 秒")
101
  video.close()
102
 
103
  return output_audio_path
104
  except Exception as e:
105
+ logging.exception("提取視頻中的音訊出錯")
106
+ raise Exception(f"提取視頻中的音訊出錯: {str(e)}")
107
 
108
  def fmt_mmss_mmm(seconds: float) -> str:
109
  """
110
+ 轉成 MM:SS.mmm(符合需求,如 00:01.000)
111
  若未來需要小時欄位,可改為 HH:MM:SS.mmm。
112
  """
113
  if seconds is None:
 
117
  sec, ms = divmod(ms, 1000)
118
  return f"{minutes:02d}:{sec:02d}.{ms:03d}"
119
 
120
def read_lang_param_with_default_zh():
    """
    Read the ``?lang=`` query parameter from the current request.

    Returns the language code "zh" when the parameter is missing, blank,
    or "auto" (case-insensitive); otherwise returns the stripped value as given.
    """
    requested = request.args.get("lang", "").strip()
    use_default = requested == "" or requested.lower() == "auto"
    return "zh" if use_default else requested
128
+
129
def read_initial_prompt():
    """
    Read the ``?prompt=`` query parameter from the current request.

    Returns the stripped value when it is non-empty; otherwise falls back
    to DEFAULT_INITIAL_PROMPT.
    """
    supplied = request.args.get("prompt", "").strip()
    if supplied:
        return supplied
    return DEFAULT_INITIAL_PROMPT
135
 
136
  def run_transcribe_pipeline(uploaded_file_path: str, file_extension: str):
137
  """
138
+ 共用的轉錄流程:處理影片/音訊、長度檢查、呼叫 Faster-Whisper。
139
+ 回傳:(segments_iterable, is_video, temp_audio_path)
140
  """
141
  is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS
142
  temp_audio_path = None
 
147
  transcription_file = temp_audio_path
148
  else:
149
  transcription_file = uploaded_file_path
150
+ # 檢查音訊長度
151
  try:
152
  waveform, sample_rate = torchaudio.load(transcription_file, format=file_extension)
153
  duration = waveform.size(1) / sample_rate
154
  if duration > MAX_FILE_DURATION:
155
+ raise ValueError(f"音訊時長超過 {MAX_FILE_DURATION} 秒")
156
  except Exception:
157
+ logging.exception(f"使用 torchaudio.load 載入音訊檔出錯: {transcription_file}")
158
  try:
159
  torchaudio.set_audio_backend("soundfile")
160
  waveform, sample_rate = torchaudio.load(transcription_file)
161
  duration = waveform.size(1) / sample_rate
162
  if duration > MAX_FILE_DURATION:
163
+ raise ValueError(f"音訊時長超過 {MAX_FILE_DURATION} 秒")
164
  except Exception as soundfile_err:
165
+ logging.exception(f"使用 soundfile 後端載入音訊檔出錯: {transcription_file}")
166
+ raise Exception(f'使用兩個後端載入音訊檔都出錯: {str(soundfile_err)}')
167
  finally:
168
  torchaudio.set_audio_backend("default")
169
 
170
+ # 預設語言 zh,並帶 initial_prompt(可被 ?lang / ?prompt 覆蓋)
171
+ language = read_lang_param_with_default_zh()
172
+ initial_prompt = read_initial_prompt()
173
+
174
  # 轉錄(保留 segment 級時間)
 
175
  segments, info = wmodel.transcribe(
176
  transcription_file,
177
  beam_size=beamsize,
178
  vad_filter=True,
179
+ without_timestamps=False, # 要保留時間戳
180
  compression_ratio_threshold=2.4,
181
+ word_timestamps=False, # 如需字級,設 True
182
+ language=language,
183
+ initial_prompt=initial_prompt
184
  )
185
 
186
  return segments, is_video, temp_audio_path
 
198
  'active_requests': active_requests,
199
  'max_duration_supported': MAX_FILE_DURATION,
200
  'supported_formats': list(ALLOWED_EXTENSIONS),
201
+ 'model': MODEL_NAME,
202
+ 'default_language': 'zh',
203
+ 'default_initial_prompt': DEFAULT_INITIAL_PROMPT
204
  })
205
 
206
  @app.route("/status/busy", methods=["GET"])
 
226
  return jsonify({'error': '伺服器繁忙'}), 503
227
 
228
  active_requests += 1
229
+ t0 = time.time()
230
  temp_file_path = None
231
  temp_audio_path = None
232
 
 
278
  cleanup_temp_files(temp_file_path, temp_audio_path)
279
  active_requests -= 1
280
  request_semaphore.release()
281
+ logging.info(f"/whisper_transcribe 用時:{time.time() - t0:.2f}s (活動請求:{active_requests})")
282
 
283
  # ------------------------------------
284
  # 端點 2:純文字(整段合併,沒有時間戳)
 
294
  return jsonify({'error': '伺服器繁忙'}), 503
295
 
296
  active_requests += 1
297
+ t0 = time.time()
298
  temp_file_path = None
299
  temp_audio_path = None
300
 
 
335
  cleanup_temp_files(temp_file_path, temp_audio_path)
336
  active_requests -= 1
337
  request_semaphore.release()
338
+ logging.info(f"/whisper_transcribe_text 用時:{time.time() - t0:.2f}s (活動請求:{active_requests})")
339
 
340
 
341
  if __name__ == "__main__":
 
343
  os.makedirs(TEMPORARY_FOLDER)
344
  logging.info(f"新建暫存檔案夾: {TEMPORARY_FOLDER}")
345
 
346
+ app.run(host="0.0.0.0", port=7860, threaded=True)