Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -216,7 +216,7 @@ def check_file(audio_file_path):
|
|
| 216 |
return audio_file_path, None
|
| 217 |
|
| 218 |
|
| 219 |
-
def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
|
| 220 |
# Check and process the file first
|
| 221 |
processed_path, error_message = check_file(audio_file_path)
|
| 222 |
|
|
@@ -227,7 +227,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
|
|
| 227 |
with open(processed_path, "rb") as file:
|
| 228 |
transcription = client.audio.transcriptions.create(
|
| 229 |
file=(os.path.basename(processed_path), file.read()),
|
| 230 |
-
model=
|
| 231 |
prompt=prompt,
|
| 232 |
response_format="json",
|
| 233 |
language=None if auto_detect_language else language,
|
|
@@ -236,7 +236,7 @@ def transcribe_audio(audio_file_path, prompt, language, auto_detect_language):
|
|
| 236 |
return transcription.text
|
| 237 |
|
| 238 |
|
| 239 |
-
def translate_audio(audio_file_path, prompt):
|
| 240 |
# Check and process the file first
|
| 241 |
processed_path, error_message = check_file(audio_file_path)
|
| 242 |
|
|
@@ -247,7 +247,7 @@ def translate_audio(audio_file_path, prompt):
|
|
| 247 |
with open(processed_path, "rb") as file:
|
| 248 |
translation = client.audio.translations.create(
|
| 249 |
file=(os.path.basename(processed_path), file.read()),
|
| 250 |
-
model=
|
| 251 |
prompt=prompt,
|
| 252 |
response_format="json",
|
| 253 |
temperature=0.0,
|
|
@@ -274,7 +274,7 @@ def create_srt_from_text(transcription_text):
|
|
| 274 |
return "".join(srt_lines)
|
| 275 |
|
| 276 |
|
| 277 |
-
def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
|
| 278 |
"""Converts Whisper JSON transcription to SRT format."""
|
| 279 |
# Check and process the file first
|
| 280 |
processed_path, error_message = check_file(audio_file_path)
|
|
@@ -286,7 +286,7 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
|
|
| 286 |
with open(processed_path, "rb") as file:
|
| 287 |
transcription_json = client.audio.transcriptions.create(
|
| 288 |
file=(os.path.basename(processed_path), file.read()),
|
| 289 |
-
model=
|
| 290 |
prompt=prompt,
|
| 291 |
response_format="json",
|
| 292 |
language=None if auto_detect_language else language, # Conditional language parameter
|
|
@@ -305,10 +305,36 @@ def generate_subtitles(audio_file_path, prompt, language, auto_detect_language):
|
|
| 305 |
temp_srt_file.write(srt_content)
|
| 306 |
|
| 307 |
# Generate subtitles and add to video if MP4
|
| 308 |
-
if audio_file_path.lower().endswith(".mp4"):
|
| 309 |
try:
|
| 310 |
-
#
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
except subprocess.CalledProcessError as e:
|
| 313 |
return None, None, f"Error during subtitle addition: {e}", temp_srt_path # Return the temp_srt_path
|
| 314 |
|
|
@@ -386,6 +412,11 @@ with gr.Blocks() as demo:
|
|
| 386 |
audio_input = gr.File(
|
| 387 |
type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
|
| 388 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
with gr.Row():
|
| 390 |
transcribe_prompt = gr.Textbox(
|
| 391 |
label="Prompt (Optional)",
|
|
@@ -402,7 +433,7 @@ with gr.Blocks() as demo:
|
|
| 402 |
transcription_output = gr.Textbox(label="Transcription")
|
| 403 |
transcribe_button.click(
|
| 404 |
transcribe_audio,
|
| 405 |
-
inputs=[audio_input, transcribe_prompt, language, auto_detect_language],
|
| 406 |
outputs=transcription_output,
|
| 407 |
)
|
| 408 |
with gr.TabItem("Translation"):
|
|
@@ -411,6 +442,11 @@ with gr.Blocks() as demo:
|
|
| 411 |
audio_input_translate = gr.File(
|
| 412 |
type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
|
| 413 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
with gr.Row():
|
| 415 |
translate_prompt = gr.Textbox(
|
| 416 |
label="Prompt (Optional)",
|
|
@@ -420,7 +456,7 @@ with gr.Blocks() as demo:
|
|
| 420 |
translation_output = gr.Textbox(label="Translation")
|
| 421 |
translate_button.click(
|
| 422 |
translate_audio,
|
| 423 |
-
inputs=[audio_input_translate, translate_prompt],
|
| 424 |
outputs=translation_output,
|
| 425 |
)
|
| 426 |
with gr.TabItem("Subtitle Maker"):
|
|
@@ -429,6 +465,11 @@ with gr.Blocks() as demo:
|
|
| 429 |
label="Upload Audio/Video",
|
| 430 |
file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
|
| 431 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
transcribe_prompt_subtitles = gr.Textbox(
|
| 433 |
label="Prompt (Optional)",
|
| 434 |
info="Specify any context or spelling corrections.",
|
|
@@ -452,6 +493,7 @@ with gr.Blocks() as demo:
|
|
| 452 |
transcribe_prompt_subtitles,
|
| 453 |
language_subtitles,
|
| 454 |
auto_detect_language_subtitles,
|
|
|
|
| 455 |
],
|
| 456 |
outputs=[srt_output, video_output, gr.Textbox(label="Error")],
|
| 457 |
)
|
|
|
|
| 216 |
return audio_file_path, None
|
| 217 |
|
| 218 |
|
| 219 |
+
def transcribe_audio(audio_file_path, prompt, language, auto_detect_language, model):
|
| 220 |
# Check and process the file first
|
| 221 |
processed_path, error_message = check_file(audio_file_path)
|
| 222 |
|
|
|
|
| 227 |
with open(processed_path, "rb") as file:
|
| 228 |
transcription = client.audio.transcriptions.create(
|
| 229 |
file=(os.path.basename(processed_path), file.read()),
|
| 230 |
+
model=model,
|
| 231 |
prompt=prompt,
|
| 232 |
response_format="json",
|
| 233 |
language=None if auto_detect_language else language,
|
|
|
|
| 236 |
return transcription.text
|
| 237 |
|
| 238 |
|
| 239 |
+
def translate_audio(audio_file_path, prompt, model):
|
| 240 |
# Check and process the file first
|
| 241 |
processed_path, error_message = check_file(audio_file_path)
|
| 242 |
|
|
|
|
| 247 |
with open(processed_path, "rb") as file:
|
| 248 |
translation = client.audio.translations.create(
|
| 249 |
file=(os.path.basename(processed_path), file.read()),
|
| 250 |
+
model=model,
|
| 251 |
prompt=prompt,
|
| 252 |
response_format="json",
|
| 253 |
temperature=0.0,
|
|
|
|
| 274 |
return "".join(srt_lines)
|
| 275 |
|
| 276 |
|
| 277 |
+
def generate_subtitles(audio_file_path, prompt, language, auto_detect_language, model):
|
| 278 |
"""Converts Whisper JSON transcription to SRT format."""
|
| 279 |
# Check and process the file first
|
| 280 |
processed_path, error_message = check_file(audio_file_path)
|
|
|
|
| 286 |
with open(processed_path, "rb") as file:
|
| 287 |
transcription_json = client.audio.transcriptions.create(
|
| 288 |
file=(os.path.basename(processed_path), file.read()),
|
| 289 |
+
model=model,
|
| 290 |
prompt=prompt,
|
| 291 |
response_format="json",
|
| 292 |
language=None if auto_detect_language else language, # Conditional language parameter
|
|
|
|
| 305 |
temp_srt_file.write(srt_content)
|
| 306 |
|
| 307 |
# Generate subtitles and add to video if MP4
|
| 308 |
+
if audio_file_path.lower().endswith((".mp4", ".webm")):
|
| 309 |
try:
|
| 310 |
+
# Use ffmpeg to add subtitles to the video
|
| 311 |
+
output_file_path = audio_file_path.replace(os.path.splitext(audio_file_path)[1], "_with_subs" + os.path.splitext(audio_file_path)[1])
|
| 312 |
+
subprocess.run(
|
| 313 |
+
[
|
| 314 |
+
"ffmpeg",
|
| 315 |
+
"-i",
|
| 316 |
+
audio_file_path,
|
| 317 |
+
"-i",
|
| 318 |
+
temp_srt_path,
|
| 319 |
+
"-map",
|
| 320 |
+
"0:v",
|
| 321 |
+
"-map",
|
| 322 |
+
"0:a",
|
| 323 |
+
"-map",
|
| 324 |
+
"1",
|
| 325 |
+
"-c:v",
|
| 326 |
+
"copy",
|
| 327 |
+
"-c:a",
|
| 328 |
+
"copy",
|
| 329 |
+
"-c:s",
|
| 330 |
+
"mov_text",
|
| 331 |
+
"-metadata:s:s:0",
|
| 332 |
+
"language=eng",
|
| 333 |
+
output_file_path,
|
| 334 |
+
],
|
| 335 |
+
check=True,
|
| 336 |
+
)
|
| 337 |
+
return None, output_file_path, None, temp_srt_path # Return the temp_srt_path
|
| 338 |
except subprocess.CalledProcessError as e:
|
| 339 |
return None, None, f"Error during subtitle addition: {e}", temp_srt_path # Return the temp_srt_path
|
| 340 |
|
|
|
|
| 412 |
audio_input = gr.File(
|
| 413 |
type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
|
| 414 |
)
|
| 415 |
+
model_choice_transcribe = gr.Dropdown(
|
| 416 |
+
choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
|
| 417 |
+
value="whisper-large-v3",
|
| 418 |
+
label="Model",
|
| 419 |
+
)
|
| 420 |
with gr.Row():
|
| 421 |
transcribe_prompt = gr.Textbox(
|
| 422 |
label="Prompt (Optional)",
|
|
|
|
| 433 |
transcription_output = gr.Textbox(label="Transcription")
|
| 434 |
transcribe_button.click(
|
| 435 |
transcribe_audio,
|
| 436 |
+
inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
|
| 437 |
outputs=transcription_output,
|
| 438 |
)
|
| 439 |
with gr.TabItem("Translation"):
|
|
|
|
| 442 |
audio_input_translate = gr.File(
|
| 443 |
type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
|
| 444 |
)
|
| 445 |
+
model_choice_translate = gr.Dropdown(
|
| 446 |
+
choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
|
| 447 |
+
value="whisper-large-v3",
|
| 448 |
+
label="Model",
|
| 449 |
+
)
|
| 450 |
with gr.Row():
|
| 451 |
translate_prompt = gr.Textbox(
|
| 452 |
label="Prompt (Optional)",
|
|
|
|
| 456 |
translation_output = gr.Textbox(label="Translation")
|
| 457 |
translate_button.click(
|
| 458 |
translate_audio,
|
| 459 |
+
inputs=[audio_input_translate, translate_prompt, model_choice_translate],
|
| 460 |
outputs=translation_output,
|
| 461 |
)
|
| 462 |
with gr.TabItem("Subtitle Maker"):
|
|
|
|
| 465 |
label="Upload Audio/Video",
|
| 466 |
file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
|
| 467 |
)
|
| 468 |
+
model_choice_subtitles = gr.Dropdown(
|
| 469 |
+
choices=["whisper-large-v3"], # Only include 'whisper-large-v3'
|
| 470 |
+
value="whisper-large-v3",
|
| 471 |
+
label="Model",
|
| 472 |
+
)
|
| 473 |
transcribe_prompt_subtitles = gr.Textbox(
|
| 474 |
label="Prompt (Optional)",
|
| 475 |
info="Specify any context or spelling corrections.",
|
|
|
|
| 493 |
transcribe_prompt_subtitles,
|
| 494 |
language_subtitles,
|
| 495 |
auto_detect_language_subtitles,
|
| 496 |
+
model_choice_subtitles,
|
| 497 |
],
|
| 498 |
outputs=[srt_output, video_output, gr.Textbox(label="Error")],
|
| 499 |
)
|