Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,6 +23,7 @@ from faster_whisper import WhisperModel
|
|
23 |
from mutagen.mp3 import MP3
|
24 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
25 |
from TTS.api import TTS
|
|
|
26 |
|
27 |
# Suppress warnings
|
28 |
warnings.filterwarnings("ignore")
|
@@ -318,6 +319,11 @@ def save_project(audio_path, preset_name, effects):
|
|
318 |
pickle.dump(project_data, f)
|
319 |
return out_path
|
320 |
|
|
|
|
|
|
|
|
|
|
|
321 |
# === Trim Silence Automatically (VAD) ===
|
322 |
def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
|
323 |
audio = AudioSegment.from_file(audio_file)
|
@@ -345,25 +351,12 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
345 |
mixed.export(out_path, format="wav")
|
346 |
return out_path
|
347 |
|
348 |
-
# ===
|
349 |
-
def
|
350 |
-
|
351 |
-
|
352 |
-
spec = spec_from_file_location("plugin", plugin_file.name)
|
353 |
-
plugin = module_from_spec(spec)
|
354 |
-
spec.loader.exec_module(plugin)
|
355 |
-
|
356 |
-
# Run plugin
|
357 |
-
audio = AudioSegment.from_file(audio_file)
|
358 |
-
processed = plugin.process(audio)
|
359 |
-
|
360 |
-
out_path = os.path.join(tempfile.gettempdir(), "plugin_output.wav")
|
361 |
-
processed.export(out_path, format="wav")
|
362 |
-
return out_path
|
363 |
-
except Exception as e:
|
364 |
-
return f"⚠️ Plugin error: {str(e)}"
|
365 |
|
366 |
-
# === UI ===
|
367 |
effect_options = [
|
368 |
"Noise Reduction",
|
369 |
"Compress Dynamic Range",
|
@@ -443,13 +436,13 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
443 |
clear_btn=None
|
444 |
)
|
445 |
|
446 |
-
# --- Transcribe & Edit
|
447 |
with gr.Tab("π Transcribe & Edit"):
|
448 |
gr.Interface(
|
449 |
fn=transcribe_audio,
|
450 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
451 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
452 |
-
title="Transcribe
|
453 |
description="Convert voice to text and edit it before exporting again."
|
454 |
)
|
455 |
|
@@ -479,28 +472,62 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
479 |
description="Detect speaker similarity using AI."
|
480 |
)
|
481 |
|
482 |
-
# ---
|
483 |
-
def
|
484 |
-
|
485 |
-
import json
|
486 |
-
data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
|
487 |
-
encoded = base64.b64encode(json.dumps(data).encode()).decode()
|
488 |
-
return f"https://your-space-url?preset={encoded}"
|
489 |
|
490 |
-
with gr.Tab("
|
491 |
gr.Interface(
|
492 |
-
fn=
|
493 |
inputs=[
|
494 |
-
gr.
|
495 |
-
gr.
|
496 |
-
gr.
|
497 |
],
|
498 |
-
outputs=gr.
|
499 |
-
title="
|
500 |
-
description="
|
501 |
)
|
502 |
|
503 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
with gr.Tab("✂️ Trim Silence Automatically"):
|
505 |
gr.Interface(
|
506 |
fn=detect_silence,
|
@@ -511,10 +538,10 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
511 |
],
|
512 |
outputs=gr.File(label="Trimmed Output"),
|
513 |
title="Auto-Detect & Remove Silence",
|
514 |
-
description="
|
515 |
)
|
516 |
|
517 |
-
# --- Load/Save Project ===
|
518 |
with gr.Tab("π Save/Load Project"):
|
519 |
gr.Interface(
|
520 |
fn=save_project,
|
@@ -528,6 +555,17 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
528 |
description="Save your session, effects, and settings in one file to reuse later."
|
529 |
)
|
530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
# --- Mix Two Tracks ===
|
532 |
with gr.Tab("π Mix Two Tracks"):
|
533 |
gr.Interface(
|
@@ -542,17 +580,17 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
542 |
description="Mix or subtract two audio files."
|
543 |
)
|
544 |
|
545 |
-
# ---
|
546 |
-
with gr.Tab("
|
547 |
gr.Interface(
|
548 |
-
fn=
|
549 |
inputs=[
|
550 |
-
gr.
|
551 |
-
gr.
|
552 |
],
|
553 |
-
outputs=gr.Audio(label="
|
554 |
-
title="
|
555 |
-
description="
|
556 |
)
|
557 |
|
558 |
demo.launch()
|
|
|
23 |
from mutagen.mp3 import MP3
|
24 |
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
25 |
from TTS.api import TTS
|
26 |
+
import pickle
|
27 |
|
28 |
# Suppress warnings
|
29 |
warnings.filterwarnings("ignore")
|
|
|
319 |
pickle.dump(project_data, f)
|
320 |
return out_path
|
321 |
|
322 |
+
def load_project(project_file):
|
323 |
+
with open(project_file.name, "rb") as f:
|
324 |
+
data = pickle.load(f)
|
325 |
+
return data["preset"], data["effects"]
|
326 |
+
|
327 |
# === Trim Silence Automatically (VAD) ===
|
328 |
def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
|
329 |
audio = AudioSegment.from_file(audio_file)
|
|
|
351 |
mixed.export(out_path, format="wav")
|
352 |
return out_path
|
353 |
|
354 |
+
# === Voice Style Transfer (Dummy) ===
|
355 |
+
def apply_style_transfer(audio_path, mood="Happy"):
|
356 |
+
# Replace with real model later
|
357 |
+
return audio_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
|
359 |
+
# === UI Setup ===
|
360 |
effect_options = [
|
361 |
"Noise Reduction",
|
362 |
"Compress Dynamic Range",
|
|
|
436 |
clear_btn=None
|
437 |
)
|
438 |
|
439 |
+
# --- Transcribe & Edit Tab ===
|
440 |
with gr.Tab("π Transcribe & Edit"):
|
441 |
gr.Interface(
|
442 |
fn=transcribe_audio,
|
443 |
inputs=gr.Audio(label="Upload Audio", type="filepath"),
|
444 |
outputs=gr.Textbox(label="Transcribed Text", lines=10),
|
445 |
+
title="Transcribe Spoken Content",
|
446 |
description="Convert voice to text and edit it before exporting again."
|
447 |
)
|
448 |
|
|
|
472 |
description="Detect speaker similarity using AI."
|
473 |
)
|
474 |
|
475 |
+
# --- Voice Cloning (Dummy) ===
|
476 |
+
def clone_voice(*args):
|
477 |
+
return "⚠️ Voice cloning requires additional setup"
|
|
|
|
|
|
|
|
|
478 |
|
479 |
+
with gr.Tab("π Voice Cloning (Dubbing)"):
|
480 |
gr.Interface(
|
481 |
+
fn=clone_voice,
|
482 |
inputs=[
|
483 |
+
gr.File(label="Source Voice Clip"),
|
484 |
+
gr.File(label="Target Voice Clip"),
|
485 |
+
gr.Textbox(label="Text to Clone", lines=5)
|
486 |
],
|
487 |
+
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
488 |
+
title="Replace One Voice With Another",
|
489 |
+
description="Clone voice from source to target speaker using AI"
|
490 |
)
|
491 |
|
492 |
+
# --- Auto-Save / Resume Sessions ===
|
493 |
+
session_state = gr.State()
|
494 |
+
|
495 |
+
def auto_save_session(audio, preset, effects):
|
496 |
+
return {"audio": audio, "preset": preset, "effects": effects}
|
497 |
+
|
498 |
+
def auto_load_session(session):
|
499 |
+
if session and "audio" in session:
|
500 |
+
return session["audio"], session["preset"], session["effects"]
|
501 |
+
return None, None, None
|
502 |
+
|
503 |
+
with gr.Tab("🧾 Auto-Save & Resume"):
|
504 |
+
gr.Interface(
|
505 |
+
fn=auto_save_session,
|
506 |
+
inputs=[
|
507 |
+
gr.Audio(label="Upload Audio", type="filepath"),
|
508 |
+
gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
|
509 |
+
gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
|
510 |
+
],
|
511 |
+
outputs=session_state,
|
512 |
+
title="Auto-Save Your Session",
|
513 |
+
description="Save your current state and resume editing later",
|
514 |
+
allow_flagging="never"
|
515 |
+
)
|
516 |
+
|
517 |
+
gr.Interface(
|
518 |
+
fn=auto_load_session,
|
519 |
+
inputs=session_state,
|
520 |
+
outputs=[
|
521 |
+
gr.Audio(label="Loaded Audio", type="filepath"),
|
522 |
+
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
523 |
+
gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
|
524 |
+
],
|
525 |
+
title="Resume Last Session",
|
526 |
+
description="Reload your last edit state",
|
527 |
+
allow_flagging="never"
|
528 |
+
)
|
529 |
+
|
530 |
+
# --- VAD – Detect & Remove Silence ===
|
531 |
with gr.Tab("✂️ Trim Silence Automatically"):
|
532 |
gr.Interface(
|
533 |
fn=detect_silence,
|
|
|
538 |
],
|
539 |
outputs=gr.File(label="Trimmed Output"),
|
540 |
title="Auto-Detect & Remove Silence",
|
541 |
+
description="Detect and trim silence at start/end or between words"
|
542 |
)
|
543 |
|
544 |
+
# --- Load/Save Project File (.aiproj) ===
|
545 |
with gr.Tab("π Save/Load Project"):
|
546 |
gr.Interface(
|
547 |
fn=save_project,
|
|
|
555 |
description="Save your session, effects, and settings in one file to reuse later."
|
556 |
)
|
557 |
|
558 |
+
gr.Interface(
|
559 |
+
fn=load_project,
|
560 |
+
inputs=gr.File(label="Upload .aiproj File"),
|
561 |
+
outputs=[
|
562 |
+
gr.Dropdown(choices=preset_names, label="Loaded Preset"),
|
563 |
+
gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
|
564 |
+
],
|
565 |
+
title="Resume Last Project",
|
566 |
+
description="Load your saved session"
|
567 |
+
)
|
568 |
+
|
569 |
# --- Mix Two Tracks ===
|
570 |
with gr.Tab("π Mix Two Tracks"):
|
571 |
gr.Interface(
|
|
|
580 |
description="Mix or subtract two audio files."
|
581 |
)
|
582 |
|
583 |
+
# --- Voice Style Transfer (Dummy) ===
|
584 |
+
with gr.Tab("🧠 Voice Style Transfer"):
|
585 |
gr.Interface(
|
586 |
+
fn=apply_style_transfer,
|
587 |
inputs=[
|
588 |
+
gr.Audio(label="Upload Voice Clip", type="filepath"),
|
589 |
+
gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
|
590 |
],
|
591 |
+
outputs=gr.Audio(label="Stylized Output", type="filepath"),
|
592 |
+
title="Change Emotional Tone of Voice",
|
593 |
+
description="Shift the emotional style of any voice clip."
|
594 |
)
|
595 |
|
596 |
demo.launch()
|