tee342 committed on
Commit
b4e6504
·
verified ·
1 Parent(s): ccbe83c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -46
app.py CHANGED
@@ -23,6 +23,7 @@ from faster_whisper import WhisperModel
23
  from mutagen.mp3 import MP3
24
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
25
  from TTS.api import TTS
 
26
 
27
  # Suppress warnings
28
  warnings.filterwarnings("ignore")
@@ -318,6 +319,11 @@ def save_project(audio_path, preset_name, effects):
318
  pickle.dump(project_data, f)
319
  return out_path
320
 
 
 
 
 
 
321
  # === Trim Silence Automatically (VAD) ===
322
  def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
323
  audio = AudioSegment.from_file(audio_file)
@@ -345,25 +351,12 @@ def mix_tracks(track1, track2, volume_offset=0):
345
  mixed.export(out_path, format="wav")
346
  return out_path
347
 
348
- # === Load Custom Plugins ===
349
- def load_plugin(plugin_file, audio_file):
350
- try:
351
- from importlib.util import spec_from_file_location, module_from_spec
352
- spec = spec_from_file_location("plugin", plugin_file.name)
353
- plugin = module_from_spec(spec)
354
- spec.loader.exec_module(plugin)
355
-
356
- # Run plugin
357
- audio = AudioSegment.from_file(audio_file)
358
- processed = plugin.process(audio)
359
-
360
- out_path = os.path.join(tempfile.gettempdir(), "plugin_output.wav")
361
- processed.export(out_path, format="wav")
362
- return out_path
363
- except Exception as e:
364
- return f"⚠️ Plugin error: {str(e)}"
365
 
366
- # === UI ===
367
  effect_options = [
368
  "Noise Reduction",
369
  "Compress Dynamic Range",
@@ -443,13 +436,13 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
443
  clear_btn=None
444
  )
445
 
446
- # --- Transcribe & Edit ---
447
  with gr.Tab("📝 Transcribe & Edit"):
448
  gr.Interface(
449
  fn=transcribe_audio,
450
  inputs=gr.Audio(label="Upload Audio", type="filepath"),
451
  outputs=gr.Textbox(label="Transcribed Text", lines=10),
452
- title="Transcribe & Edit Spoken Content",
453
  description="Convert voice to text and edit it before exporting again."
454
  )
455
 
@@ -479,28 +472,62 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
479
  description="Detect speaker similarity using AI."
480
  )
481
 
482
- # --- Auto-Save Sessions ===
483
- def encode_preset(selected_effects, preset_name, export_format):
484
- import base64
485
- import json
486
- data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
487
- encoded = base64.b64encode(json.dumps(data).encode()).decode()
488
- return f"https://your-space-url?preset={encoded}"
489
 
490
- with gr.Tab("🧾 Share Session"):
491
  gr.Interface(
492
- fn=encode_preset,
493
  inputs=[
494
- gr.CheckboxGroup(choices=effect_options, label="Effects"),
495
- gr.Dropdown(choices=preset_names, label="Preset"),
496
- gr.Dropdown(choices=["MP3", "WAV"], label="Format")
497
  ],
498
- outputs=gr.Textbox(label="Shareable Link", lines=1),
499
- title="Save Your Settings and Share Them",
500
- description="Generate a link to share your effect chain with others."
501
  )
502
 
503
- # --- VAD – Detect & Remove Silence ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  with gr.Tab("✂️ Trim Silence Automatically"):
505
  gr.Interface(
506
  fn=detect_silence,
@@ -511,10 +538,10 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
511
  ],
512
  outputs=gr.File(label="Trimmed Output"),
513
  title="Auto-Detect & Remove Silence",
514
- description="Trim intros/outs or between speech automatically"
515
  )
516
 
517
- # --- Load/Save Project ===
518
  with gr.Tab("📁 Save/Load Project"):
519
  gr.Interface(
520
  fn=save_project,
@@ -528,6 +555,17 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
528
  description="Save your session, effects, and settings in one file to reuse later."
529
  )
530
 
 
 
 
 
 
 
 
 
 
 
 
531
  # --- Mix Two Tracks ===
532
  with gr.Tab("🔀 Mix Two Tracks"):
533
  gr.Interface(
@@ -542,17 +580,17 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
542
  description="Mix or subtract two audio files."
543
  )
544
 
545
- # --- Custom Effect Plugin System ===
546
- with gr.Tab("🧩 Load Custom Effect"):
547
  gr.Interface(
548
- fn=load_plugin,
549
  inputs=[
550
- gr.File(label="Upload .py plugin"),
551
- gr.Audio(label="Upload Audio", type="filepath")
552
  ],
553
- outputs=gr.Audio(label="Processed Output", type="filepath"),
554
- title="Run Your Own Python Effect",
555
- description="Upload a .py file with a 'process' function"
556
  )
557
 
558
  demo.launch()
 
23
  from mutagen.mp3 import MP3
24
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
25
  from TTS.api import TTS
26
+ import pickle
27
 
28
  # Suppress warnings
29
  warnings.filterwarnings("ignore")
 
319
  pickle.dump(project_data, f)
320
  return out_path
321
 
322
+ def load_project(project_file):
323
+ with open(project_file.name, "rb") as f:
324
+ data = pickle.load(f)
325
+ return data["preset"], data["effects"]
326
+
327
  # === Trim Silence Automatically (VAD) ===
328
  def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
329
  audio = AudioSegment.from_file(audio_file)
 
351
  mixed.export(out_path, format="wav")
352
  return out_path
353
 
354
+ # === Voice Style Transfer (Dummy) ===
355
+ def apply_style_transfer(audio_path, mood="Happy"):
356
+ # Replace with real model later
357
+ return audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
+ # === UI Setup ===
360
  effect_options = [
361
  "Noise Reduction",
362
  "Compress Dynamic Range",
 
436
  clear_btn=None
437
  )
438
 
439
+ # --- Transcribe & Edit Tab ===
440
  with gr.Tab("📝 Transcribe & Edit"):
441
  gr.Interface(
442
  fn=transcribe_audio,
443
  inputs=gr.Audio(label="Upload Audio", type="filepath"),
444
  outputs=gr.Textbox(label="Transcribed Text", lines=10),
445
+ title="Transcribe Spoken Content",
446
  description="Convert voice to text and edit it before exporting again."
447
  )
448
 
 
472
  description="Detect speaker similarity using AI."
473
  )
474
 
475
+ # --- Voice Cloning (Dummy) ===
476
+ def clone_voice(*args):
477
+ return "⚠️ Voice cloning requires additional setup"
 
 
 
 
478
 
479
+ with gr.Tab("🎭 Voice Cloning (Dubbing)"):
480
  gr.Interface(
481
+ fn=clone_voice,
482
  inputs=[
483
+ gr.File(label="Source Voice Clip"),
484
+ gr.File(label="Target Voice Clip"),
485
+ gr.Textbox(label="Text to Clone", lines=5)
486
  ],
487
+ outputs=gr.Audio(label="Cloned Output", type="filepath"),
488
+ title="Replace One Voice With Another",
489
+ description="Clone voice from source to target speaker using AI"
490
  )
491
 
492
+ # --- Auto-Save / Resume Sessions ===
493
+ session_state = gr.State()
494
+
495
+ def auto_save_session(audio, preset, effects):
496
+ return {"audio": audio, "preset": preset, "effects": effects}
497
+
498
+ def auto_load_session(session):
499
+ if session and "audio" in session:
500
+ return session["audio"], session["preset"], session["effects"]
501
+ return None, None, None
502
+
503
+ with gr.Tab("🧾 Auto-Save & Resume"):
504
+ gr.Interface(
505
+ fn=auto_save_session,
506
+ inputs=[
507
+ gr.Audio(label="Upload Audio", type="filepath"),
508
+ gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
509
+ gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
510
+ ],
511
+ outputs=session_state,
512
+ title="Auto-Save Your Session",
513
+ description="Save your current state and resume editing later",
514
+ allow_flagging="never"
515
+ )
516
+
517
+ gr.Interface(
518
+ fn=auto_load_session,
519
+ inputs=session_state,
520
+ outputs=[
521
+ gr.Audio(label="Loaded Audio", type="filepath"),
522
+ gr.Dropdown(choices=preset_names, label="Loaded Preset"),
523
+ gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
524
+ ],
525
+ title="Resume Last Session",
526
+ description="Reload your last edit state",
527
+ allow_flagging="never"
528
+ )
529
+
530
+ # --- VAD – Detect & Remove Silence ===
531
  with gr.Tab("✂️ Trim Silence Automatically"):
532
  gr.Interface(
533
  fn=detect_silence,
 
538
  ],
539
  outputs=gr.File(label="Trimmed Output"),
540
  title="Auto-Detect & Remove Silence",
541
+ description="Detect and trim silence at start/end or between words"
542
  )
543
 
544
+ # --- Load/Save Project File (.aiproj) ===
545
  with gr.Tab("📁 Save/Load Project"):
546
  gr.Interface(
547
  fn=save_project,
 
555
  description="Save your session, effects, and settings in one file to reuse later."
556
  )
557
 
558
+ gr.Interface(
559
+ fn=load_project,
560
+ inputs=gr.File(label="Upload .aiproj File"),
561
+ outputs=[
562
+ gr.Dropdown(choices=preset_names, label="Loaded Preset"),
563
+ gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
564
+ ],
565
+ title="Resume Last Project",
566
+ description="Load your saved session"
567
+ )
568
+
569
  # --- Mix Two Tracks ===
570
  with gr.Tab("🔀 Mix Two Tracks"):
571
  gr.Interface(
 
580
  description="Mix or subtract two audio files."
581
  )
582
 
583
+ # --- Voice Style Transfer (Dummy) ===
584
+ with gr.Tab("🧠 Voice Style Transfer"):
585
  gr.Interface(
586
+ fn=apply_style_transfer,
587
  inputs=[
588
+ gr.Audio(label="Upload Voice Clip", type="filepath"),
589
+ gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
590
  ],
591
+ outputs=gr.Audio(label="Stylized Output", type="filepath"),
592
+ title="Change Emotional Tone of Voice",
593
+ description="Shift the emotional style of any voice clip."
594
  )
595
 
596
  demo.launch()