tee342 committed on
Commit
2f52f6c
·
verified ·
1 Parent(s): 9f645d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +238 -48
app.py CHANGED
@@ -23,16 +23,6 @@ from mutagen.mp3 import MP3
23
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
  from TTS.api import TTS
25
  import pickle
26
- import subprocess
27
-
28
- # Install OpenVoice from GitHub during startup
29
- try:
30
- import openvoice
31
- except ImportError:
32
- print("Installing OpenVoice from GitHub...")
33
- subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
34
- from openvoice.se_extractor import get_se
35
- from openvoice.api import ToneColorConverter
36
 
37
  # Suppress warnings
38
  warnings.filterwarnings("ignore")
@@ -292,23 +282,6 @@ def generate_tts(text):
292
  tts.tts_to_file(text=text, file_path=out_path)
293
  return out_path
294
 
295
- # === Save/Load Project File (.aiproj) ===
296
- def save_project(audio_path, preset_name, effects):
297
- project_data = {
298
- "audio": AudioSegment.from_file(audio_path).raw_data,
299
- "preset": preset_name,
300
- "effects": effects
301
- }
302
- out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
303
- with open(out_path, "wb") as f:
304
- pickle.dump(project_data, f)
305
- return out_path
306
-
307
- def load_project(project_file):
308
- with open(project_file.name, "rb") as f:
309
- data = pickle.load(f)
310
- return data["preset"], data["effects"]
311
-
312
  # === Trim Silence Automatically (VAD) ===
313
  def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
314
  audio = AudioSegment.from_file(audio_file)
@@ -336,27 +309,244 @@ def mix_tracks(track1, track2, volume_offset=0):
336
  mixed.export(out_path, format="wav")
337
  return out_path
338
 
339
- # === Voice Style Transfer (Clone Voice) ===
340
- from openvoice.se_extractor import get_se
341
- from openvoice.api import ToneColorConverter
 
 
 
 
 
 
 
 
342
 
343
- tone_converter = ToneColorConverter().to("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  def clone_voice(source_audio, target_audio, text):
346
- source_se, _ = get_se(source_audio)
347
- target_se, _ = get_se(target_audio)
348
-
349
- out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
350
-
351
- # Generate base TTS
352
- tts.tts_to_file(text=text, file_path=out_path)
353
-
354
- # Apply voice conversion
355
- tone_converter.convert(
356
- audio_src_path=out_path,
357
- src_se=source_se,
358
- tgt_se=target_se,
359
- output_path=out_path
360
- )
361
-
362
- return out_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
  from TTS.api import TTS
25
  import pickle
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Suppress warnings
28
  warnings.filterwarnings("ignore")
 
282
  tts.tts_to_file(text=text, file_path=out_path)
283
  return out_path
284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  # === Trim Silence Automatically (VAD) ===
286
  def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
287
  audio = AudioSegment.from_file(audio_file)
 
309
  mixed.export(out_path, format="wav")
310
  return out_path
311
 
312
+ # === Save/Load Project File (.aiproj) ===
313
def save_project(audio_path, preset_name, effects):
    """Bundle an audio file and its editing settings into one ``.aiproj`` file.

    Args:
        audio_path: Path to the audio file, or an upload object exposing the
            path as ``.name`` (the same convention ``load_project`` relies on).
        preset_name: Name of the preset that was used.
        effects: Effects that were applied.

    Returns:
        Path of the pickled project file that was written.
    """
    # Accept both a plain path and an upload wrapper carrying the path in .name.
    source = getattr(audio_path, "name", audio_path)

    project_data = {
        # NOTE(review): only the raw sample bytes are stored here; frame rate,
        # sample width and channel count are not saved alongside them - confirm
        # this is enough for whatever eventually restores the audio.
        "audio": AudioSegment.from_file(source).raw_data,
        "preset": preset_name,
        "effects": effects,
    }

    # A fresh directory per call keeps the "project.aiproj" file name while
    # preventing concurrent sessions from overwriting each other's project.
    out_path = os.path.join(tempfile.mkdtemp(), "project.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(project_data, f)
    return out_path
323
 
324
def load_project(project_file):
    """Read a ``.aiproj`` file and return the preset and effects stored in it.

    Args:
        project_file: Path to the project file, or an upload object exposing
            the path as ``.name``.

    Returns:
        Tuple ``(preset, effects)`` taken from the saved project.

    Raises:
        ValueError: If no file was provided.
        KeyError: If the project lacks the "preset" or "effects" entry.
    """
    if project_file is None:
        raise ValueError("No project file provided")

    # Accept both a plain path and an upload wrapper carrying the path in .name.
    path = getattr(project_file, "name", project_file)

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # This file comes from a user upload, so it is untrusted input; a data-only
    # format (e.g. JSON plus a separate audio file) should replace pickle here.
    with open(path, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]
328
 
329
+ # === Auto-Save / Resume Sessions ===
330
def save_or_resume_session(audio, preset, effects, action="save", session=None):
    """Save the current editing state, or restore a previously saved one.

    Args:
        audio: Audio value to store when saving. When loading without
            ``session``, a saved-session dict passed here is still honoured
            (legacy calling convention).
        preset: Preset to store when saving.
        effects: Effects to store when saving.
        action: ``"save"`` or ``"load"``; any other value does nothing.
        session: Session dict produced by an earlier ``"save"`` call. Without
            it, "load" has nothing to restore from when ``audio`` is an
            ordinary audio value.

    Returns:
        Tuple ``(session, audio, preset, effects)``. On save, ``session`` is
        the new session dict and the other three are ``None``. On load, the
        last three are the restored values.
    """
    if action == "save":
        return {"audio": audio, "preset": preset, "effects": effects}, None, None, None

    if action == "load":
        if isinstance(session, dict):
            # Hand the session back so it survives repeated loads.
            return (
                session,
                session.get("audio"),
                session.get("preset"),
                session.get("effects"),
            )
        if isinstance(audio, dict):
            # Legacy form: the saved session arrives in the ``audio`` slot.
            return (
                None,
                audio.get("audio"),
                audio.get("preset"),
                audio.get("effects"),
            )

    # Nothing to do: keep whatever session was passed in (None by default).
    return session, None, None, None
341
+
342
+ # === Voice Cloning – Fallback Version for Hugging Face ===
343
def clone_voice(source_audio, target_audio, text):
    """Fallback voice cloning: synthesize ``text`` with the default TTS voice.

    ``source_audio`` and ``target_audio`` are accepted so the signature stays
    the same, but they are not used; no voice conversion is performed.

    Returns:
        Whatever ``generate_tts(text)`` returns.
    """
    print("⚠️ Voice cloning not available in browser version — use local install for full support")
    return generate_tts(text)
346
+
347
+ # === UI Setup ===
348
# Effect names offered by the effect checkbox groups, in display order.
effect_options = [
    "Noise Reduction", "Compress Dynamic Range", "Add Reverb",
    "Pitch Shift", "Echo", "Stereo Widening",
    "Bass Boost", "Treble Boost", "Normalize",
]
359
+
360
# Top-level UI: one tab per tool, each wired to its processing function.
with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
    gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export — powered by AI!")

    # Initial selection for the preset dropdowns; None when no presets exist,
    # so an empty preset list cannot raise IndexError while the UI is built.
    default_preset = preset_names[0] if preset_names else None

    # --- Single File Studio ---
    with gr.Tab("🎡 Single File Studio"):
        gr.Interface(
            fn=process_audio,
            inputs=[
                gr.Audio(label="Upload Audio", type="filepath"),
                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                gr.Checkbox(label="Isolate Vocals After Effects"),
                gr.Dropdown(choices=preset_names, label="Select Preset", value=default_preset),
                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3"),
            ],
            outputs=[
                gr.Audio(label="Processed Audio", type="filepath"),
                gr.Image(label="Waveform Preview"),
                gr.Textbox(label="Session Log (JSON)", lines=5),
                gr.Textbox(label="Detected Genre", lines=1),
                gr.Textbox(label="Status", value="✅ Ready", lines=1),
            ],
            title="Edit One File at a Time",
            description="Apply effects, preview waveform, and get full session log.",
            flagging_mode="never",
            submit_btn="Process Audio",
            clear_btn=None,
        )

    # --- Batch Processing ---
    with gr.Tab("🔊 Batch Processing"):
        gr.Interface(
            fn=batch_process_audio,
            inputs=[
                gr.File(label="Upload Multiple Files", file_count="multiple"),
                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                gr.Checkbox(label="Isolate Vocals After Effects"),
                gr.Dropdown(choices=preset_names, label="Select Preset", value=default_preset),
                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3"),
            ],
            outputs=[
                gr.File(label="Download ZIP of All Processed Files"),
                gr.Textbox(label="Status", value="✅ Ready", lines=1),
            ],
            title="Batch Audio Processor",
            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
            flagging_mode="never",
            submit_btn="Process All Files",
            clear_btn=None,
        )

    # --- Remix Mode ---
    with gr.Tab("🎛 Remix Mode"):
        gr.Interface(
            fn=stem_split,
            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
            outputs=[
                gr.File(label="Vocals"),
                gr.File(label="Drums"),
                gr.File(label="Bass"),
                gr.File(label="Other"),
            ],
            title="Split Into Drums, Bass, Vocals, and More",
            description="Use AI to separate musical elements like vocals, drums, and bass.",
            flagging_mode="never",
            clear_btn=None,
        )

    # --- Transcribe & Edit ---
    with gr.Tab("📝 Transcribe & Edit"):
        gr.Interface(
            fn=transcribe_audio,
            inputs=gr.Audio(label="Upload Audio", type="filepath"),
            outputs=gr.Textbox(label="Transcribed Text", lines=10),
            title="Transcribe Spoken Content",
            description="Convert voice to text and edit it before exporting again.",
        )

    # --- TTS Voice Generator ---
    with gr.Tab("💬 TTS Voice Generator"):
        gr.Interface(
            fn=generate_tts,
            inputs=gr.Textbox(label="Enter Text", lines=5),
            outputs=gr.Audio(label="Generated Speech", type="filepath"),
            title="Text-to-Speech Generator",
            description="Type anything and turn it into natural-sounding speech.",
        )

    # --- VAD – Detect & Remove Silence ---
    with gr.Tab("✂️ Trim Silence Automatically"):
        gr.Interface(
            fn=detect_silence,
            inputs=[
                gr.File(label="Upload Track"),
                gr.Slider(minimum=-100, maximum=-10, value=-50, label="Silence Threshold (dB)"),
                gr.Number(label="Min Silence Length (ms)", value=1000),
            ],
            outputs=gr.File(label="Trimmed Output"),
            title="Auto-Detect & Remove Silence",
            description="Detect and trim silence at start/end or between words",
        )

    # --- Load/Save Project File (.aiproj) ---
    with gr.Tab("📁 Save/Load Project"):
        gr.Interface(
            fn=save_project,
            inputs=[
                gr.File(label="Original Audio"),
                gr.Dropdown(choices=preset_names, label="Used Preset", value=default_preset),
                gr.CheckboxGroup(choices=effect_options, label="Applied Effects"),
            ],
            outputs=gr.File(label="Project File (.aiproj)"),
            title="Save Everything Together",
            description="Save your session, effects, and settings in one file to reuse later.",
        )

        gr.Interface(
            fn=load_project,
            inputs=gr.File(label="Upload .aiproj File"),
            outputs=[
                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects"),
            ],
            title="Resume Last Project",
            description="Load your saved session",
        )

    # --- Auto-Save / Resume Sessions ---
    # Per-user storage for the session dict produced by the "save" action.
    session_state = gr.State()

    def _run_session_action(audio, preset, effects, action, session):
        """Route the Save/Load button: restore from ``session`` or save anew."""
        if action == "load":
            if isinstance(session, dict):
                # Return the session as well so it stays stored after loading.
                return (
                    session,
                    session.get("audio"),
                    session.get("preset"),
                    session.get("effects"),
                )
            # Nothing has been saved yet: leave the state alone, clear outputs.
            return session, None, None, None
        return save_or_resume_session(audio, preset, effects, action)

    with gr.Tab("🧾 Auto-Save & Resume"):
        gr.Markdown("Save your current state and resume editing later.")

        action_radio = gr.Radio(["save", "load"], label="Action", value="save")
        audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
        preset_dropdown = gr.Dropdown(choices=preset_names, label="Used Preset", value=default_preset)
        effect_checkbox = gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
        save_btn = gr.Button("Save or Load Session")

        loaded_audio = gr.Audio(label="Loaded Audio", type="filepath")
        loaded_preset = gr.Dropdown(choices=preset_names, label="Loaded Preset")
        loaded_effects = gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")

        # session_state is passed in as well as out; without it as an input
        # the "load" action has no access to what "save" stored.
        save_btn.click(
            fn=_run_session_action,
            inputs=[audio_input, preset_dropdown, effect_checkbox, action_radio, session_state],
            outputs=[session_state, loaded_audio, loaded_preset, loaded_effects],
        )

    # --- Mix Two Tracks ---
    with gr.Tab("🔀 Mix Two Tracks"):
        gr.Interface(
            fn=mix_tracks,
            inputs=[
                gr.File(label="Main Track"),
                gr.File(label="Background Track"),
                gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)"),
            ],
            outputs=gr.File(label="Mixed Output"),
            title="Overlay Two Tracks",
            description="Mix, blend, or subtract two audio files.",
        )

    # --- Voice Style Transfer (Dummy) ---
    def apply_style_transfer(audio_path, mood="Happy"):
        """Placeholder: return the input path unchanged; ``mood`` is ignored."""
        return audio_path

    with gr.Tab("🧠 Voice Style Transfer"):
        gr.Interface(
            fn=apply_style_transfer,
            inputs=[
                gr.Audio(label="Upload Voice Clip", type="filepath"),
                gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone"),
            ],
            outputs=gr.Audio(label="Stylized Output", type="filepath"),
            title="Change Emotional Tone of Voice",
            description="Shift the emotional style of any voice clip.",
        )

    # --- Voice Cloning (Fallback) ---
    with gr.Tab("🎭 Voice Cloning (Demo)"):
        gr.Interface(
            fn=clone_voice,
            inputs=[
                gr.File(label="Source Voice Clip"),
                gr.File(label="Target Voice Clip"),
                gr.Textbox(label="Text to Clone", lines=5),
            ],
            outputs=gr.Audio(label="Cloned Output", type="filepath"),
            title="Replace One Voice With Another (Demo)",
            description="Clone voice from source to target speaker using AI",
        )

demo.launch()