tee342 committed on
Commit
31bd509
·
verified ·
1 Parent(s): 440dd71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -87
app.py CHANGED
@@ -22,7 +22,7 @@ from faster_whisper import WhisperModel
22
  from mutagen.mp3 import MP3
23
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
 
25
- # Suppress warnings for cleaner logs
26
  warnings.filterwarnings("ignore")
27
 
28
  # === Helper Functions ===
@@ -54,7 +54,7 @@ def apply_reverb(audio):
54
  return audio.overlay(reverb, position=1000)
55
 
56
  def apply_pitch_shift(audio, semitones=-2):
57
- new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))) # ✅ Fixed: extra closing parenthesis
58
  samples = np.array(audio.get_array_of_samples())
59
  resampled = np.interp(
60
  np.arange(0, len(samples), 2 ** (semitones / 12)),
@@ -306,47 +306,52 @@ def analyze_audio(audio_path):
306
 
307
  return stats, image
308
 
309
- # === Vocal Removal (Karaoke Mode) ===
310
- def vocal_removal(audio_path):
311
- stems = stem_split(audio_path)
312
- instrumental = stems[0] + stems[1] + stems[2] # drums + bass + other
313
- out_path = os.path.join(tempfile.gettempdir(), "instrumental.wav")
314
- torchaudio.save(out_path, instrumental, 44100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  return out_path
316
 
317
- # === Metadata Tagging ===
318
- def tag_mp3(file_path, title, artist, album, year):
319
- try:
320
- audio = MP3(file_path)
321
- try:
322
- audio.tags = ID3()
323
- except:
324
- audio.add_tags()
325
- audio.tags.add(TIT2(encoding=3, text=title))
326
- audio.tags.add(TPE1(encoding=3, text=artist))
327
- if album:
328
- audio.tags.add(TALB(encoding=3, text=album))
329
- if year:
330
- audio.tags.add(TYER(encoding=3, text=str(year)))
331
- audio.save()
332
- return file_path
333
- except Exception as e:
334
- return None
335
-
336
- # === Voice Style Transfer (Dummy) ===
337
- def apply_style_transfer(audio_path, mood="Happy"):
338
- # Replace with real model later
339
- return audio_path
340
 
341
- # === Session Sharing (URL Encode) ===
342
- def encode_preset(selected_effects, preset_name, export_format):
343
- import base64
344
- import json
345
- data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
346
- encoded = base64.b64encode(json.dumps(data).encode()).decode()
347
- return f"https://huggingface.co/spaces/tee342/AudioMaster?preset={encoded}"
348
 
349
- # === UI ===
350
  effect_options = [
351
  "Noise Reduction",
352
  "Compress Dynamic Range",
@@ -360,7 +365,7 @@ effect_options = [
360
  ]
361
 
362
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
363
- gr.Markdown("## 🎧 AI Audio Studio – The Ultimate AI-Powered Tool\nUpload, edit, and export polished tracks — all powered by AI!")
364
 
365
  # --- Single File Studio ---
366
  with gr.Tab("🎵 Single File Studio"):
@@ -433,7 +438,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
433
  inputs=gr.Audio(label="Upload Audio", type="filepath"),
434
  outputs=gr.Textbox(label="Transcribed Text", lines=10),
435
  title="Transcribe & Edit Spoken Content",
436
- description="Convert voice to text, then edit the script before exporting again."
437
  )
438
 
439
  # --- TTS Voice Generator ---
@@ -446,70 +451,68 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
446
  description="Type anything and turn it into natural-sounding speech."
447
  )
448
 
449
- # --- Audio Analysis Dashboard ---
450
- with gr.Tab("📊 Audio Analysis"):
451
  gr.Interface(
452
- fn=analyze_audio,
453
- inputs=gr.Audio(label="Upload Track", type="filepath"),
454
- outputs=[
455
- gr.JSON(label="Audio Stats"),
456
- gr.Image(label="Waveform Graph")
457
  ],
458
- title="View Loudness, BPM, Silence, and More",
459
- description="Analyze audio loudness, tempo, and frequency content."
 
460
  )
461
 
462
- # --- Voice Style Transfer ---
463
- with gr.Tab("🧠 Voice Style Transfer"):
464
  gr.Interface(
465
- fn=apply_style_transfer,
466
  inputs=[
467
- gr.Audio(label="Upload Voice Clip", type="filepath"),
468
- gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
469
  ],
470
- outputs=gr.Audio(label="Stylized Output", type="filepath"),
471
- title="Change Emotional Tone of Voice",
472
- description="Shift the emotional style of any voice clip."
473
  )
474
 
475
- # --- Session Sharing ---
476
- with gr.Tab("🧾 Session Sharing"):
477
  gr.Interface(
478
- fn=encode_preset,
479
- inputs=[
480
- gr.CheckboxGroup(choices=effect_options, label="Effects"),
481
- gr.Dropdown(choices=preset_names, label="Preset"),
482
- gr.Dropdown(choices=["MP3", "WAV"], label="Format")
483
- ],
484
- outputs=gr.Textbox(label="Shareable Link", lines=1),
485
- title="Save Your Settings and Share Them",
486
- description="Generate a link to share your effect chain with others."
487
  )
488
 
489
- # --- Vocal Removal (Karaoke Mode) ---
490
- with gr.Tab("🎯 Vocal Removal (Karaoke Mode)"):
491
  gr.Interface(
492
- fn=vocal_removal,
493
- inputs=gr.Audio(label="Upload Song", type="filepath"),
494
- outputs=gr.Audio(label="Instrumental Only", type="filepath"),
495
- title="Remove Vocals from Any Track",
496
- description="Create karaoke versions using AI"
 
 
 
 
497
  )
498
 
499
- # --- Metadata Tagging ---
500
- with gr.Tab("🗂 Add MP3 Tags"):
501
  gr.Interface(
502
- fn=tag_mp3,
503
  inputs=[
504
- gr.File(label="Upload MP3/WAV"),
505
- gr.Textbox(label="Title"),
506
- gr.Textbox(label="Artist"),
507
- gr.Textbox(label="Album"),
508
- gr.Number(label="Year")
509
  ],
510
- outputs=gr.File(label="Tagged Audio File"),
511
- title="Add Title, Artist, Album, Year to MP3",
512
- description="Enhance your exported files with metadata tags"
513
  )
514
 
515
  demo.launch()
 
22
  from mutagen.mp3 import MP3
23
  from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
 
25
+ # Suppress warnings
26
  warnings.filterwarnings("ignore")
27
 
28
  # === Helper Functions ===
 
54
  return audio.overlay(reverb, position=1000)
55
 
56
  def apply_pitch_shift(audio, semitones=-2):
57
+ new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
58
  samples = np.array(audio.get_array_of_samples())
59
  resampled = np.interp(
60
  np.arange(0, len(samples), 2 ** (semitones / 12)),
 
306
 
307
  return stats, image
308
 
309
+ # === Auto-Save Sessions ===
310
+ def auto_save_session(data):
311
+ import base64
312
+ encoded = base64.b64encode(json.dumps(data).encode()).decode()
313
+ return f"https://your-studio-url?load={encoded}"
314
+
315
+ # === Voiceprint Matching ===
316
+ from resemblyzer import preprocess_wav, VoiceEncoder
317
+
318
+ encoder = VoiceEncoder()
319
+
320
+ def match_speakers(clip1, clip2):
321
+ wav1 = preprocess_wav(clip1)
322
+ wav2 = preprocess_wav(clip2)
323
+ embed1 = encoder.embed_utterance(wav1)
324
+ embed2 = encoder.embed_utterance(wav2)
325
+ similarity = np.inner(embed1, embed2)
326
+ return f"Speaker Match Score: {similarity:.2f}"
327
+
328
+ # === Mix Two Tracks ===
329
+ def mix_tracks(track1, track2, volume_offset=0):
330
+ a1 = AudioSegment.from_file(track1)
331
+ a2 = AudioSegment.from_file(track2)
332
+ mixed = a1.overlay(a2 - volume_offset)
333
+ out_path = os.path.join(tempfile.gettempdir(), "mixed.wav")
334
+ mixed.export(out_path, format="wav")
335
  return out_path
336
 
337
+ # === Save/Load Project File (.aiproj) ===
338
+ def save_project(audio_path, preset_name, effects):
339
+ project_data = {
340
+ "audio": AudioSegment.from_file(audio_path).raw_data,
341
+ "preset": preset_name,
342
+ "effects": effects
343
+ }
344
+ out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
345
+ with open(out_path, "wb") as f:
346
+ pickle.dump(project_data, f)
347
+ return out_path
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
+ def load_project(project_file):
350
+ with open(project_file.name, "rb") as f:
351
+ data = pickle.load(f)
352
+ return data["preset"], data["effects"]
 
 
 
353
 
354
+ # === UI ===
355
  effect_options = [
356
  "Noise Reduction",
357
  "Compress Dynamic Range",
 
365
  ]
366
 
367
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
368
+ gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export — powered by AI!")
369
 
370
  # --- Single File Studio ---
371
  with gr.Tab("🎵 Single File Studio"):
 
438
  inputs=gr.Audio(label="Upload Audio", type="filepath"),
439
  outputs=gr.Textbox(label="Transcribed Text", lines=10),
440
  title="Transcribe & Edit Spoken Content",
441
+ description="Convert voice to text and edit it before re-exporting."
442
  )
443
 
444
  # --- TTS Voice Generator ---
 
451
  description="Type anything and turn it into natural-sounding speech."
452
  )
453
 
454
+ # --- Voiceprint Matching ---
455
+ with gr.Tab("🧍‍♂️ Match Speakers"):
456
  gr.Interface(
457
+ fn=match_speakers,
458
+ inputs=[
459
+ gr.File(label="Clip 1"),
460
+ gr.File(label="Clip 2")
 
461
  ],
462
+ outputs=gr.Textbox(label="Match Score", lines=1),
463
+ title="Are These the Same Person?",
464
+ description="Detect speaker similarity using AI."
465
  )
466
 
467
+ # --- Voice Cloning (AI Dubbing) ---
468
+ with gr.Tab("🎭 Voice Cloning (AI Dubbing)"):
469
  gr.Interface(
470
+ fn=clone_voice,
471
  inputs=[
472
+ gr.File(label="Source Voice"),
473
+ gr.File(label="Target Voice")
474
  ],
475
+ outputs=gr.Audio(label="Cloned Output", type="filepath"),
476
+ title="Replace Voice with Another",
477
+ description="Clone voice from source to target speaker."
478
  )
479
 
480
+ # --- AI Mastering Mode ---
481
+ with gr.Tab("📈 AI Mastering Mode"):
482
  gr.Interface(
483
+ fn=ai_mastering,
484
+ inputs=gr.Audio(label="Upload Track", type="filepath"),
485
+ outputs=gr.Audio(label="Mastered Output", type="filepath"),
486
+ title="Auto-Master Your Track",
487
+ description="Smart mastering for streaming platforms like Spotify, YouTube, or podcasts."
 
 
 
 
488
  )
489
 
490
+ # --- Mix Two Tracks ---
491
+ with gr.Tab("🔀 Mix Two Tracks"):
492
  gr.Interface(
493
+ fn=mix_tracks,
494
+ inputs=[
495
+ gr.File(label="Main Track"),
496
+ gr.File(label="Background Track"),
497
+ gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
498
+ ],
499
+ outputs=gr.File(label="Mixed Output"),
500
+ title="Overlay Two Tracks",
501
+ description="Mix, blend, or subtract two audio files."
502
  )
503
 
504
+ # --- Load/Save Project ---
505
+ with gr.Tab("📁 Save/Load Project"):
506
  gr.Interface(
507
+ fn=save_project,
508
  inputs=[
509
+ gr.File(label="Original Audio"),
510
+ gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
511
+ gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
 
 
512
  ],
513
+ outputs=gr.File(label="Project File (.aiproj)"),
514
+ title="Save Everything Together",
515
+ description="Save your session, effects, and settings in one file to reuse later."
516
  )
517
 
518
  demo.launch()