tee342 committed on
Commit 40e964d · verified · 1 Parent(s): 786e848

Update app.py

Files changed (1)
  1. app.py +421 -522
app.py CHANGED
@@ -17,7 +17,6 @@ import zipfile
 import datetime
 import librosa
 import warnings
-from faster_whisper import WhisperModel
 from TTS.api import TTS
 import base64
 import pickle
@@ -27,21 +26,54 @@ import soundfile as sf
 print("Gradio version:", gr.__version__)
 warnings.filterwarnings("ignore")
 
-# Helper to convert file to base64
-def file_to_base64_audio(file_path, mime_type="audio/wav"):
-    with open(file_path, "rb") as f:
-        data = f.read()
-    b64 = base64.b64encode(data).decode()
-    return f"data:{mime_type};base64,{b64}"
 
-# === Effect Functions ===
 def apply_normalize(audio):
     return audio.normalize()
 
 def apply_noise_reduction(audio):
-    samples, frame_rate = audiosegment_to_array(audio)
-    reduced = nr.reduce_noise(y=samples, sr=frame_rate)
-    return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)
 
 def apply_compression(audio):
     return audio.compress_dynamic_range()
@@ -52,9 +84,8 @@ def apply_reverb(audio):
 
 def apply_pitch_shift(audio, semitones=-2):
     new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
-    samples = np.array(audio.get_array_of_samples())
-    resampled = np.interp(np.arange(0, len(samples), 2 ** (semitones / 12)), np.arange(len(samples)), samples).astype(np.int16)
-    return AudioSegment(resampled.tobytes(), frame_rate=new_frame_rate, sample_width=audio.sample_width, channels=audio.channels)
 
 def apply_echo(audio, delay_ms=500, decay=0.5):
     echo = audio - 10
@@ -96,36 +127,15 @@ def apply_stage_mode(audio):
 
 def apply_bitcrush(audio, bit_depth=8):
     samples = np.array(audio.get_array_of_samples())
-    max_val = 2 ** (bit_depth) - 1
     downsampled = np.round(samples / (32768 / max_val)).astype(np.int16)
     return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=audio.channels)
 
-# === Helper Functions ===
-def audiosegment_to_array(audio):
-    return np.array(audio.get_array_of_samples()), audio.frame_rate
-
-def array_to_audiosegment(samples, frame_rate, channels=1):
-    return AudioSegment(
-        samples.tobytes(),
-        frame_rate=int(frame_rate),
-        sample_width=samples.dtype.itemsize,
-        channels=channels
-    )
-
-def load_audiofile_to_numpy(path):
-    audio = AudioSegment.from_file(path)
-    return np.array(audio.get_array_of_samples()), audio.frame_rate
-
-def save_audiosegment_to_temp(audio, suffix=".wav"):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
-        audio.export(f.name, format=suffix[1:])
-    return f.name
 
-# === Loudness Matching (EBU R128) ===
 try:
     import pyloudnorm as pyln
 except ImportError:
-    print("Installing pyloudnorm...")
     import subprocess
     subprocess.run(["pip", "install", "pyloudnorm"])
     import pyloudnorm as pyln
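The body of `match_loudness` is collapsed by the diff viewer; only its last two lines surface in the next hunk. For orientation, an EBU R128 match built on pyloudnorm's documented API would look roughly like this sketch — an assumption about the function's shape, not the committed body, with mono assumed for brevity:

```python
import numpy as np
import pyloudnorm as pyln
from pydub import AudioSegment

def match_loudness_sketch(audio_path, target_lufs=-14.0):
    audio = AudioSegment.from_file(audio_path)
    # The meter expects floats in [-1, 1]; mono assumed here — stereo would
    # need reshaping the interleaved samples to (n_samples, n_channels).
    samples = np.array(audio.get_array_of_samples()).astype(np.float64) / 32768.0
    meter = pyln.Meter(audio.frame_rate)           # ITU-R BS.1770 / EBU R128 meter
    loudness = meter.integrated_loudness(samples)  # integrated loudness in LUFS
    adjusted = audio.apply_gain(target_lufs - loudness)  # close the gap in dB
    return save_audiosegment_to_temp(adjusted, ".wav")
```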
@@ -140,7 +150,6 @@ def match_loudness(audio_path, target_lufs=-14.0):
     out_path = save_audiosegment_to_temp(adjusted, ".wav")
     return out_path
 
-# Define eq_map directly
 eq_map = {
     "Pop": [(200, 500, -3), (2000, 4000, +4)],
     "EDM": [(60, 250, +6), (8000, 12000, +3)],
@@ -165,20 +174,19 @@ eq_map = {
 
 def auto_eq(audio, genre="Pop"):
     from scipy.signal import butter, sosfilt
-
     def band_eq(samples, sr, lowcut, highcut, gain):
         sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
         filtered = sosfilt(sos, samples)
         return samples + gain * filtered
 
-    samples, sr = audiosegment_to_array(audio)
-    samples = samples.astype(np.float64)
-    for band in eq_map.get(genre, []):
-        low, high, gain = band
         samples = band_eq(samples, sr, low, high, gain)
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
     if rate != sample_rate:
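One observation on `band_eq` (unchanged by this commit): it adds the band-passed signal back with a raw multiplier, so the values in `eq_map` act as linear blend weights rather than the dB amounts their +4/-3 notation suggests. A dB-faithful variant would convert first — a minimal sketch, assuming the `eq_map` tuples are meant as dB:

```python
import numpy as np
from scipy.signal import butter, sosfilt

def band_eq_db(samples, sr, lowcut, highcut, gain_db):
    # out = x + (10**(dB/20) - 1) * band(x): gain_db = 0 leaves the
    # signal untouched, +6 roughly doubles the band's amplitude.
    sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
    band = sosfilt(sos, samples)
    return samples + (10 ** (gain_db / 20) - 1) * band
```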
@@ -203,196 +211,127 @@ def apply_vocal_isolation(audio_path):
     save_track(out_path, vocal_track, model.samplerate)
     return out_path
 
-# === Stem Splitting Function ===
 def stem_split(audio_path):
     model = pretrained.get_model(name='htdemucs')
     wav = load_track_local(audio_path, model.samplerate, channels=2)
     sources = apply_model(model, wav[None])[0]
     output_dir = tempfile.mkdtemp()
-    stem_paths = [
-        gr.File(value=os.path.join(output_dir, f"{name}.wav"))
-        for name in ['drums', 'bass', 'other', 'vocals']
-    ]
     for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
         path = os.path.join(output_dir, f"{name}.wav")
         save_track(path, sources[i].cpu(), model.samplerate)
-    return stem_paths
 
-# === Process Audio Function – Fully Featured ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
-    status = "🔊 Loading audio..."
     try:
         audio = AudioSegment.from_file(audio_file)
-        status = "🛠 Applying effects..."
-        effect_map_real = {
             "Noise Reduction": apply_noise_reduction,
             "Compress Dynamic Range": apply_compression,
             "Add Reverb": apply_reverb,
-            "Pitch Shift": lambda x: apply_pitch_shift(x),
             "Echo": apply_echo,
             "Stereo Widening": apply_stereo_widen,
            "Bass Boost": apply_bass_boost,
            "Treble Boost": apply_treble_boost,
            "Normalize": apply_normalize,
-            "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
-            "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
-            "Vocal Distortion": lambda x: apply_vocal_distortion(x),
-            "Stage Mode": apply_stage_mode
        }
-        history = [audio]
-        for effect_name in selected_effects:
-            if effect_name in effect_map_real:
-                audio = effect_map_real[effect_name](audio)
-                history.append(audio)
-        status = "💾 Saving final audio..."
-        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
-            if isolate_vocals:
-                temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
-                audio.export(temp_input, format="wav")
-                vocal_path = apply_vocal_isolation(temp_input)
-                final_audio = AudioSegment.from_wav(vocal_path)
-            else:
-                final_audio = audio
-            output_path = f.name
-            final_audio.export(output_path, format=export_format.lower())
-        waveform_image = show_waveform(output_path)
-        genre = detect_genre(output_path)
-        session_log = generate_session_log(audio_file, selected_effects, isolate_vocals, export_format, genre)
-        status = "🎉 Done!"
-        return output_path, waveform_image, session_log, genre, status, history
     except Exception as e:
-        status = f"❌ Error: {str(e)}"
-        return None, None, status, "", status, []
 
-# Waveform preview
-def show_waveform(audio_file):
-    try:
-        audio = AudioSegment.from_file(audio_file)
-        samples = np.array(audio.get_array_of_samples())
-        plt.figure(figsize=(10, 2))
-        plt.plot(samples[:10000], color="skyblue")
-        plt.axis("off")
-        buf = BytesIO()
-        plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-    except Exception:
-        return None
-
-# Genre detection stub
-def detect_genre(audio_path):
-    try:
-        y, sr = torchaudio.load(audio_path)
-        return "Speech"
-    except Exception:
-        return "Unknown"
-
-# Session log generator
-def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
-    return json.dumps({
-        "timestamp": str(datetime.datetime.now()),
-        "filename": os.path.basename(audio_path),
-        "effects_applied": effects,
-        "isolate_vocals": isolate_vocals,
-        "export_format": export_format,
-        "detected_genre": genre
-    }, indent=2)
-
-# Preset Choices
-preset_choices = {
-    "Default": [],
-    "Clean Podcast": ["Noise Reduction", "Normalize"],
-    "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
-    "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
-    "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
-    "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
-    "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
-    "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
-    "🎙 Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
-    "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
-    "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
-    "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
-    "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
-    "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"],
-    "🎤 R&B Vocal": ["Noise Reduction", "Bass Boost (100-300Hz)", "Treble Boost (2000-4000Hz)"],
-    "💃 Soul Vocal": ["Noise Reduction", "Bass Boost (80-200Hz)", "Treble Boost (1500-3500Hz)"],
-    "🕺 Funk Groove": ["Bass Boost (80-200Hz)", "Treble Boost (1000-3000Hz)"],
-    "Studio Master": ["Noise Reduction", "Normalize", "Bass Boost", "Treble Boost", "Limiter"],
-    "Podcast Voice": ["Noise Reduction", "Auto Gain", "High Pass Filter (85Hz)"],
-    "Lo-Fi Chill": ["Noise Gate", "Low-Pass Filter (3000Hz)", "Mono Downmix", "Bitcrusher"],
-    "Vocal Clarity": ["Noise Reduction", "EQ Match", "Reverb", "Auto Gain"],
-    "Retro Game Sound": ["Bitcrusher", "Echo", "Mono Downmix"],
-    "Live Stream Optimized": ["Noise Reduction", "Auto Gain", "Saturation", "Normalize"],
-    "Deep Bass Trap": ["Bass Boost (60-120Hz)", "Low-Pass Filter (200Hz)", "Limiter"],
-    "8-bit Voice": ["Bitcrusher", "Pitch Shift (-4 semitones)", "Mono Downmix"],
-    "Pop Vocal": ["Noise Reduction", "Normalize", "EQ Match (Pop)", "Auto Gain"],
-    "EDM Lead": ["Noise Reduction", "Tape Saturation", "Stereo Widening", "Limiter"],
-    "Hip-Hop Beat": ["Bass Boost (60-200Hz)", "Treble Boost (7000-10000Hz)", "Compression"],
-    "ASMR Whisper": ["Noise Gate", "Auto Gain", "Low-Pass Filter (5000Hz)"],
-    "Jazz Piano Clean": ["Noise Reduction", "EQ Match (Jazz Piano)", "Normalize"],
-    "Metal Guitar": ["Noise Reduction", "EQ Match (Metal)", "Compression"],
-    "Podcast Intro": ["Echo", "Reverb", "Pitch Shift (+1 semitone)"],
-    "Vintage Radio": ["Bitcrusher", "Low-Pass Filter (4000Hz)", "Saturation"],
-    "Speech Enhancement": ["Noise Reduction", "High Pass Filter (100Hz)", "Normalize", "Auto Gain"],
-    "Nightcore Speed": ["Pitch Shift (+3 semitones)", "Time Stretch (1.2x)", "Treble Boost"],
-    "Robot Voice": ["Pitch Shift (-12 semitones)", "Bitcrusher", "Low-Pass Filter (2000Hz)"],
-    "Underwater Effect": ["Low-Pass Filter (1000Hz)", "Reverb", "Echo"],
-    "Alien Voice": ["Pitch Shift (+7 semitones)", "Tape Saturation", "Echo"],
-    "Cinematic Voice": ["Reverb", "Limiter", "Bass Boost", "Auto Gain"],
-    "Phone Call Sim": ["Low-Pass Filter (3400Hz)", "Noise Gate", "Compression"],
-    "AI Generated Voice": ["TTS", "Pitch Shift", "Vocal Distortion"]
-}
-
-preset_names = list(preset_choices.keys())
 
-# Batch Processing
 def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
     try:
         output_dir = tempfile.mkdtemp()
-        results = []
-        session_logs = []
-        for file in files:
-            processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)[0:5]
-            results.append(processed_path)
-            session_logs.append(log)
         zip_path = os.path.join(tempfile.gettempdir(), "batch_output.zip")
         with zipfile.ZipFile(zip_path, 'w') as zipf:
-            for i, res in enumerate(results):
-                filename = f"processed_{i}.{export_format.lower()}"
-                zipf.write(res, filename)
-                zipf.writestr(f"session_info_{i}.json", session_logs[i])
-            return zip_path, "📦 ZIP created successfully!"
     except Exception as e:
-        return None, f"❌ Batch processing failed: {str(e)}"
 
-# AI Remastering
 def ai_remaster(audio_path):
     try:
         audio = AudioSegment.from_file(audio_path)
         samples, sr = audiosegment_to_array(audio)
         reduced = nr.reduce_noise(y=samples, sr=sr)
-        cleaned = array_to_audiosegment(reduced, sr, channels=audio.channels)
-        cleaned_wav_path = os.path.join(tempfile.gettempdir(), "cleaned.wav")
-        cleaned.export(cleaned_wav_path, format="wav")
-        isolated_path = apply_vocal_isolation(cleaned_wav_path)
         final_path = ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
-        return final_path
     except Exception as e:
-        print(f"Remastering Error: {str(e)}")
         return None
 
 def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
     audio = AudioSegment.from_file(audio_path)
     audio = auto_eq(audio, genre=genre)
-    audio = match_loudness(audio_path, target_lufs=target_lufs)
     audio = apply_stereo_widen(audio, pan_amount=0.3)
-    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-    audio.export(out_path, format="wav")
     return out_path
 
-# Harmonic Saturation
-def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
     if saturation_type == "Tube":
         saturated = np.tanh(intensity * samples)
@@ -404,73 +343,58 @@ def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
         saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
     else:
         saturated = samples
-    return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
 
-# Vocal Formant Correction
-def formant_correct(audio, shift=1.0):
-    samples, sr = audiosegment_to_array(audio)
-    corrected = librosa.effects.pitch_shift(samples, sr=sr, n_steps=shift)
-    return array_to_audiosegment(corrected.astype(np.int16), sr, channels=audio.channels)
 
-# Voice Swap
-def clone_voice(source_audio, reference_audio):
-    source = AudioSegment.from_file(source_audio)
-    ref = AudioSegment.from_file(reference_audio)
-    mixed = source.overlay(ref - 10)
-    out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
-    mixed.export(out_path, format="wav")
-    return out_path
-
-# Save/Load Mix Session (.aiproj)
-def save_project(audio, preset, effects):
-    project_data = {
-        "audio": AudioSegment.from_file(audio).raw_data,
-        "preset": preset,
-        "effects": effects
-    }
-    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
-    with open(out_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return out_path
 
-def load_project(project_file):
-    with open(project_file.name, "rb") as f:
-        data = pickle.load(f)
-    return data["preset"], data["effects"]
 
-# Prompt-Based Editing
-def process_prompt(audio, prompt):
-    return apply_noise_reduction(audio)
 
-# Vocal Pitch Correction
-def auto_tune_vocal(audio_path, target_key="C"):
     try:
-        audio = AudioSegment.from_file(audio_path.name)
         semitones = key_to_semitone(target_key)
         tuned_audio = apply_pitch_shift(audio, semitones)
-        out_path = save_audiosegment_to_temp(tuned_audio, ".wav")
-        return out_path
     except Exception as e:
         print(f"Auto-Tune Error: {e}")
         return None
 
-def key_to_semitone(key="C"):
-    keys = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5,
-            "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
-    return keys.get(key, 0)
 
-# Loop Section Tool
-def loop_section(audio_path, start_ms, end_ms, loops=2):
-    audio = AudioSegment.from_file(audio_path)
     section = audio[start_ms:end_ms]
     looped = section * loops
-    out_path = os.path.join(tempfile.gettempdir(), "looped_output.wav")
-    looped.export(out_path, format="wav")
-    return out_path
 
-# Frequency Spectrum Visualization
-def visualize_spectrum(audio_path):
-    y, sr = torchaudio.load(audio_path)
     y_np = y.numpy().flatten()
     stft = librosa.stft(y_np)
     db = librosa.amplitude_to_db(abs(stft))
@@ -485,15 +409,17 @@ def visualize_spectrum(audio_path):
     buf.seek(0)
     return Image.open(buf)
 
-# A/B Compare
 def compare_ab(track1_path, track2_path):
     return track1_path, track2_path
 
-# DAW Template Export
-def generate_ableton_template(stems):
     template = {
         "format": "Ableton Live",
-        "stems": [os.path.basename(s) for s in stems],
         "effects": ["Reverb", "EQ", "Compression"],
         "tempo": 128,
         "title": "Studio Pulse Project"
@@ -503,270 +429,258 @@ def generate_ableton_template(stems):
     json.dump(template, f, indent=2)
     return out_path
 
-# Export Full Mix ZIP
-def export_full_mix(stems, final_mix):
     zip_path = os.path.join(tempfile.gettempdir(), "full_export.zip")
     with zipfile.ZipFile(zip_path, "w") as zipf:
-        for i, stem in enumerate(stems):
-            zipf.write(stem, f"stem_{i}.wav")
-        zipf.write(final_mix, "final_mix.wav")
     return zip_path
 
-# Main UI
-with gr.Blocks(css="""
-    body {
-        font-family: 'Segoe UI', sans-serif;
-        background-color: #1f2937;
-        color: white;
-        padding: 20px;
-    }
-    .studio-header {
-        text-align: center;
-        margin-bottom: 30px;
-        animation: float 3s ease-in-out infinite;
-    }
-    @keyframes float {
-        0%, 100% { transform: translateY(0); }
-        50% { transform: translateY(-10px); }
-    }
-    .gr-button {
-        background-color: #2563eb !important;
-        color: white !important;
-        border-radius: 10px;
-        padding: 10px 20px;
-        box-shadow: 0 0 10px #2563eb44;
-        border: none;
-    }
-    input[type="text"], select, textarea {
-        background-color: #334155 !important;
-        color: white !important;
-        border: 1px solid #475569 !important;
-        width: 100%;
-        padding: 10px;
     }
-""") as demo:
-    gr.HTML('''
-    <div class="studio-header">
-        <h3>Where Your Audio Meets Intelligence</h3>
-    </div>
-    ''')
     gr.Markdown("### Upload, edit, export — powered by AI!")
-    # --- Single File Studio Tab ---
     with gr.Tab("🎵 Single File Studio"):
         with gr.Row():
-            with gr.Column(min_width=300):
-                input_audio = gr.Audio(label="Upload Audio", type="filepath")
-                effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
-                preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
-                export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-                isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
-                submit_btn = gr.Button("Process Audio")
-            with gr.Column(min_width=300):
-                output_audio = gr.Audio(label="Processed Audio", type="filepath")
                 waveform_img = gr.Image(label="Waveform Preview")
-                session_log_out = gr.Textbox(label="Session Log", lines=5)
-                genre_out = gr.Textbox(label="Detected Genre", lines=1)
-                status_box = gr.Textbox(label="Status", value="Ready", lines=1)
-        submit_btn.click(fn=process_audio, inputs=[
-            input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
-        ], outputs=[
-            output_audio, waveform_img, session_log_out, genre_out, status_box
-        ])
-    # --- Remix Mode – Stem Splitting + Per-Stem Effects ---
     with gr.Tab("🎛 Remix Mode"):
         with gr.Row():
-            with gr.Column(min_width=200):
-                input_audio_remix = gr.Audio(label="Upload Music Track", type="filepath")
-                split_button = gr.Button("Split Into Drums, Bass, Vocals, etc.")
-            with gr.Column(min_width=400):
-                stem_outputs = [
-                    gr.File(label="Vocals"),
-                    gr.File(label="Drums"),
-                    gr.File(label="Bass"),
-                    gr.File(label="Other")
-                ]
-        split_button.click(fn=stem_split, inputs=[input_audio_remix], outputs=stem_outputs)
-    # --- AI Remastering Tab ---
-    with gr.Tab("🔮 AI Remastering"):
-        gr.Interface(
-            fn=ai_remaster,
-            inputs=gr.Audio(label="Upload Low-Quality Recording", type="filepath"),
-            outputs=gr.Audio(label="Studio-Grade Output", type="filepath"),
-            title="Transform Low-Quality Recordings to Studio Sound",
-            description="Uses noise reduction, vocal isolation, and mastering to enhance old recordings.",
-            allow_flagging="never"
        )
-    # --- Harmonic Saturation / Exciter ---
     with gr.Tab("🧬 Harmonic Saturation"):
-        gr.Interface(
-            fn=harmonic_saturation,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube"),
-                gr.Slider(minimum=0.1, maximum=1.0, value=0.2, label="Intensity")
-            ],
-            outputs=gr.Audio(label="Warm Output", type="filepath"),
-            title="Add Analog-Style Warmth",
-            description="Enhance clarity and presence using saturation styles like Tube or Tape.",
-            allow_flagging="never"
-        )
-    # --- Vocal Doubler / Harmonizer ---
     with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
-        gr.Interface(
-            fn=lambda x: apply_harmony(x),
-            inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
-            outputs=gr.Audio(label="Doubled Output", type="filepath"),
-            title="Add Vocal Doubling / Harmony",
-            description="Enhance vocals with doubling or harmony"
-        )
-    # --- Batch Processing ---
     with gr.Tab("🔊 Batch Processing"):
-        gr.Interface(
-            fn=batch_process_audio,
-            inputs=[
-                gr.File(label="Upload Multiple Files", file_count="multiple"),
-                gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order"),
-                gr.Checkbox(label="Isolate Vocals After Effects"),
-                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
-                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-            ],
-            outputs=[
-                gr.File(label="Download ZIP of All Processed Files"),
-                gr.Textbox(label="Status", value="✅ Ready", lines=1)
-            ],
-            title="Batch Audio Processor",
-            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
-            flagging_mode="never",
-            submit_btn="Process All Files"
-        )
-    # --- Vocal Pitch Correction – Auto-Tune Style ---
     with gr.Tab("🎤 AI Auto-Tune"):
-        gr.Interface(
-            fn=auto_tune_vocal,
-            inputs=[
-                gr.File(label="Source Voice Clip"),
-                gr.Textbox(label="Target Key", value="C", lines=1)
-            ],
-            outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
-            title="AI Auto-Tune",
-            description="Correct vocal pitch automatically using AI"
-        )
-    # --- Frequency Spectrum Tab – Real-time Visualizer ---
     with gr.Tab("📊 Frequency Spectrum"):
-        gr.Interface(
-            fn=visualize_spectrum,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Image(label="Spectrum Analysis")
-        )
-    # --- Loudness Graph Tab – EBU R128 Matching ---
     with gr.Tab("📈 Loudness Graph"):
-        gr.Interface(
-            fn=match_loudness,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
-            ],
-            outputs=gr.Audio(label="Normalized Output", type="filepath"),
-            title="Match Loudness Across Tracks",
-            description="Ensure consistent volume using EBU R128 standard"
-        )
-    # --- Save/Load Mix Session (.aiproj) ---
     with gr.Tab("📁 Save/Load Project"):
         with gr.Row():
-            with gr.Column(min_width=300):
-                gr.Interface(
-                    fn=save_project,
-                    inputs=[
-                        gr.File(label="Original Audio"),
-                        gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
-                        gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
-                    ],
-                    outputs=gr.File(label="Project File (.aiproj)")
-                )
-            with gr.Column(min_width=300):
-                gr.Interface(
-                    fn=load_project,
-                    inputs=gr.File(label="Upload .aiproj File"),
-                    outputs=[
-                        gr.Dropdown(choices=preset_names, label="Loaded Preset"),
-                        gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
-                    ],
-                    title="Resume Last Project",
-                    description="Load your saved session"
-                )
-    # --- Prompt-Based Editing Tab ---
     with gr.Tab("🧠 Prompt-Based Editing"):
-        gr.Interface(
-            fn=process_prompt,
-            inputs=[
-                gr.File(label="Upload Audio", type="filepath"),
-                gr.Textbox(label="Describe What You Want", lines=5)
-            ],
-            outputs=gr.Audio(label="Edited Output", type="filepath"),
-            title="Type Your Edits – AI Does the Rest",
-            description="Say what you want done and let AI handle it.",
-            allow_flagging="never"
-        )
-    # --- Custom EQ Editor ---
     with gr.Tab("🎛 Custom EQ Editor"):
-        gr.Interface(
-            fn=auto_eq,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Dropdown(choices=list(eq_map.keys()), label="Genre", value="Pop")
-            ],
-            outputs=gr.Audio(label="EQ-Enhanced Output", type="filepath"),
-            title="Custom EQ by Genre",
-            description="Apply custom EQ based on genre"
-        )
-    # --- A/B Compare Two Tracks ---
     with gr.Tab("🎯 A/B Compare"):
-        gr.Interface(
-            fn=compare_ab,
-            inputs=[
-                gr.Audio(label="Version A", type="filepath"),
-                gr.Audio(label="Version B", type="filepath")
-            ],
-            outputs=[
-                gr.Audio(label="Version A", type="filepath"),
-                gr.Audio(label="Version B", type="filepath")
-            ],
-            title="Compare Two Versions",
-            description="Hear two mixes side-by-side",
-            allow_flagging="never"
-        )
-    # --- Loop Playback ---
     with gr.Tab("🔁 Loop Playback"):
-        gr.Interface(
-            fn=loop_section,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS"),
-                gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS"),
-                gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
-            ],
-            outputs=gr.Audio(label="Looped Output", type="filepath"),
-            title="Repeat a Section",
-            description="Useful for editing a specific part"
-        )
-    # --- Share Effect Chain Tab ---
     with gr.Tab("🔗 Share Effect Chain"):
-        gr.Interface(
-            fn=lambda x: json.dumps(x),
-            inputs=gr.CheckboxGroup(choices=preset_choices["Default"]),
-            outputs=gr.Textbox(label="Share Code", lines=2),
-            title="Copy/Paste Effect Chain",
-            description="Share your setup via link/code"
-        )
     with gr.Tab("📥 Load Shared Chain"):
-        gr.Interface(
-            fn=json.loads,
-            inputs=gr.Textbox(label="Paste Shared Code", lines=2),
-            outputs=gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects"),
-            title="Restore From Shared Chain",
-            description="Paste shared effect chain JSON to restore settings"
-        )
-    # --- Keyboard Shortcuts Tab ---
     with gr.Tab("⌨ Keyboard Shortcuts"):
         gr.Markdown("""
         ### Keyboard Controls
778
  - `Ctrl + C`: Copy effect chain
779
  - `Ctrl + V`: Paste effect chain
780
  """)
781
- # --- Vocal Formant Correction ---
 
782
  with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
783
- gr.Interface(
784
- fn=formant_correct,
785
- inputs=[
786
- gr.Audio(label="Upload Vocal Track", type="filepath"),
787
- gr.Slider(minimum=-2, maximum=2, value=1.0, label="Formant Shift")
788
- ],
789
- outputs=gr.Audio(label="Natural-Sounding Vocal", type="filepath"),
790
- title="Preserve Vocal Quality During Pitch Shift",
791
- description="Make pitch-shifted vocals sound more human"
792
- )
793
- # --- Voice Swap / Cloning ---
794
  with gr.Tab("🔁 Voice Swap / Cloning"):
795
- gr.Interface(
796
- fn=clone_voice,
797
- inputs=[
798
- gr.File(label="Source Voice Clip"),
799
- gr.File(label="Reference Voice")
800
- ],
801
- outputs=gr.Audio(label="Converted Output", type="filepath"),
802
- title="Swap Voices Using AI",
803
- description="Clone or convert voice from one to another"
804
- )
805
- # --- DAW Template Export ---
806
  with gr.Tab("🎛 DAW Template Export"):
807
- gr.Interface(
808
- fn=generate_ableton_template,
809
- inputs=[gr.File(label="Upload Stems", file_count="multiple")],
810
- outputs=gr.File(label="DAW Template (.json/.als/.flp)"),
811
- title="Generate Ableton/Live/FLP Template",
812
- description="Export ready-to-use templates for DAWs"
813
- )
814
- # --- Export Full Mix ZIP ---
815
  with gr.Tab("📁 Export Full Mix ZIP"):
816
- gr.Interface(
817
- fn=export_full_mix,
818
- inputs=[
819
- gr.File(label="Stems", file_count="multiple"),
820
- gr.File(label="Final Mix")
821
- ],
822
- outputs=gr.File(label="Full Mix Archive (.zip)"),
823
- title="Export Stems + Final Mix Together",
824
- description="Perfect for sharing with producers or archiving"
825
- )
826
 
827
- # Launch Gradio App
828
  demo.launch()
 
 import datetime
 import librosa
 import warnings
 from TTS.api import TTS
 import base64
 import pickle
 print("Gradio version:", gr.__version__)
 warnings.filterwarnings("ignore")
 
+# === Utility Functions ===
+
+def audiosegment_to_array(audio):
+    return np.array(audio.get_array_of_samples()), audio.frame_rate
+
+def array_to_audiosegment(samples, frame_rate, channels=1):
+    return AudioSegment(
+        samples.tobytes(),
+        frame_rate=int(frame_rate),
+        sample_width=samples.dtype.itemsize,
+        channels=channels
+    )
+
+def save_audiosegment_to_temp(audio: AudioSegment, suffix=".wav"):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
+        audio.export(f.name, format=suffix.lstrip('.'))
+    return f.name
+
+def load_audiofile_to_numpy(path):
+    samples, sr = sf.read(path, dtype="int16")
+    if samples.ndim > 1 and samples.shape[1] > 2:
+        samples = samples[:, :2]
+    return samples, sr
+
+def show_waveform(audio_file):
+    try:
+        audio = AudioSegment.from_file(audio_file)
+        samples = np.array(audio.get_array_of_samples())
+        plt.figure(figsize=(10, 2))
+        plt.plot(samples[:10000], color="skyblue")
+        plt.axis("off")
+        buf = BytesIO()
+        plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
+        plt.close()
+        buf.seek(0)
+        return Image.open(buf)
+    except Exception:
+        return None
+
+# === Effects ===
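These utilities round-trip audio through numpy with two implicit assumptions worth keeping in mind: `get_array_of_samples()` returns interleaved int16 data for stereo segments, and `array_to_audiosegment` infers `sample_width` from the array dtype, so the pair only round-trips cleanly when processing preserves int16. A minimal usage sketch (the input path is hypothetical):

```python
import numpy as np
from pydub import AudioSegment

seg = AudioSegment.from_file("input.wav")      # hypothetical input file
samples, sr = audiosegment_to_array(seg)       # interleaved int16 samples
quieter = (samples * 0.5).astype(np.int16)     # process in numpy, restore dtype
out = array_to_audiosegment(quieter, sr, channels=seg.channels)
```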
 
 
 
 def apply_normalize(audio):
     return audio.normalize()
 
 def apply_noise_reduction(audio):
+    samples, sr = audiosegment_to_array(audio)
+    reduced = nr.reduce_noise(y=samples, sr=sr)
+    return array_to_audiosegment(reduced, sr, channels=audio.channels)
 
 def apply_compression(audio):
     return audio.compress_dynamic_range()
 
 def apply_pitch_shift(audio, semitones=-2):
     new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
+    shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": new_frame_rate}).set_frame_rate(audio.frame_rate)
+    return shifted
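The rewritten `apply_pitch_shift` uses pydub's frame-rate trick: `_spawn` relabels the same raw bytes at a scaled frame rate (shifting pitch and tempo together), and `set_frame_rate` then resamples back so downstream code sees the original rate. The same idea as a standalone sketch:

```python
from pydub import AudioSegment

def pitch_shift_resample(seg: AudioSegment, semitones: float) -> AudioSegment:
    # Relabel the raw bytes at a scaled frame rate: pitch and speed move together.
    ratio = 2 ** (semitones / 12)
    relabeled = seg._spawn(seg.raw_data, overrides={"frame_rate": int(seg.frame_rate * ratio)})
    # Resample back to the original rate so the segment stays compatible.
    return relabeled.set_frame_rate(seg.frame_rate)
```

Unlike the removed `np.interp` version, resampling is delegated to pydub, but the duration still changes with the pitch; a duration-preserving shift would need a phase-vocoder approach such as `librosa.effects.pitch_shift`.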
 
 
 def apply_echo(audio, delay_ms=500, decay=0.5):
     echo = audio - 10
 
 def apply_bitcrush(audio, bit_depth=8):
     samples = np.array(audio.get_array_of_samples())
+    max_val = 2 ** bit_depth - 1
     downsampled = np.round(samples / (32768 / max_val)).astype(np.int16)
     return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=audio.channels)
 
+# === Loudness Matching ===
 
 try:
     import pyloudnorm as pyln
 except ImportError:
     import subprocess
     subprocess.run(["pip", "install", "pyloudnorm"])
     import pyloudnorm as pyln
 
     out_path = save_audiosegment_to_temp(adjusted, ".wav")
     return out_path
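The corrected `max_val` makes the bitcrush arithmetic concrete: with `bit_depth=8`, `max_val = 255`, so each 16-bit sample is divided by `32768 / 255 ≈ 128.5` and rounded, collapsing the signal to roughly 256 levels (the halved frame rate then adds aliasing on top). A quick check of that arithmetic:

```python
import numpy as np

bit_depth = 8
max_val = 2 ** bit_depth - 1       # 255
step = 32768 / max_val             # ≈ 128.5 16-bit units per 8-bit level
crushed = np.round(np.int16(12345) / step)
print(step, crushed)               # 128.50..., 96.0
```

Note the committed code keeps the quantized level index rather than rescaling back to 16-bit range, so the result is also heavily attenuated.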
 
 
 eq_map = {
     "Pop": [(200, 500, -3), (2000, 4000, +4)],
     "EDM": [(60, 250, +6), (8000, 12000, +3)],
 
 def auto_eq(audio, genre="Pop"):
     from scipy.signal import butter, sosfilt
+    samples, sr = audiosegment_to_array(audio)
+    samples = samples.astype(np.float64)
     def band_eq(samples, sr, lowcut, highcut, gain):
         sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
         filtered = sosfilt(sos, samples)
         return samples + gain * filtered
 
+    for low, high, gain in eq_map.get(genre, []):
         samples = band_eq(samples, sr, low, high, gain)
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
 # === Vocal Isolation Helpers ===
+
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
     if rate != sample_rate:
 
     save_track(out_path, vocal_track, model.samplerate)
     return out_path
 
+# === Stem Splitting ===
+
 def stem_split(audio_path):
     model = pretrained.get_model(name='htdemucs')
     wav = load_track_local(audio_path, model.samplerate, channels=2)
     sources = apply_model(model, wav[None])[0]
     output_dir = tempfile.mkdtemp()
+    file_paths = []
     for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
         path = os.path.join(output_dir, f"{name}.wav")
         save_track(path, sources[i].cpu(), model.samplerate)
+        file_paths.append(path)
+    return file_paths[3], file_paths[0], file_paths[1], file_paths[2]
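`htdemucs` emits its sources in the order drums, bass, other, vocals, while the Remix Mode outputs below are wired vocals-first — hence the index shuffle in the return. A name-keyed variant avoids the magic indices; a sketch, assuming the same four-stem model and the `save_track` helper used above:

```python
import os

def stem_split_by_name(sources, output_dir, samplerate):
    # Map each htdemucs source tensor to its stem name, then return
    # the paths in the order the UI expects (vocals first).
    names = ['drums', 'bass', 'other', 'vocals']
    paths = {}
    for source, name in zip(sources, names):
        path = os.path.join(output_dir, f"{name}.wav")
        save_track(path, source.cpu(), samplerate)
        paths[name] = path
    return paths['vocals'], paths['drums'], paths['bass'], paths['other']
```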
+# === Processing Function ===
 
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
     try:
         audio = AudioSegment.from_file(audio_file)
+        effect_map = {
             "Noise Reduction": apply_noise_reduction,
             "Compress Dynamic Range": apply_compression,
             "Add Reverb": apply_reverb,
+            "Pitch Shift": apply_pitch_shift,
             "Echo": apply_echo,
             "Stereo Widening": apply_stereo_widen,
             "Bass Boost": apply_bass_boost,
             "Treble Boost": apply_treble_boost,
             "Normalize": apply_normalize,
+            "Limiter": lambda a: apply_limiter(a, limit_dB=-1),
+            "Auto Gain": lambda a: apply_auto_gain(a, target_dB=-20),
+            "Vocal Distortion": apply_vocal_distortion,
+            "Stage Mode": apply_stage_mode,
+            "Harmony": apply_harmony,
+            "Bitcrusher": apply_bitcrush,
         }
+        for eff in selected_effects:
+            if eff in effect_map:
+                audio = effect_map[eff](audio)
+        if isolate_vocals:
+            temp_wav = save_audiosegment_to_temp(audio, suffix=".wav")
+            vocal_path = apply_vocal_isolation(temp_wav)
+            audio_out = AudioSegment.from_file(vocal_path)
+        else:
+            audio_out = audio
+        tmp_path = tempfile.mktemp(suffix=f".{export_format.lower()}")
+        audio_out.export(tmp_path, format=export_format.lower())
+        samples, sr = load_audiofile_to_numpy(tmp_path)
+        waveform = show_waveform(tmp_path)
+        session_log = json.dumps({
+            "timestamp": str(datetime.datetime.now()),
+            "filename": os.path.basename(audio_file),
+            "effects_applied": selected_effects,
+            "isolate_vocals": isolate_vocals,
+            "export_format": export_format,
+            "detected_genre": "Unknown"
+        }, indent=2)
+        return (samples, sr), waveform, session_log, "Unknown", "🎉 Done!"
     except Exception as e:
+        return None, None, f"❌ Error: {e}", "", f"❌ Error: {e}"
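A caveat for the `(samples, sr)` tuples returned here and fed to the `gr.Audio(type="numpy")` outputs below: Gradio's numpy audio format is `(sample_rate, data)`, rate first. If playback misbehaves, a small adapter at the return sites is the likely fix — a sketch:

```python
def to_gradio_audio(samples, sr):
    # Gradio's numpy Audio components expect (sample_rate, np.ndarray);
    # the functions above currently return the tuple in the other order.
    return (sr, samples)
```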
 
+# === Batch Processing ===
 
 def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
     try:
         output_dir = tempfile.mkdtemp()
+        paths = []
+        logs = []
+        for i, f in enumerate(files):
+            samples_sr, _, log, _, _ = process_audio(f.name, selected_effects, isolate_vocals, preset_name, export_format)
+            if samples_sr is None:
+                continue
+            samples, sr = samples_sr
+            out_path = os.path.join(output_dir, f"processed_{i}.{export_format.lower()}")
+            sf.write(out_path, samples, sr)
+            paths.append(out_path)
+            logs.append(log)
         zip_path = os.path.join(tempfile.gettempdir(), "batch_output.zip")
         with zipfile.ZipFile(zip_path, 'w') as zipf:
+            for i, p in enumerate(paths):
+                zipf.write(p, os.path.basename(p))
+                zipf.writestr(f"session_log_{i}.json", logs[i])
+        return zip_path, "📦 Batch processing completed!"
     except Exception as e:
+        return None, f"❌ Batch processing failed: {e}"
+
+# === AI Remaster ===
 
 def ai_remaster(audio_path):
     try:
         audio = AudioSegment.from_file(audio_path)
         samples, sr = audiosegment_to_array(audio)
         reduced = nr.reduce_noise(y=samples, sr=sr)
+        cleaned = array_to_audiosegment(reduced, sr, audio.channels)
+        cleaned_path = save_audiosegment_to_temp(cleaned, ".wav")
+        isolated_path = apply_vocal_isolation(cleaned_path)
         final_path = ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
+        samples, sr = load_audiofile_to_numpy(final_path)
+        return (samples, sr)
     except Exception as e:
+        print(f"Remastering error: {e}")
         return None
 
 def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
     audio = AudioSegment.from_file(audio_path)
     audio = auto_eq(audio, genre=genre)
+    loud_adj_path = match_loudness(audio_path, target_lufs)
+    audio = AudioSegment.from_file(loud_adj_path)
     audio = apply_stereo_widen(audio, pan_amount=0.3)
+    out_path = save_audiosegment_to_temp(audio, ".wav")
     return out_path
 
+def apply_stereo_widen(audio, pan_amount=0.3):
+    left = audio.pan(-pan_amount)
+    right = audio.pan(pan_amount)
+    return AudioSegment.from_mono_audiosegments(left, right)
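A caveat on the new `apply_stereo_widen`: pydub's `pan()` returns a stereo segment, while `from_mono_audiosegments` requires mono inputs, so this combination will raise once both panned copies come back stereo. A mid/side widener avoids that — a sketch, hedged as one possible replacement rather than the committed approach:

```python
from pydub import AudioSegment

def stereo_widen_ms(audio: AudioSegment, side_gain_db=-3.0) -> AudioSegment:
    # Classic mid/side widening: L' = L + g*(L-R), R' = R - g*(L-R).
    left, right = audio.set_channels(2).split_to_mono()
    side = left.overlay(right.invert_phase())               # L - R
    new_left = left.overlay(side.apply_gain(side_gain_db))
    new_right = right.overlay(side.invert_phase().apply_gain(side_gain_db))
    return AudioSegment.from_mono_audiosegments(new_left, new_right)
```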
+# === Harmonic Saturation ===
+
+def harmonic_saturation(audio_path, saturation_type="Tube", intensity=0.2):
+    audio = AudioSegment.from_file(audio_path)
     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
     if saturation_type == "Tube":
         saturated = np.tanh(intensity * samples)
 
         saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
     else:
         saturated = samples
+    saturated_audio = array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, audio.channels)
+    out_path = save_audiosegment_to_temp(saturated_audio, ".wav")
+    samples, sr = load_audiofile_to_numpy(out_path)
+    return (samples, sr)
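Worth flagging in the Tube branch: `np.tanh(intensity * samples)` is driven with int16-scale floats (±32768), so for any audible input `tanh` pins to ±1 and the int16 cast leaves near-silence. Saturation curves are normally applied in a normalized domain — a sketch of that fix, with the intensity-to-drive mapping an assumption:

```python
import numpy as np

def tube_saturate(samples_int16: np.ndarray, intensity=0.2) -> np.ndarray:
    x = samples_int16.astype(np.float32) / 32768.0  # normalize to [-1, 1]
    drive = 1.0 + 9.0 * intensity                   # heuristic drive mapping
    y = np.tanh(drive * x) / np.tanh(drive)         # saturate, keep peaks at ±1
    return (y * 32767.0).astype(np.int16)           # back to int16 scale
```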
 
+# === Vocal Harmony ===
+
+def run_harmony(audio_file):
+    if not audio_file:
+        return None, "❌ Upload a vocal clip first."
+    try:
+        audio = AudioSegment.from_file(audio_file)
+        out_audio = apply_harmony(audio)
+        tmp_path = save_audiosegment_to_temp(out_audio, ".wav")
+        samples, sr = load_audiofile_to_numpy(tmp_path)
+        return (samples, sr), "✅ Success"
+    except Exception as e:
+        return None, f"❌ Error: {e}"
+
+# === Auto-Tune Helper ===
+
+def key_to_semitone(key="C"):
+    keys = {"C":0,"C#":1,"D":2,"D#":3,"E":4,"F":5,"F#":6,"G":7,"G#":8,"A":9,"A#":10,"B":11}
+    return keys.get(key, 0)
+
+def auto_tune_vocal(audio_file, target_key="C"):
     try:
+        audio = AudioSegment.from_file(audio_file.name)
         semitones = key_to_semitone(target_key)
         tuned_audio = apply_pitch_shift(audio, semitones)
+        tmp_path = save_audiosegment_to_temp(tuned_audio, ".wav")
+        samples, sr = load_audiofile_to_numpy(tmp_path)
+        return (samples, sr)
     except Exception as e:
         print(f"Auto-Tune Error: {e}")
         return None
 
+# === Loop Section ===
+
+def loop_section(audio_file, start_ms, end_ms, loops=2):
+    audio = AudioSegment.from_file(audio_file)
     section = audio[start_ms:end_ms]
     looped = section * loops
+    tmp_path = save_audiosegment_to_temp(looped, ".wav")
+    samples, sr = load_audiofile_to_numpy(tmp_path)
+    return (samples, sr)
+
+# === Frequency Spectrum ===
+
+def visualize_spectrum(audio_file):
+    y, sr = torchaudio.load(audio_file)
+    import librosa.display
     y_np = y.numpy().flatten()
     stft = librosa.stft(y_np)
     db = librosa.amplitude_to_db(abs(stft))
 
     buf.seek(0)
     return Image.open(buf)
 
+# === Compare A/B ===
+
 def compare_ab(track1_path, track2_path):
     return track1_path, track2_path
 
+# === DAW Template Export ===
+
+def generate_ableton_template(stem_files):
     template = {
         "format": "Ableton Live",
+        "stems": [os.path.basename(s.name) for s in stem_files],
         "effects": ["Reverb", "EQ", "Compression"],
         "tempo": 128,
         "title": "Studio Pulse Project"
 
     json.dump(template, f, indent=2)
     return out_path
 
+# === Full Mix ZIP Export ===
+
+def export_full_mix(stem_files, final_mix_file):
     zip_path = os.path.join(tempfile.gettempdir(), "full_export.zip")
     with zipfile.ZipFile(zip_path, "w") as zipf:
+        for i, stem in enumerate(stem_files):
+            zipf.write(stem.name, f"stem_{i}.wav")
+        zipf.write(final_mix_file.name, "final_mix.wav")
     return zip_path
 
+# === Save / Load Project ===
+
+def save_project(audio_file, preset, effects):
+    audio = AudioSegment.from_file(audio_file.name)
+    project_data = {
+        "audio": audio.raw_data,
+        "preset": preset,
+        "effects": effects
+    }
+    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+    with open(out_path, "wb") as f:
+        pickle.dump(project_data, f)
+    return out_path
+
+def load_project(project_file):
+    with open(project_file.name, "rb") as f:
+        data = pickle.load(f)
+    return data.get("preset", ""), data.get("effects", [])
+
+# === Prompt-Based Editing ===
+
+def process_prompt(audio_file, prompt):
+    audio = AudioSegment.from_file(audio_file)
+    # Placeholder: just apply noise reduction
+    processed_audio = apply_noise_reduction(audio)
+    tmp_path = save_audiosegment_to_temp(processed_audio, ".wav")
+    samples, sr = load_audiofile_to_numpy(tmp_path)
+    return (samples, sr)
+
+# === Voice Swap / Cloning ===
+
+def clone_voice(source_audio_file, reference_audio_file):
+    source = AudioSegment.from_file(source_audio_file.name)
+    ref = AudioSegment.from_file(reference_audio_file.name)
+    mixed = source.overlay(ref - 10)
+    tmp_path = save_audiosegment_to_temp(mixed, ".wav")
+    return tmp_path
+
+# === Presets ===
+
+preset_choices = {
+    # Paste your full preset dictionary here as before
+    "Default": [],
+    "Clean Podcast": ["Noise Reduction", "Normalize"],
+    "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
+    # (all other presets as per your original list)
+}
+
+preset_names = list(preset_choices.keys())
+
+# === Main Gradio App UI ===
+
+with gr.Blocks() as demo:
+    gr.HTML('<h3 style="text-align:center">Where Your Audio Meets Intelligence</h3>')
     gr.Markdown("### Upload, edit, export — powered by AI!")
+
+    # Tab: Single File Studio
     with gr.Tab("🎵 Single File Studio"):
         with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio(label="Upload Audio", type="filepath")
+                effects_check = gr.CheckboxGroup(
+                    choices=list({e for effects in preset_choices.values() for e in effects}),
+                    label="Apply Effects in Order"
+                )
+                preset_dd = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
+                export_format_dd = gr.Dropdown(choices=["WAV", "MP3"], label="Export Format", value="WAV")
+                isolate_vocals_chk = gr.Checkbox(label="Isolate Vocals After Effects")
+                process_btn = gr.Button("Process Audio")
+            with gr.Column():
+                output_audio = gr.Audio(label="Processed Audio", type="numpy")
                 waveform_img = gr.Image(label="Waveform Preview")
+                session_log = gr.Textbox(label="Session Log", lines=5)
+                genre_txt = gr.Textbox(label="Detected Genre", lines=1)
+                status_txt = gr.Textbox(label="Status", lines=1, value="Ready")
+
+        def update_effects_from_preset(preset_name):
+            return preset_choices.get(preset_name, [])
+
+        preset_dd.change(fn=update_effects_from_preset, inputs=preset_dd, outputs=effects_check)
+
+        def process_wrapper(audio, effects, isolate, preset, export_fmt):
+            effect_list = preset_choices.get(preset, []) if preset in preset_choices else effects
+            return process_audio(audio, effect_list, isolate, preset, export_fmt)
+
+        process_btn.click(
+            fn=process_wrapper,
+            inputs=[audio_input, effects_check, isolate_vocals_chk, preset_dd, export_format_dd],
+            outputs=[output_audio, waveform_img, session_log, genre_txt, status_txt]
+        )
+
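Because every dropdown value is a key of `preset_choices`, the conditional in `process_wrapper` always takes the preset branch, so manual checkbox selections never reach `process_audio` (and "Default", mapping to `[]`, silently drops them). If manual picks are meant to win when present, the precedence needs flipping — a sketch:

```python
def process_wrapper(audio, effects, isolate, preset, export_fmt):
    # Prefer explicit checkbox choices; fall back to the preset's chain.
    effect_list = effects if effects else preset_choices.get(preset, [])
    return process_audio(audio, effect_list, isolate, preset, export_fmt)
```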
+    # Tab: Remix Mode
     with gr.Tab("🎛 Remix Mode"):
         with gr.Row():
+            with gr.Column():
+                remix_input = gr.Audio(label="Upload Music Track", type="filepath")
+                split_btn = gr.Button("Split Into Drums, Bass, Vocals, etc.")
+            with gr.Column():
+                vocal_file = gr.File(label="Vocals")
+                drums_file = gr.File(label="Drums")
+                bass_file = gr.File(label="Bass")
+                other_file = gr.File(label="Other")
+        split_btn.click(
+            fn=stem_split,
+            inputs=remix_input,
+            outputs=[vocal_file, drums_file, bass_file, other_file]
         )
+
+    # Tab: AI Remastering
+    with gr.Tab("🔮 AI Remastering"):
+        remaster_input = gr.Audio(label="Upload Low-Quality Recording", type="filepath")
+        remaster_output = gr.Audio(label="Studio-Grade Output", type="numpy")
+        remaster_status = gr.Textbox(label="Status", value="Ready", interactive=False)
+        remaster_btn = gr.Button("Remaster")
+        remaster_btn.click(fn=ai_remaster, inputs=remaster_input, outputs=remaster_output)
+        remaster_btn.click(fn=lambda _: "Done!", inputs=remaster_btn, outputs=remaster_status)
+
+    # Tab: Harmonic Saturation
     with gr.Tab("🧬 Harmonic Saturation"):
+        sat_input = gr.Audio(label="Upload Track", type="filepath")
+        saturation_type = gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube")
+        sat_intensity = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, label="Intensity")
+        sat_output = gr.Audio(label="Warm Output", type="numpy")
+        sat_btn = gr.Button("Apply Saturation")
+        sat_btn.click(fn=harmonic_saturation, inputs=[sat_input, saturation_type, sat_intensity], outputs=sat_output)
+
+    # Tab: Vocal Doubler / Harmonizer
     with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
+        v_doubler_in = gr.Audio(label="Upload Vocal Clip", type="filepath")
+        v_doubler_out = gr.Audio(label="Doubled Output", type="numpy")
+        v_doubler_status = gr.Textbox(label="Status")
+        v_doubler_btn = gr.Button("Add Harmony")
+        v_doubler_btn.click(fn=run_harmony, inputs=v_doubler_in, outputs=[v_doubler_out, v_doubler_status])
+
+    # Tab: Batch Processing
     with gr.Tab("🔊 Batch Processing"):
+        batch_files_in = gr.File(label="Upload Multiple Files", file_count="multiple")
+        batch_effects = gr.CheckboxGroup(choices=list({e for effs in preset_choices.values() for e in effs}), label="Apply Effects in Order")
+        batch_isolate = gr.Checkbox(label="Isolate Vocals After Effects")
+        batch_preset = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
+        batch_export = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+        batch_process_btn = gr.Button("Process All Files")
+        batch_download = gr.File(label="Download ZIP of All Processed Files")
+        batch_status = gr.Textbox(label="Status")
+        batch_process_btn.click(fn=batch_process_audio, inputs=[batch_files_in, batch_effects, batch_isolate, batch_preset, batch_export], outputs=[batch_download, batch_status])
+
+    # Tab: AI Auto-Tune
     with gr.Tab("🎤 AI Auto-Tune"):
+        autotune_file = gr.File(label="Source Voice Clip")
+        autotune_key = gr.Textbox(label="Target Key", value="C")
+        autotune_output = gr.Audio(label="Pitch-Corrected Output", type="numpy")
+        autotune_btn = gr.Button("Apply Auto-Tune")
+        autotune_btn.click(fn=auto_tune_vocal, inputs=[autotune_file, autotune_key], outputs=autotune_output)
+
+    # Tab: Frequency Spectrum
     with gr.Tab("📊 Frequency Spectrum"):
+        spec_input = gr.Audio(label="Upload Track", type="filepath")
+        spec_output = gr.Image(label="Frequency Spectrum")
+        spec_btn = gr.Button("Visualize Spectrum")
+        spec_btn.click(fn=visualize_spectrum, inputs=spec_input, outputs=spec_output)
+
+    # Tab: Loudness Graph
     with gr.Tab("📈 Loudness Graph"):
+        loudness_input = gr.Audio(label="Upload Track", type="filepath")
+        loudness_target = gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
+        loudness_output = gr.Audio(label="Normalized Output", type="numpy")
+        loudness_btn = gr.Button("Match Loudness")
+        loudness_btn.click(fn=match_loudness, inputs=[loudness_input, loudness_target], outputs=loudness_output)
+
+    # Tab: Save / Load Project
     with gr.Tab("📁 Save/Load Project"):
         with gr.Row():
+            with gr.Column():
+                proj_audio = gr.File(label="Original Audio")
+                proj_preset = gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0])
+                proj_effects = gr.CheckboxGroup(choices=list({e for effs in preset_choices.values() for e in effs}), label="Applied Effects")
+                save_proj_btn = gr.Button("Save Project")
+                project_file = gr.File(label="Saved Project File (.aiproj)")
+            with gr.Column():
+                load_proj_file = gr.File(label="Load .aiproj File")
+                loaded_preset_out = gr.Dropdown(choices=preset_names, label="Loaded Preset")
+                loaded_effects_out = gr.CheckboxGroup(choices=list({e for effs in preset_choices.values() for e in effs}), label="Loaded Effects")
+                load_proj_btn = gr.Button("Load Project")
+
+        save_proj_btn.click(fn=save_project, inputs=[proj_audio, proj_preset, proj_effects], outputs=project_file)
+        load_proj_btn.click(fn=load_project, inputs=load_proj_file, outputs=[loaded_preset_out, loaded_effects_out])
+
+    # Tab: Prompt-Based Editing
     with gr.Tab("🧠 Prompt-Based Editing"):
+        prompt_audio = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])
+        prompt_text = gr.Textbox(label="Describe What You Want", lines=5)
+        prompt_output = gr.Audio(label="Edited Output", type="numpy")
+        prompt_btn = gr.Button("Process Prompt")
+        prompt_btn.click(fn=process_prompt, inputs=[prompt_audio, prompt_text], outputs=prompt_output)
+
+    # Tab: Custom EQ Editor
     with gr.Tab("🎛 Custom EQ Editor"):
+        eq_audio = gr.Audio(label="Upload Track", type="filepath")
+        eq_genre = gr.Dropdown(choices=list(eq_map.keys()), value="Pop", label="Genre")
+        eq_output = gr.Audio(label="EQ-Enhanced Output", type="numpy")
+        eq_btn = gr.Button("Apply EQ")
+        eq_btn.click(fn=auto_eq, inputs=[eq_audio, eq_genre], outputs=eq_output)
+
+    # Tab: A/B Compare
     with gr.Tab("🎯 A/B Compare"):
+        ab_input_a = gr.Audio(label="Version A", type="filepath")
+        ab_input_b = gr.Audio(label="Version B", type="filepath")
+        ab_output_a = gr.Audio(label="Version A", type="filepath")
+        ab_output_b = gr.Audio(label="Version B", type="filepath")
+        ab_compare_btn = gr.Button("Compare")
+        ab_compare_btn.click(fn=compare_ab, inputs=[ab_input_a, ab_input_b], outputs=[ab_output_a, ab_output_b])
+
+    # Tab: Loop Playback
     with gr.Tab("🔁 Loop Playback"):
+        loop_audio = gr.Audio(label="Upload Track", type="filepath")
+        loop_start = gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS")
+        loop_end = gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS")
+        loop_repeats = gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
+        loop_output = gr.Audio(label="Looped Output", type="numpy")
+        loop_btn = gr.Button("Loop Section")
+        loop_btn.click(fn=loop_section, inputs=[loop_audio, loop_start, loop_end, loop_repeats], outputs=loop_output)
+
+    # Tab: Share Effect Chain
     with gr.Tab("🔗 Share Effect Chain"):
+        share_effects = gr.CheckboxGroup(choices=list({e for effs in preset_choices.values() for e in effs}), label="Select Effects")
+        share_code = gr.Textbox(label="Share Code", lines=2)
+        share_btn = gr.Button("Generate Share Code")
+        share_btn.click(fn=lambda x: json.dumps(sorted(x)), inputs=share_effects, outputs=share_code)
+
+    # Tab: Load Shared Chain
     with gr.Tab("📥 Load Shared Chain"):
+        load_code = gr.Textbox(label="Paste Shared Code", lines=2)
+        loaded_effects = gr.CheckboxGroup(choices=list({e for effs in preset_choices.values() for e in effs}), label="Loaded Effects")
+        load_code_btn = gr.Button("Load Effects")
+        def load_shared_code(code_str):
+            try:
+                return json.loads(code_str)
+            except:
+                return []
+        load_code_btn.click(fn=load_shared_code, inputs=load_code, outputs=loaded_effects)
+
+    # Tab: Keyboard Shortcuts
     with gr.Tab("⌨ Keyboard Shortcuts"):
         gr.Markdown("""
         ### Keyboard Controls
 
         - `Ctrl + C`: Copy effect chain
         - `Ctrl + V`: Paste effect chain
         """)
+
+    # Tab: Vocal Formant Correction
     with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
+        formant_audio = gr.Audio(label="Upload Vocal Track", type="filepath")
+        formant_shift = gr.Slider(minimum=-2, maximum=2, value=1.0, step=0.1, label="Formant Shift")
+        formant_output = gr.Audio(label="Natural-Sounding Vocal", type="numpy")
+        formant_btn = gr.Button("Apply Correction")
+        formant_btn.click(fn=formant_correct, inputs=[formant_audio, formant_shift], outputs=formant_output)
+
+    # Tab: Voice Swap / Cloning
     with gr.Tab("🔁 Voice Swap / Cloning"):
+        source_voice = gr.File(label="Source Voice Clip")
+        reference_voice = gr.File(label="Reference Voice")
+        clone_output = gr.Audio(label="Converted Output", type="numpy")
+        clone_btn = gr.Button("Clone Voice")
+        clone_btn.click(fn=clone_voice, inputs=[source_voice, reference_voice], outputs=clone_output)
+
+    # Tab: DAW Template Export
     with gr.Tab("🎛 DAW Template Export"):
+        daw_stems = gr.File(label="Upload Stems", file_count="multiple")
+        daw_output = gr.File(label="DAW Template (.json/.als/.flp)")
+        daw_btn = gr.Button("Generate Template")
+        daw_btn.click(fn=generate_ableton_template, inputs=daw_stems, outputs=daw_output)
+
+    # Tab: Export Full Mix ZIP
     with gr.Tab("📁 Export Full Mix ZIP"):
+        mix_stems = gr.File(label="Stems", file_count="multiple")
+        final_mix = gr.File(label="Final Mix")
+        export_zip_out = gr.File(label="Full Mix Archive (.zip)")
+        export_zip_btn = gr.Button("Export ZIP")
+        export_zip_btn.click(fn=export_full_mix, inputs=[mix_stems, final_mix], outputs=export_zip_out)
 
 demo.launch()