tee342 committed on
Commit
3a131d6
·
verified ·
1 Parent(s): 7841500

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -12
app.py CHANGED
@@ -18,6 +18,9 @@ import datetime
18
  import librosa
19
  import joblib
20
  import warnings
 
 
 
21
 
22
  # Suppress warnings for cleaner logs
23
  warnings.filterwarnings("ignore")
@@ -51,7 +54,7 @@ def apply_reverb(audio):
51
  return audio.overlay(reverb, position=1000)
52
 
53
  def apply_pitch_shift(audio, semitones=-2):
54
- new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
55
  samples = np.array(audio.get_array_of_samples())
56
  resampled = np.interp(
57
  np.arange(0, len(samples), 2 ** (semitones / 12)),
@@ -172,7 +175,6 @@ def detect_genre(audio_path):
172
  try:
173
  y, sr = torchaudio.load(audio_path)
174
  mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
175
- # Dummy classifier – replace with real one later
176
  return "Speech"
177
  except Exception:
178
  return "Unknown"
@@ -195,7 +197,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
195
  try:
196
  audio = AudioSegment.from_file(audio_file)
197
  status = "🛠 Applying effects..."
198
-
199
  effect_map = {
200
  "Noise Reduction": apply_noise_reduction,
201
  "Compress Dynamic Range": apply_compression,
@@ -214,7 +216,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
214
  audio = effect_map[effect_name](audio)
215
 
216
  status = "💾 Saving final audio..."
217
- with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as f:
218
  if isolate_vocals:
219
  temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
220
  audio.export(temp_input, format="wav")
@@ -262,7 +264,87 @@ def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, ex
262
  except Exception as e:
263
  return None, f"❌ Batch processing failed: {str(e)}"
264
 
265
- # === Gradio Interface Setup ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  effect_options = [
267
  "Noise Reduction",
268
  "Compress Dynamic Range",
@@ -275,13 +357,10 @@ effect_options = [
275
  "Normalize"
276
  ]
277
 
278
- # === Multi-Tab UI ===
279
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
280
- gr.Markdown("""
281
- # 🎧 AI Audio Studio – Powered by Hugging Face & Demucs
282
- Upload, edit, and export audio with AI-powered tools.
283
- """)
284
 
 
285
  with gr.Tab("🎵 Single File Studio"):
286
  gr.Interface(
287
  fn=process_audio,
@@ -306,11 +385,12 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
306
  clear_btn=None
307
  )
308
 
 
309
  with gr.Tab("🔊 Batch Processing"):
310
  gr.Interface(
311
  fn=batch_process_audio,
312
  inputs=[
313
- gr.File(label="Upload Multiple Audio Files", file_count="multiple"),
314
  gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
315
  gr.Checkbox(label="Isolate Vocals After Effects"),
316
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
@@ -327,7 +407,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
327
  clear_btn=None
328
  )
329
 
330
- with gr.Tab("🎛 Remix Mode (Split Stems)"):
 
331
  gr.Interface(
332
  fn=stem_split,
333
  inputs=gr.Audio(label="Upload Music Track", type="filepath"),
@@ -343,4 +424,90 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
343
  clear_btn=None
344
  )
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  demo.launch()
 
18
  import librosa
19
  import joblib
20
  import warnings
21
+ from faster_whisper import WhisperModel
22
+ from mutagen.mp3 import MP3
23
+ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
 
25
  # Suppress warnings for cleaner logs
26
  warnings.filterwarnings("ignore")
 
54
  return audio.overlay(reverb, position=1000)
55
 
56
  def apply_pitch_shift(audio, semitones=-2):
57
+ new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12))
58
  samples = np.array(audio.get_array_of_samples())
59
  resampled = np.interp(
60
  np.arange(0, len(samples), 2 ** (semitones / 12)),
 
175
  try:
176
  y, sr = torchaudio.load(audio_path)
177
  mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
 
178
  return "Speech"
179
  except Exception:
180
  return "Unknown"
 
197
  try:
198
  audio = AudioSegment.from_file(audio_file)
199
  status = "🛠 Applying effects..."
200
+
201
  effect_map = {
202
  "Noise Reduction": apply_noise_reduction,
203
  "Compress Dynamic Range": apply_compression,
 
216
  audio = effect_map[effect_name](audio)
217
 
218
  status = "💾 Saving final audio..."
219
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
220
  if isolate_vocals:
221
  temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
222
  audio.export(temp_input, format="wav")
 
264
  except Exception as e:
265
  return None, f"❌ Batch processing failed: {str(e)}"
266
 
267
+ # === Whisper Transcription Tab ===
268
+ whisper_model = WhisperModel("base")
269
+
270
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_path: Path of the audio file passed to
            ``whisper_model.transcribe``.

    Returns:
        The ``text`` of every returned segment, joined with single spaces.
    """
    # The second element of the result (the info object) is not used here.
    segments, _info = whisper_model.transcribe(audio_path, beam_size=5)

    pieces = []
    for segment in segments:
        pieces.append(segment.text)
    return " ".join(pieces)
274
+
275
+ # === TTS Tab ===
276
+ from TTS.api import TTS
277
+
278
+ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
279
+
280
def generate_tts(text):
    """Synthesize *text* to a WAV file and return the path of that file.

    Args:
        text: The text handed to the module-level ``tts`` object.

    Returns:
        The path ``<system temp dir>/tts_output.wav`` that ``tts.tts_to_file``
        was asked to write.
    """
    # NOTE(review): the file name is fixed, so every call targets the same
    # path in the temp directory.
    target = os.path.join(tempfile.gettempdir(), "tts_output.wav")
    tts.tts_to_file(text=text, file_path=target)
    return target
284
+
285
+ # === Analyze Audio Stats ===
286
def analyze_audio(audio_path):
    """Compute summary statistics and a waveform image for an audio file.

    Args:
        audio_path: Path of the audio file, loaded with ``torchaudio.load``.

    Returns:
        A ``(stats, image)`` tuple. ``stats`` is a dict with the keys
        ``rms_loudness`` (float), ``silence_ratio`` (float) and
        ``tempo_bpm`` (int). ``image`` is the PIL image of the plotted
        waveform, opened from an in-memory PNG.
    """
    waveform, sr = torchaudio.load(audio_path)
    samples = waveform.numpy()
    # Flatten once and reuse; the original recomputed this for every consumer.
    signal = samples.flatten()

    rms = np.mean(librosa.feature.rms(y=signal))
    tempo, _ = librosa.beat.beat_track(y=signal, sr=sr)
    # Fraction of samples whose absolute amplitude is below 0.01.
    silence_ratio = np.mean(np.abs(samples) < 0.01)

    # Draw on an explicit figure instead of pyplot's implicit "current figure"
    # and always close it, so a failure while plotting or saving cannot leak
    # the figure.
    buf = BytesIO()
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(signal, color="lightblue")
        ax.set_title("Loudness Over Time")
        fig.tight_layout()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)
    buf.seek(0)
    image = Image.open(buf)

    stats = {
        "rms_loudness": float(rms),
        "silence_ratio": float(silence_ratio),
        # Some librosa versions may return the tempo as a one-element array
        # rather than a scalar; take the first element before truncating.
        "tempo_bpm": int(np.atleast_1d(tempo)[0]),
    }

    return stats, image
308
+
309
+ # === Vocal Removal (Karaoke Mode) ===
310
def vocal_removal(audio_path):
    """Build an instrumental mix by summing the first three stems of a track.

    Args:
        audio_path: Path of the song passed to ``stem_split``.

    Returns:
        The path ``<system temp dir>/instrumental.wav`` that the summed stems
        were saved to.
    """
    stems = stem_split(audio_path)
    # NOTE(review): assumes stem_split returns addable tensors ordered
    # drums, bass, other -- confirm against stem_split's definition.
    drums, bass, other = stems[0], stems[1], stems[2]
    instrumental = drums + bass + other

    destination = os.path.join(tempfile.gettempdir(), "instrumental.wav")
    # The sample rate is hard-coded to 44100 Hz rather than read from the input.
    torchaudio.save(destination, instrumental, 44100)
    return destination
316
+
317
+ # === Metadata Tagging ===
318
def tag_mp3(file_path, title, artist, album, year):
    """Write title/artist/album/year ID3 frames to an MP3 file in place.

    Frames already present in the file are kept; only the frames set here are
    replaced. (The previous implementation assigned a fresh ``ID3()`` object
    unconditionally, which discarded every existing tag and left its
    ``add_tags()`` fallback unreachable.)

    Args:
        file_path: The MP3 file to tag, passed to ``mutagen.mp3.MP3``.
        title: Text for the TIT2 (title) frame.
        artist: Text for the TPE1 (artist) frame.
        album: Text for the TALB (album) frame; skipped when falsy.
        year: Value for the TYER (year) frame; skipped when falsy. A whole
            float such as ``2020.0`` is written as ``"2020"``.

    Returns:
        ``file_path`` on success, or ``None`` if tagging failed for any reason.
    """
    import logging  # local import: only needed on the failure path

    try:
        audio = MP3(file_path)
        if audio.tags is None:
            # No ID3 header yet: create an empty tag container.
            audio.add_tags()
        audio.tags.add(TIT2(encoding=3, text=title))
        audio.tags.add(TPE1(encoding=3, text=artist))
        if album:
            audio.tags.add(TALB(encoding=3, text=album))
        if year:
            # Numeric UI inputs deliver floats; avoid writing "2020.0".
            if isinstance(year, float) and year.is_integer():
                year_text = str(int(year))
            else:
                year_text = str(year)
            audio.tags.add(TYER(encoding=3, text=year_text))
        audio.save()
        return file_path
    except Exception:
        # Best effort by design: callers receive None instead of an exception,
        # but the cause is logged rather than silently dropped.
        logging.getLogger(__name__).exception("Failed to tag %r", file_path)
        return None
335
+
336
+ # === Voice Style Transfer (Dummy) ===
337
def apply_style_transfer(audio_path, mood="Happy"):
    """Placeholder for voice style transfer; returns the input unchanged.

    Args:
        audio_path: Path of the voice clip.
        mood: Requested tone. Currently ignored.

    Returns:
        ``audio_path`` exactly as it was passed in.
    """
    # Replace with real model later
    stylized_path = audio_path
    return stylized_path
340
+
341
+ # === Session Sharing (URL Encode) ===
342
def encode_preset(selected_effects, preset_name, export_format):
    """Encode an effect-chain configuration into a shareable URL.

    The settings are serialized as JSON and base64-encoded with the URL-safe
    alphabet. Standard base64 can emit ``+`` and ``/``, which are altered or
    misinterpreted inside a query string.

    Args:
        selected_effects: The chosen effect names (stored under "effects").
        preset_name: The chosen preset (stored under "preset").
        export_format: The chosen export format (stored under "format").

    Returns:
        A URL whose ``preset`` query parameter carries the encoded settings.
        Decode it with ``base64.urlsafe_b64decode`` followed by ``json.loads``.
    """
    data = {"effects": selected_effects, "preset": preset_name, "format": export_format}
    payload = json.dumps(data).encode()
    encoded = base64.urlsafe_b64encode(payload).decode()
    return f"https://huggingface.co/spaces/YOUR_USERNAME/AudioMaster?preset={encoded}"
346
+
347
+ # === UI ===
348
  effect_options = [
349
  "Noise Reduction",
350
  "Compress Dynamic Range",
 
357
  "Normalize"
358
  ]
359
 
 
360
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
361
+ gr.Markdown("## 🎧 AI Audio Studio – The Ultimate AI-Powered Tool\nUpload, edit, and export polished tracks — all powered by AI!")
 
 
 
362
 
363
+ # --- Single File Studio ---
364
  with gr.Tab("🎵 Single File Studio"):
365
  gr.Interface(
366
  fn=process_audio,
 
385
  clear_btn=None
386
  )
387
 
388
+ # --- Batch Processing ---
389
  with gr.Tab("🔊 Batch Processing"):
390
  gr.Interface(
391
  fn=batch_process_audio,
392
  inputs=[
393
+ gr.File(label="Upload Multiple Files", file_count="multiple"),
394
  gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
395
  gr.Checkbox(label="Isolate Vocals After Effects"),
396
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
 
407
  clear_btn=None
408
  )
409
 
410
+ # --- Remix Mode ---
411
+ with gr.Tab("🎛 Remix Mode"):
412
  gr.Interface(
413
  fn=stem_split,
414
  inputs=gr.Audio(label="Upload Music Track", type="filepath"),
 
424
  clear_btn=None
425
  )
426
 
427
+ # --- Transcribe & Edit ---
428
+ with gr.Tab("📝 Transcribe & Edit"):
429
+ gr.Interface(
430
+ fn=transcribe_audio,
431
+ inputs=gr.Audio(label="Upload Audio", type="filepath"),
432
+ outputs=gr.Textbox(label="Transcribed Text", lines=10),
433
+ title="Transcribe & Edit Spoken Content",
434
+ description="Convert voice to text, then edit the script before exporting again."
435
+ )
436
+
437
+ # --- TTS Voice Generator ---
438
+ with gr.Tab("💬 TTS Voice Generator"):
439
+ gr.Interface(
440
+ fn=generate_tts,
441
+ inputs=gr.Textbox(label="Enter Text", lines=5),
442
+ outputs=gr.Audio(label="Generated Speech", type="filepath"),
443
+ title="Text-to-Speech Generator",
444
+ description="Type anything and turn it into natural-sounding speech."
445
+ )
446
+
447
+ # --- Audio Analysis Dashboard ---
448
+ with gr.Tab("📊 Audio Analysis"):
449
+ gr.Interface(
450
+ fn=analyze_audio,
451
+ inputs=gr.Audio(label="Upload Track", type="filepath"),
452
+ outputs=[
453
+ gr.JSON(label="Audio Stats"),
454
+ gr.Image(label="Waveform Graph")
455
+ ],
456
+ title="View Loudness, BPM, Silence, and More",
457
+ description="Analyze audio loudness, tempo, and frequency content."
458
+ )
459
+
460
+ # --- Voice Style Transfer ---
461
+ with gr.Tab("🧠 Voice Style Transfer"):
462
+ gr.Interface(
463
+ fn=apply_style_transfer,
464
+ inputs=[
465
+ gr.Audio(label="Upload Voice Clip", type="filepath"),
466
+ gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
467
+ ],
468
+ outputs=gr.Audio(label="Stylized Output", type="filepath"),
469
+ title="Change Emotional Tone of Voice",
470
+ description="Shift the emotional style of any voice clip."
471
+ )
472
+
473
+ # --- Session Sharing ---
474
+ with gr.Tab("🧾 Session Sharing"):
475
+ gr.Interface(
476
+ fn=encode_preset,
477
+ inputs=[
478
+ gr.CheckboxGroup(choices=effect_options, label="Effects"),
479
+ gr.Dropdown(choices=preset_names, label="Preset"),
480
+ gr.Dropdown(choices=["MP3", "WAV"], label="Format")
481
+ ],
482
+ outputs=gr.Textbox(label="Shareable Link", lines=1),
483
+ title="Save Your Settings and Share Them",
484
+ description="Generate a link to share your effect chain with others."
485
+ )
486
+
487
+ # --- Vocal Removal (Karaoke Mode) ---
488
+ with gr.Tab("🎯 Vocal Removal (Karaoke Mode)"):
489
+ gr.Interface(
490
+ fn=vocal_removal,
491
+ inputs=gr.Audio(label="Upload Song", type="filepath"),
492
+ outputs=gr.Audio(label="Instrumental Only", type="filepath"),
493
+ title="Remove Vocals from Any Track",
494
+ description="Create karaoke versions using AI"
495
+ )
496
+
497
+ # --- Metadata Tagging ---
498
+ with gr.Tab("🗂 Add MP3 Tags"):
499
+ gr.Interface(
500
+ fn=tag_mp3,
501
+ inputs=[
502
+ gr.File(label="Upload MP3/WAV"),
503
+ gr.Textbox(label="Title"),
504
+ gr.Textbox(label="Artist"),
505
+ gr.Textbox(label="Album"),
506
+ gr.Number(label="Year")
507
+ ],
508
+ outputs=gr.File(label="Tagged Audio File"),
509
+ title="Add Title, Artist, Album, Year to MP3",
510
+ description="Enhance your exported files with metadata tags"
511
+ )
512
+
513
  demo.launch()