tee342 committed on
Commit
1cda55b
·
verified ·
1 Parent(s): f9afce2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -171
app.py CHANGED
@@ -118,9 +118,40 @@ def apply_stage_mode(audio):
118
  processed = apply_bass_boost(processed, gain=6)
119
  return apply_limiter(processed, limit_dB=-2)
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # === Auto-EQ per Genre ===
122
  def auto_eq(audio, genre="Pop"):
123
- # Define frequency bands based on genre
124
  eq_map = {
125
  "Pop": [(200, 500, -3), (2000, 4000, +4)], # Cut muddiness, boost vocals
126
  "EDM": [(60, 250, +6), (8000, 12000, +3)], # Maximize bass & sparkle
@@ -169,7 +200,7 @@ def process_prompt(audio_path, prompt):
169
  audio = apply_reverb(audio)
170
 
171
  if "pitch" in prompt.lower() and "correct" in prompt.lower():
172
- audio = apply_pitch_shift(audio, 0) # Placeholder
173
 
174
  if "harmony" in prompt.lower() or "double" in prompt.lower():
175
  audio = apply_harmony(audio)
@@ -178,42 +209,20 @@ def process_prompt(audio_path, prompt):
178
  audio.export(out_path, format="wav")
179
  return out_path
180
 
181
- # === Real-Time EQ Sliders ===
182
- def real_time_eq(audio, low_gain=0, mid_gain=0, high_gain=0):
183
- samples, sr = audiosegment_to_array(audio)
184
- samples = samples.astype(np.float64)
185
-
186
- # Low EQ: 20–500Hz
187
- sos_low = butter(10, [20, 500], btype='band', output='sos', fs=sr)
188
- samples = sosfilt(sos_low, samples) * (10 ** (low_gain / 20))
189
-
190
- # Mid EQ: 500–4000Hz
191
- sos_mid = butter(10, [500, 4000], btype='band', output='sos', fs=sr)
192
- samples += sosfilt(sos_mid, samples) * (10 ** (mid_gain / 20))
193
-
194
- # High EQ: 4000–20000Hz
195
- sos_high = butter(10, [4000, 20000], btype='high', output='sos', fs=sr)
196
- samples += sosfilt(sos_high, samples) * (10 ** (high_gain / 20))
197
-
198
- return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
199
-
200
- # === AI Suggest Presets Based on Genre ===
201
- genre_preset_map = {
202
- "Speech": ["Clean Podcast", "Normalize"],
203
- "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
204
- "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
205
- "Rock": ["Distortion", "Punchy Mids", "Reverb"],
206
- "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"]
207
- }
208
-
209
  def suggest_preset_by_genre(audio_path):
210
  try:
211
  y, sr = torchaudio.load(audio_path)
212
  mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
213
  genre = "Pop"
214
- return genre_preset_map.get(genre, ["Default"])
215
  except Exception:
216
- return ["Default"]
 
 
 
 
 
217
 
218
  # === Create Karaoke Video from Audio + Lyrics ===
219
  def create_karaoke_video(audio_path, lyrics, bg_image=None):
@@ -534,6 +543,7 @@ def diarize_and_transcribe(audio_path):
534
  def visualize_spectrum(audio_path):
535
  y, sr = torchaudio.load(audio_path)
536
  y_np = y.numpy().flatten()
 
537
  stft = librosa.stft(y_np)
538
  db = librosa.amplitude_to_db(abs(stft))
539
 
@@ -548,32 +558,6 @@ def visualize_spectrum(audio_path):
548
  buf.seek(0)
549
  return Image.open(buf)
550
 
551
- # === Real-Time EQ Slider Wrapper ===
552
- def real_time_eq_slider(audio, low_gain, mid_gain, high_gain):
553
- return real_time_eq(audio, low_gain, mid_gain, high_gain)
554
-
555
- # === Cloud Project Sync (Premium Feature) ===
556
- def cloud_save_project(audio, preset, effects, project_name, project_id=""):
557
- project_data = {
558
- "audio": audio,
559
- "preset": preset,
560
- "effects": effects
561
- }
562
- project_path = os.path.join(tempfile.gettempdir(), f"{project_name}.aiproj")
563
- with open(project_path, "wb") as f:
564
- pickle.dump(project_data, f)
565
- return project_path, f"✅ '{project_name}' saved to cloud"
566
-
567
- def cloud_load_project(project_id):
568
- if not project_id:
569
- return None, None, None
570
- try:
571
- with open(project_id, "rb") as f:
572
- data = pickle.load(f)
573
- return data["audio"], data["preset"], data["effects"]
574
- except Exception:
575
- return None, None, None
576
-
577
  # === UI ===
578
  effect_options = [
579
  "Noise Reduction",
@@ -663,35 +647,43 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
663
  clear_btn=None
664
  )
665
 
666
- # --- Genre Mastering Tab ===
667
- with gr.Tab("🎧 Genre Mastering"):
668
  gr.Interface(
669
- fn=lambda audio, genre: auto_eq(audio, genre),
670
  inputs=[
671
  gr.Audio(label="Upload Track", type="filepath"),
672
- gr.Dropdown(choices=list(genre_preset_map.keys()), label="Select Genre", value="Pop")
673
  ],
674
- outputs=gr.Audio(label="Mastered Output", type="filepath"),
675
- title="Genre-Specific Mastering",
676
- description="Apply professionally tuned mastering settings for popular music genres."
677
  )
678
 
679
- # --- Real-Time EQ ===
680
- with gr.Tab("🎛 Real-Time EQ"):
681
  gr.Interface(
682
- fn=real_time_eq_slider,
683
  inputs=[
684
  gr.Audio(label="Upload Track", type="filepath"),
685
- gr.Slider(minimum=-12, maximum=12, value=0, label="Low Gain (-200–500Hz)"),
686
- gr.Slider(minimum=-12, maximum=12, value=0, label="Mid Gain (500Hz–4kHz)"),
687
- gr.Slider(minimum=-12, maximum=12, value=0, label="High Gain (4kHz+)"),
688
  ],
689
- outputs=gr.Audio(label="EQ'd Output", type="filepath"),
690
- title="Adjust Frequency Bands Live",
691
- description="Fine-tune your sound using real-time sliders for low, mid, and high frequencies."
692
  )
693
 
694
- # --- Spectrum Visualizer ===
 
 
 
 
 
 
 
 
 
 
695
  with gr.Tab("📊 Frequency Spectrum"):
696
  gr.Interface(
697
  fn=visualize_spectrum,
@@ -715,24 +707,40 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
715
  allow_flagging="never"
716
  )
717
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  # --- Vocal Presets for Singers ===
719
  with gr.Tab("🎤 Vocal Presets for Singers"):
720
  gr.Interface(
721
  fn=process_audio,
722
  inputs=[
723
  gr.Audio(label="Upload Vocal Track", type="filepath"),
724
- gr.CheckboxGroup(choices=[
725
- "Noise Reduction",
726
- "Normalize",
727
- "Compress Dynamic Range",
728
- "Bass Boost",
729
- "Treble Boost",
730
- "Reverb",
731
- "Auto Gain",
732
- "Vocal Distortion",
733
- "Harmony",
734
- "Stage Mode"
735
- ]),
736
  gr.Checkbox(label="Isolate Vocals After Effects"),
737
  gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
738
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
@@ -764,7 +772,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
764
  )
765
 
766
  # --- Speaker Diarization ("Who Spoke When?") ===
767
- if diarize_pipeline:
768
  with gr.Tab("🧍‍♂️ Who Spoke When?"):
769
  gr.Interface(
770
  fn=diarize_and_transcribe,
@@ -833,87 +841,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
833
  description="Detect and trim silence at start/end or between words"
834
  )
835
 
836
- # --- Save/Load Project File (.aiproj) ===
837
- with gr.Tab("📁 Save/Load Project"):
838
- gr.Interface(
839
- fn=save_project,
840
- inputs=[
841
- gr.File(label="Original Audio"),
842
- gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
843
- gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
844
- ],
845
- outputs=gr.File(label="Project File (.aiproj)"),
846
- title="Save Everything Together",
847
- description="Save your session, effects, and settings in one file to reuse later."
848
- )
849
-
850
- gr.Interface(
851
- fn=load_project,
852
- inputs=gr.File(label="Upload .aiproj File"),
853
- outputs=[
854
- gr.Dropdown(choices=preset_names, label="Loaded Preset"),
855
- gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
856
- ],
857
- title="Resume Last Project",
858
- description="Load your saved session"
859
- )
860
-
861
- # --- Cloud Project Sync (Premium Feature) ===
862
- with gr.Tab("☁️ Cloud Project Sync"):
863
- gr.Markdown("Save your projects online and resume them from any device.")
864
-
865
- gr.Interface(
866
- fn=cloud_save_project,
867
- inputs=[
868
- gr.File(label="Upload Audio", type="filepath"),
869
- gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
870
- gr.CheckboxGroup(choices=effect_options, label="Effects Applied"),
871
- gr.Textbox(label="Project Name"),
872
- gr.Textbox(label="Project ID (Optional)")
873
- ],
874
- outputs=[
875
- gr.File(label="Downloadable Project File"),
876
- gr.Textbox(label="Status", value="✅ Ready", lines=1)
877
- ],
878
- title="Save to Cloud",
879
- description="Save your project online and share it across devices."
880
- )
881
-
882
- gr.Interface(
883
- fn=cloud_load_project,
884
- inputs=gr.Textbox(label="Enter Project ID"),
885
- outputs=[
886
- gr.Audio(label="Loaded Audio", type="filepath"),
887
- gr.Dropdown(choices=preset_names, label="Loaded Preset"),
888
- gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
889
- ],
890
- title="Load from Cloud",
891
- description="Resume a project from the cloud",
892
- allow_flagging="never"
893
- )
894
-
895
- # --- AI Suggest Presets Based on Genre ===
896
- with gr.Tab("🧠 AI Suggest Preset"):
897
- gr.Interface(
898
- fn=suggest_preset_by_genre,
899
- inputs=gr.Audio(label="Upload Track", type="filepath"),
900
- outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
901
- title="Let AI Recommend Best Preset",
902
- description="Upload a track and let AI recommend the best preset based on genre."
903
- )
904
-
905
- # --- Create Karaoke Video from Audio + Lyrics ===
906
- with gr.Tab("📹 Create Karaoke Video"):
907
- gr.Interface(
908
- fn=create_karaoke_video,
909
- inputs=[
910
- gr.Audio(label="Upload Track", type="filepath"),
911
- gr.Textbox(label="Lyrics", lines=10),
912
- gr.File(label="Background (Optional)"),
913
- ],
914
- outputs=gr.Video(label="Karaoke Video"),
915
- title="Make Karaoke Videos from Audio + Lyrics",
916
- description="Generate karaoke-style videos with real-time sync."
917
- )
918
-
919
  demo.launch()
 
118
  processed = apply_bass_boost(processed, gain=6)
119
  return apply_limiter(processed, limit_dB=-2)
120
 
121
+ # === Loudness Matching (EBU R128) ===
122
+ try:
123
+ import pyloudnorm as pyln
124
+ except ImportError:
125
+ print("Installing pyloudnorm...")
126
+ import subprocess
127
+ subprocess.run(["pip", "install", "pyloudnorm"])
128
+ import pyloudnorm as pyln
129
+
130
+ def match_loudness(audio_path, target_lufs=-14.0):
131
+ meter = pyln.Meter(44100)
132
+ wav = AudioSegment.from_file(audio_path).set_frame_rate(44100)
133
+ samples = np.array(wav.get_array_of_samples()).astype(np.float64) / 32768.0
134
+ loudness = meter.integrated_loudness(samples)
135
+ gain_db = target_lufs - loudness
136
+ adjusted = wav + gain_db
137
+ out_path = os.path.join(tempfile.gettempdir(), "loudness_output.wav")
138
+ adjusted.export(out_path, format="wav")
139
+ return out_path
140
+
141
+ # === Dynamic Range Compression Presets ===
142
+ def apply_compression_preset(audio, preset="Radio Ready"):
143
+ if preset == "Radio Ready":
144
+ return audio.compress_dynamic_range(threshold=-5, ratio=4)
145
+ elif preset == "Podcast Safe":
146
+ return audio.compress_dynamic_range(threshold=-10, ratio=2)
147
+ elif preset == "Club Mix":
148
+ return audio.compress_dynamic_range(threshold=-3, ratio=6)
149
+ elif preset == "Speech":
150
+ return audio.compress_dynamic_range(threshold=-6, ratio=1.5)
151
+ return audio
152
+
153
  # === Auto-EQ per Genre ===
154
  def auto_eq(audio, genre="Pop"):
 
155
  eq_map = {
156
  "Pop": [(200, 500, -3), (2000, 4000, +4)], # Cut muddiness, boost vocals
157
  "EDM": [(60, 250, +6), (8000, 12000, +3)], # Maximize bass & sparkle
 
200
  audio = apply_reverb(audio)
201
 
202
  if "pitch" in prompt.lower() and "correct" in prompt.lower():
203
+ audio = apply_pitch_correction(audio)
204
 
205
  if "harmony" in prompt.lower() or "double" in prompt.lower():
206
  audio = apply_harmony(audio)
 
209
  audio.export(out_path, format="wav")
210
  return out_path
211
 
212
+ # === AI Suggest Preset Based on Genre ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  def suggest_preset_by_genre(audio_path):
214
  try:
215
  y, sr = torchaudio.load(audio_path)
216
  mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
217
  genre = "Pop"
218
+ return f"Suggested Preset: {genre}"
219
  except Exception:
220
+ return "Unknown"
221
+
222
+ # === Vocal Pitch Correction – Auto-Tune Style ===
223
+ def apply_pitch_correction(audio, target_key="C"):
224
+ # Placeholder: In real use, this would align pitch to the nearest key note
225
+ return apply_pitch_shift(audio, 0.2)
226
 
227
  # === Create Karaoke Video from Audio + Lyrics ===
228
  def create_karaoke_video(audio_path, lyrics, bg_image=None):
 
543
  def visualize_spectrum(audio_path):
544
  y, sr = torchaudio.load(audio_path)
545
  y_np = y.numpy().flatten()
546
+
547
  stft = librosa.stft(y_np)
548
  db = librosa.amplitude_to_db(abs(stft))
549
 
 
558
  buf.seek(0)
559
  return Image.open(buf)
560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  # === UI ===
562
  effect_options = [
563
  "Noise Reduction",
 
647
  clear_btn=None
648
  )
649
 
650
+ # --- Loudness Match (EBU R128) ===
651
+ with gr.Tab("📈 Loudness Match"):
652
  gr.Interface(
653
+ fn=match_loudness,
654
  inputs=[
655
  gr.Audio(label="Upload Track", type="filepath"),
656
+ gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
657
  ],
658
+ outputs=gr.Audio(label="Normalized Output", type="filepath"),
659
+ title="Match Loudness (EBU R128)",
660
+ description="Ensure consistent loudness across tracks using industry-standard normalization."
661
  )
662
 
663
+ # --- Dynamic Compression Presets ===
664
+ with gr.Tab("🎛 Dynamic Compression Presets"):
665
  gr.Interface(
666
+ fn=apply_compression_preset,
667
  inputs=[
668
  gr.Audio(label="Upload Track", type="filepath"),
669
+ gr.Dropdown(choices=["Radio Ready", "Podcast Safe", "Club Mix", "Speech"], label="Preset")
 
 
670
  ],
671
+ outputs=gr.Audio(label="Compressed Output", type="filepath"),
672
+ title="Apply Pre-Tuned Compression Settings",
673
+ description="Choose from compression presets used in radio, podcasting, club mixes, and speech editing."
674
  )
675
 
676
+ # --- AI Suggest Preset Based on Genre ===
677
+ with gr.Tab("🧠 AI Suggest Preset"):
678
+ gr.Interface(
679
+ fn=suggest_preset_by_genre,
680
+ inputs=gr.Audio(label="Upload Track", type="filepath"),
681
+ outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
682
+ title="AI Recommends Best Preset",
683
+ description="Upload a track and let AI recommend the best preset based on detected genre."
684
+ )
685
+
686
+ # --- Real-Time Spectrum Analyzer + EQ ===
687
  with gr.Tab("📊 Frequency Spectrum"):
688
  gr.Interface(
689
  fn=visualize_spectrum,
 
707
  allow_flagging="never"
708
  )
709
 
710
+ # --- Vocal Pitch Correction (Auto-Tune) ===
711
+ with gr.Tab("🧬 Vocal Pitch Correction"):
712
+ gr.Interface(
713
+ fn=apply_pitch_correction,
714
+ inputs=[
715
+ gr.Audio(label="Upload Vocal Clip", type="filepath"),
716
+ gr.Textbox(label="Target Key", value="C", lines=1)
717
+ ],
718
+ outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
719
+ title="Auto-Tune Style Pitch Correction",
720
+ description="Correct vocal pitch automatically"
721
+ )
722
+
723
+ # --- Create Karaoke Video from Audio + Lyrics ===
724
+ with gr.Tab("📹 Create Karaoke Video"):
725
+ gr.Interface(
726
+ fn=create_karaoke_video,
727
+ inputs=[
728
+ gr.Audio(label="Upload Track", type="filepath"),
729
+ gr.Textbox(label="Lyrics", lines=10),
730
+ gr.File(label="Background (Optional)")
731
+ ],
732
+ outputs=gr.Video(label="Karaoke Video"),
733
+ title="Make Karaoke Videos from Audio + Lyrics",
734
+ description="Generate karaoke-style videos with real-time sync."
735
+ )
736
+
737
  # --- Vocal Presets for Singers ===
738
  with gr.Tab("🎤 Vocal Presets for Singers"):
739
  gr.Interface(
740
  fn=process_audio,
741
  inputs=[
742
  gr.Audio(label="Upload Vocal Track", type="filepath"),
743
+ gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
 
 
 
 
 
 
 
 
 
 
 
744
  gr.Checkbox(label="Isolate Vocals After Effects"),
745
  gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
746
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
 
772
  )
773
 
774
  # --- Speaker Diarization ("Who Spoke When?") ===
775
+ if "diarize_pipeline" in locals():
776
  with gr.Tab("🧍‍♂️ Who Spoke When?"):
777
  gr.Interface(
778
  fn=diarize_and_transcribe,
 
841
  description="Detect and trim silence at start/end or between words"
842
  )
843
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844
  demo.launch()