Spaces:
Runtime error
Runtime error
Added Mel spectrogram (#1)
Browse files- Added Mel spectrogram (ee40ca2679686ea68ecdb4546b7d2fdac3a8c0ae)
Co-authored-by: Armin <[email protected]>
app.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
-
from matplotlib.colors import LinearSegmentedColormap
|
| 4 |
import numpy as np
|
| 5 |
import os
|
| 6 |
import soundfile as sf
|
| 7 |
import requests
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def download_file(url):
|
| 10 |
file_id = url.split('/')[-2]
|
|
@@ -14,16 +15,23 @@ def download_file(url):
|
|
| 14 |
open(local_filename, 'wb').write(response.content)
|
| 15 |
return local_filename
|
| 16 |
|
|
|
|
| 17 |
def main():
|
| 18 |
with gr.Blocks() as app:
|
| 19 |
gr.Markdown(
|
| 20 |
"""
|
| 21 |
-
Audio Analyzer
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
"""
|
| 25 |
)
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
| 27 |
with gr.Row():
|
| 28 |
with gr.Column():
|
| 29 |
audio_input = gr.Audio(type='filepath')
|
|
@@ -31,56 +39,44 @@ def main():
|
|
| 31 |
|
| 32 |
with gr.Column():
|
| 33 |
output_markdown = gr.Markdown(value="", visible=True)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
with gr.Accordion('Audio Downloader', open=False):
|
| 37 |
url_input = gr.Textbox(value='', label='Google Drive Audio URL')
|
| 38 |
download_butt = gr.Button(value='Download audio', variant='primary')
|
| 39 |
-
|
| 40 |
download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
|
| 41 |
-
create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
|
| 42 |
-
|
|
|
|
| 43 |
download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
|
| 44 |
-
create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
|
| 45 |
-
|
| 46 |
-
app.queue(max_size=1022).launch(share=True)
|
| 47 |
|
| 48 |
-
|
| 49 |
|
| 50 |
def create_spectrogram_and_get_info(audio_file):
|
| 51 |
plt.clf()
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
(0.5, 0.0, 0.0),
|
| 62 |
-
(1.0, 0.0, 0.0)]}
|
| 63 |
-
custom_cmap = LinearSegmentedColormap('CustomMap', cdict)
|
| 64 |
-
|
| 65 |
-
fig = plt.figure(figsize=(15, 5))
|
| 66 |
-
fig.patch.set_facecolor('black') # Imposta il colore di sfondo su nero
|
| 67 |
-
audio_data, sample_rate = sf.read(audio_file)
|
| 68 |
-
if len(audio_data.shape) > 1:
|
| 69 |
-
audio_data = np.mean(audio_data, axis=1)
|
| 70 |
-
plt.specgram(audio_data, Fs=sample_rate / 1, NFFT=4096, sides='onesided',
|
| 71 |
-
cmap=custom_cmap, scale_by_freq=True, scale='dB', mode='magnitude', window=np.hanning(4096)) # Usa la mappa di colori personalizzata
|
| 72 |
-
plt.savefig('spectrogram.png', dpi=300)
|
| 73 |
audio_info = sf.info(audio_file)
|
| 74 |
bit_depth = {'PCM_16': 16, 'FLOAT': 32}.get(audio_info.subtype, 0)
|
| 75 |
minutes, seconds = divmod(audio_info.duration, 60)
|
| 76 |
seconds, milliseconds = divmod(seconds, 1)
|
| 77 |
milliseconds *= 1000
|
| 78 |
-
bitrate = audio_info.samplerate * audio_info.channels * bit_depth / 8 / 1024 / 1024
|
|
|
|
| 79 |
speed_in_kbps = audio_info.samplerate * bit_depth / 1000
|
| 80 |
filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))
|
| 81 |
info_table = f"""
|
| 82 |
|
| 83 |
-
|
| 84 |
| Information | Value |
|
| 85 |
| :---: | :---: |
|
| 86 |
| File Name | {filename_without_extension} |
|
|
@@ -89,9 +85,9 @@ def create_spectrogram_and_get_info(audio_file):
|
|
| 89 |
| Audio Channels | {audio_info.channels} |
|
| 90 |
| Samples per second | {audio_info.samplerate} Hz |
|
| 91 |
| Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
|
| 92 |
-
|
| 93 |
"""
|
| 94 |
-
|
| 95 |
# Return the PNG file of the spectrogram and the info table
|
| 96 |
return info_table, 'spectrogram.png'
|
| 97 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import matplotlib.pyplot as plt
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import os
|
| 5 |
import soundfile as sf
|
| 6 |
import requests
|
| 7 |
+
import librosa.display
|
| 8 |
+
|
| 9 |
|
| 10 |
def download_file(url):
|
| 11 |
file_id = url.split('/')[-2]
|
|
|
|
| 15 |
open(local_filename, 'wb').write(response.content)
|
| 16 |
return local_filename
|
| 17 |
|
| 18 |
+
|
| 19 |
def main():
|
| 20 |
with gr.Blocks() as app:
|
| 21 |
gr.Markdown(
|
| 22 |
"""
|
| 23 |
+
<h1><center>Audio Analyzer by Ilaria</center></h1>\n
|
| 24 |
+
<h3><center>Help me on <a href="https://ko-fi.com/ilariaowo/shop">Ko-Fi</a>!</center></h3>\n
|
| 25 |
+
## Special thanks to Alex Murkoff for helping me code it!
|
| 26 |
+
#### Need help with AI? Join [AI Hub](https://discord.gg/aihub)!\n
|
| 27 |
+
**Note**: Try to keep the audio length under **2 minutes**,
|
| 28 |
+
since long audio files dont work well with a static spectrogram
|
| 29 |
"""
|
| 30 |
)
|
| 31 |
+
|
| 32 |
+
with gr.Row():
|
| 33 |
+
image_output = gr.Image(type='filepath', interactive=False)
|
| 34 |
+
|
| 35 |
with gr.Row():
|
| 36 |
with gr.Column():
|
| 37 |
audio_input = gr.Audio(type='filepath')
|
|
|
|
| 39 |
|
| 40 |
with gr.Column():
|
| 41 |
output_markdown = gr.Markdown(value="", visible=True)
|
| 42 |
+
|
|
|
|
| 43 |
with gr.Accordion('Audio Downloader', open=False):
|
| 44 |
url_input = gr.Textbox(value='', label='Google Drive Audio URL')
|
| 45 |
download_butt = gr.Button(value='Download audio', variant='primary')
|
| 46 |
+
|
| 47 |
download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
|
| 48 |
+
create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
|
| 49 |
+
outputs=[output_markdown, image_output])
|
| 50 |
+
|
| 51 |
download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
|
| 52 |
+
create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
|
| 53 |
+
outputs=[output_markdown, image_output])
|
|
|
|
| 54 |
|
| 55 |
+
app.queue(max_size=1022).launch(share=True)
|
| 56 |
|
| 57 |
def create_spectrogram_and_get_info(audio_file):
|
| 58 |
plt.clf()
|
| 59 |
+
|
| 60 |
+
y, sr = librosa.load(audio_file, sr=None)
|
| 61 |
+
S = librosa.feature.melspectrogram(y, sr=sr, n_mels=256)
|
| 62 |
+
log_S = librosa.amplitude_to_db(S, ref=np.max, top_db=256)
|
| 63 |
+
plt.figure(figsize=(12, 5.5))
|
| 64 |
+
librosa.display.specshow(log_S, sr=sr, x_axis='time')
|
| 65 |
+
plt.colorbar(format='%+2.0f dB', pad=0.01)
|
| 66 |
+
plt.tight_layout(pad=0.5)
|
| 67 |
+
plt.savefig('spectrogram.png', dpi=500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
audio_info = sf.info(audio_file)
|
| 69 |
bit_depth = {'PCM_16': 16, 'FLOAT': 32}.get(audio_info.subtype, 0)
|
| 70 |
minutes, seconds = divmod(audio_info.duration, 60)
|
| 71 |
seconds, milliseconds = divmod(seconds, 1)
|
| 72 |
milliseconds *= 1000
|
| 73 |
+
# bitrate = audio_info.samplerate * audio_info.channels * bit_depth / 8 / 1024 / 1024
|
| 74 |
+
# NOTE: this bitrate value doesn't seem to be used anywhere, so the computation is commented out.
|
| 75 |
speed_in_kbps = audio_info.samplerate * bit_depth / 1000
|
| 76 |
filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))
|
| 77 |
info_table = f"""
|
| 78 |
|
| 79 |
+
|
| 80 |
| Information | Value |
|
| 81 |
| :---: | :---: |
|
| 82 |
| File Name | {filename_without_extension} |
|
|
|
|
| 85 |
| Audio Channels | {audio_info.channels} |
|
| 86 |
| Samples per second | {audio_info.samplerate} Hz |
|
| 87 |
| Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
|
| 88 |
+
|
| 89 |
"""
|
| 90 |
+
|
| 91 |
# Return the PNG file of the spectrogram and the info table
|
| 92 |
return info_table, 'spectrogram.png'
|
| 93 |
|