admin committed on
Commit d323e45 · 1 Parent(s): 8dba7fb
Files changed (1)
  1. app.py +62 -71
app.py CHANGED
@@ -46,81 +46,66 @@ def most_common_element(input_list):
 
 
 def mp3_to_mel(audio_path: str, width=11.4):
-    os.makedirs(CACHE_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path)
-        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
-        log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_mel_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_mel_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{CACHE_DIR}/mel_{round(dur, 2)}_{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path)
+    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
+    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_mel_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_mel_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{CACHE_DIR}/mel_{round(dur, 2)}_{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()
 
 
 def mp3_to_cqt(audio_path: str, width=11.4):
-    os.makedirs(CACHE_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path)
-        cqt_spec = librosa.cqt(y=y, sr=sr)
-        log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_cqt_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_cqt_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{CACHE_DIR}/cqt_{round(dur, 2)}_{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path)
+    cqt_spec = librosa.cqt(y=y, sr=sr)
+    log_cqt_spec = librosa.power_to_db(np.abs(cqt_spec) ** 2, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_cqt_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_cqt_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{CACHE_DIR}/cqt_{round(dur, 2)}_{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()
 
 
 def mp3_to_chroma(audio_path: str, width=11.4):
-    os.makedirs(CACHE_DIR, exist_ok=True)
-    try:
-        y, sr = librosa.load(audio_path)
-        chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
-        log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
-        dur = librosa.get_duration(y=y, sr=sr)
-        total_frames = log_chroma_spec.shape[1]
-        step = int(width * total_frames / dur)
-        count = int(total_frames / step)
-        begin = int(0.5 * (total_frames - count * step))
-        end = begin + step * count
-        for i in range(begin, end, step):
-            librosa.display.specshow(log_chroma_spec[:, i : i + step])
-            plt.axis("off")
-            plt.savefig(
-                f"{CACHE_DIR}/chroma_{round(dur, 2)}_{i}.jpg",
-                bbox_inches="tight",
-                pad_inches=0.0,
-            )
-            plt.close()
-
-    except Exception as e:
-        print(f"Error converting {audio_path} : {e}")
+    y, sr = librosa.load(audio_path)
+    chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
+    log_chroma_spec = librosa.power_to_db(np.abs(chroma_spec) ** 2, ref=np.max)
+    dur = librosa.get_duration(y=y, sr=sr)
+    total_frames = log_chroma_spec.shape[1]
+    step = int(width * total_frames / dur)
+    count = int(total_frames / step)
+    begin = int(0.5 * (total_frames - count * step))
+    end = begin + step * count
+    for i in range(begin, end, step):
+        librosa.display.specshow(log_chroma_spec[:, i : i + step])
+        plt.axis("off")
+        plt.savefig(
+            f"{CACHE_DIR}/chroma_{round(dur, 2)}_{i}.jpg",
+            bbox_inches="tight",
+            pad_inches=0.0,
+        )
+        plt.close()
 
 
 def embed_img(img_path, input_size=224):
@@ -142,9 +127,15 @@ def inference(mp3_path, log_name: str, folder_path=CACHE_DIR):
     if not mp3_path:
         return None, "Please input an audio!"
 
-    network = EvalNet(log_name)
     spec = log_name.split("_")[-1]
-    eval("mp3_to_%s" % spec)(mp3_path)
+    os.makedirs(folder_path, exist_ok=True)
+    try:
+        network = EvalNet(log_name)
+        eval("mp3_to_%s" % spec)(mp3_path)
+
+    except Exception as e:
+        print(f"Error converting {mp3_path} : {e}")
+
     outputs = []
     all_files = os.listdir(folder_path)
     for file_name in all_files:
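
For reference, the three converters (mp3_to_mel, mp3_to_cqt, mp3_to_chroma) share the same windowing arithmetic left unchanged by this commit: step is the number of spectrogram frames spanning width seconds, count is how many full windows fit in the clip, and begin/end center those windows. A minimal standalone sketch of that arithmetic, using hypothetical frame counts rather than an actual librosa call:

# Standalone sketch of the frame-windowing arithmetic shared by
# mp3_to_mel / mp3_to_cqt / mp3_to_chroma (illustrative values only).
def window_bounds(total_frames: int, dur: float, width: float = 11.4):
    """Return (begin, end, step) such that range(begin, end, step)
    yields the start frame of each width-second window, centered
    within the clip."""
    step = int(width * total_frames / dur)             # frames per window
    count = int(total_frames / step)                   # full windows that fit
    begin = int(0.5 * (total_frames - count * step))   # center the windows
    end = begin + step * count
    return begin, end, step

# Hypothetical example: a 60 s clip whose spectrogram has 2584 frames.
begin, end, step = window_bounds(total_frames=2584, dur=60.0)
print(step, list(range(begin, end, step)))  # 490 frames per window, 5 windows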