admin committed on
Commit
3b68f3a
·
1 Parent(s): 461b48a
Files changed (3) hide show
  1. app.py +30 -48
  2. requirements.txt +3 -3
  3. utils.py +4 -10
app.py CHANGED
@@ -18,25 +18,25 @@ from PIL import Image
18
 
19
 
20
  TRANSLATE = {
21
- "Symphony": "交响乐 Symphony",
22
- "Opera": "戏曲 Opera",
23
- "Solo": "独奏 Solo",
24
- "Chamber": "室内乐 Chamber",
25
- "Pop_vocal_ballad": "芭乐 Pop vocal ballad",
26
- "Adult_contemporary": "成人时代 Adult contemporary",
27
- "Teen_pop": "青少年流行 Teen pop",
28
- "Contemporary_dance_pop": "当代流行舞曲 Contemporary dance pop",
29
- "Dance_pop": "流行舞曲 Dance pop",
30
- "Classic_indie_pop": "经典独立流行 Classic indie pop",
31
- "Chamber_cabaret_and_art_pop": "室内卡巴莱与艺术流行乐 Chamber cabaret & art pop",
32
- "Soul_or_r_and_b": "灵魂乐或节奏布鲁斯 Soul / R&B",
33
- "Adult_alternative_rock": "成人另类摇滚 Adult alternative rock",
34
- "Uplifting_anthemic_rock": "迷幻民族摇滚 Uplifting anthemic rock",
35
- "Soft_rock": "慢摇滚 Soft rock",
36
- "Acoustic_pop": "原声流行 Acoustic pop",
37
  }
38
-
39
  CLASSES = list(TRANSLATE.keys())
 
40
 
41
 
42
  def most_common_element(input_list):
@@ -46,7 +46,7 @@ def most_common_element(input_list):
46
 
47
 
48
  def mp3_to_mel(audio_path: str, width=11.4):
49
- os.makedirs("./flagged", exist_ok=True)
50
  try:
51
  y, sr = librosa.load(audio_path)
52
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
@@ -61,7 +61,7 @@ def mp3_to_mel(audio_path: str, width=11.4):
61
  librosa.display.specshow(log_mel_spec[:, i : i + step])
62
  plt.axis("off")
63
  plt.savefig(
64
- f"./flagged/mel_{round(dur, 2)}_{i}.jpg",
65
  bbox_inches="tight",
66
  pad_inches=0.0,
67
  )
@@ -72,7 +72,7 @@ def mp3_to_mel(audio_path: str, width=11.4):
72
 
73
 
74
  def mp3_to_cqt(audio_path: str, width=11.4):
75
- os.makedirs("./flagged", exist_ok=True)
76
  try:
77
  y, sr = librosa.load(audio_path)
78
  cqt_spec = librosa.cqt(y=y, sr=sr)
@@ -87,7 +87,7 @@ def mp3_to_cqt(audio_path: str, width=11.4):
87
  librosa.display.specshow(log_cqt_spec[:, i : i + step])
88
  plt.axis("off")
89
  plt.savefig(
90
- f"./flagged/cqt_{round(dur, 2)}_{i}.jpg",
91
  bbox_inches="tight",
92
  pad_inches=0.0,
93
  )
@@ -98,7 +98,7 @@ def mp3_to_cqt(audio_path: str, width=11.4):
98
 
99
 
100
  def mp3_to_chroma(audio_path: str, width=11.4):
101
- os.makedirs("./flagged", exist_ok=True)
102
  try:
103
  y, sr = librosa.load(audio_path)
104
  chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
@@ -113,7 +113,7 @@ def mp3_to_chroma(audio_path: str, width=11.4):
113
  librosa.display.specshow(log_chroma_spec[:, i : i + step])
114
  plt.axis("off")
115
  plt.savefig(
116
- f"./flagged/chroma_{round(dur, 2)}_{i}.jpg",
117
  bbox_inches="tight",
118
  pad_inches=0.0,
119
  )
@@ -135,12 +135,12 @@ def embed_img(img_path, input_size=224):
135
  return transform(img).unsqueeze(0)
136
 
137
 
138
- def inference(mp3_path, log_name: str, folder_path="./flagged"):
139
  if os.path.exists(folder_path):
140
  shutil.rmtree(folder_path)
141
 
142
  if not mp3_path:
143
- return None, "请输入音频 Please input an audio!"
144
 
145
  network = EvalNet(log_name)
146
  spec = log_name.split("_")[-1]
@@ -186,35 +186,17 @@ if __name__ == "__main__":
186
  gr.Interface(
187
  fn=inference,
188
  inputs=[
189
- gr.Audio(label="上传MP3音频 Upload MP3", type="filepath"),
190
- gr.Dropdown(
191
- choices=models, label="选择模型 Select a model", value=models[6]
192
- ),
193
  ],
194
  outputs=[
195
- gr.Textbox(label="音频文件名 Audio filename", show_copy_button=True),
196
- gr.Textbox(label="流派识别 Genre recognition", show_copy_button=True),
197
  ],
198
  examples=examples,
199
  cache_examples=False,
200
  allow_flagging="never",
201
- title="建议录音时长保持在 15s 以内, 过长会影响识别效率<br>It is recommended to keep the duration of recording within 15s, too long will affect the recognition efficiency.",
202
- )
203
-
204
- gr.Markdown(
205
- """
206
- # 引用 Cite
207
- ```bibtex
208
- @dataset{zhaorui_liu_2021_5676893,
209
- author = {Monan Zhou, Shenyang Xu, Zhaorui Liu, Zhaowen Wang, Feng Yu, Wei Li and Baoqiang Han},
210
- title = {CCMusic: an Open and Diverse Database for Chinese and General Music Information Retrieval Research},
211
- month = {mar},
212
- year = {2024},
213
- publisher = {HuggingFace},
214
- version = {1.2},
215
- url = {https://huggingface.co/ccmusic-database}
216
- }
217
- ```"""
218
  )
219
 
220
  demo.launch()
 
18
 
19
 
20
  TRANSLATE = {
21
+ "Symphony": "Symphony",
22
+ "Opera": "Opera",
23
+ "Solo": "Solo",
24
+ "Chamber": "Chamber",
25
+ "Pop_vocal_ballad": "Pop vocal ballad",
26
+ "Adult_contemporary": "Adult contemporary",
27
+ "Teen_pop": "Teen pop",
28
+ "Contemporary_dance_pop": "Contemporary dance pop",
29
+ "Dance_pop": "Dance pop",
30
+ "Classic_indie_pop": "Classic indie pop",
31
+ "Chamber_cabaret_and_art_pop": "Chamber cabaret & art pop",
32
+ "Soul_or_r_and_b": "Soul / R&B",
33
+ "Adult_alternative_rock": "Adult alternative rock",
34
+ "Uplifting_anthemic_rock": "Uplifting anthemic rock",
35
+ "Soft_rock": "Soft rock",
36
+ "Acoustic_pop": "Acoustic pop",
37
  }
 
38
  CLASSES = list(TRANSLATE.keys())
39
+ CACHE_DIR = "__pycache__"
40
 
41
 
42
  def most_common_element(input_list):
 
46
 
47
 
48
  def mp3_to_mel(audio_path: str, width=11.4):
49
+ os.makedirs(CACHE_DIR, exist_ok=True)
50
  try:
51
  y, sr = librosa.load(audio_path)
52
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
 
61
  librosa.display.specshow(log_mel_spec[:, i : i + step])
62
  plt.axis("off")
63
  plt.savefig(
64
+ f"{CACHE_DIR}/mel_{round(dur, 2)}_{i}.jpg",
65
  bbox_inches="tight",
66
  pad_inches=0.0,
67
  )
 
72
 
73
 
74
  def mp3_to_cqt(audio_path: str, width=11.4):
75
+ os.makedirs(CACHE_DIR, exist_ok=True)
76
  try:
77
  y, sr = librosa.load(audio_path)
78
  cqt_spec = librosa.cqt(y=y, sr=sr)
 
87
  librosa.display.specshow(log_cqt_spec[:, i : i + step])
88
  plt.axis("off")
89
  plt.savefig(
90
+ f"{CACHE_DIR}/cqt_{round(dur, 2)}_{i}.jpg",
91
  bbox_inches="tight",
92
  pad_inches=0.0,
93
  )
 
98
 
99
 
100
  def mp3_to_chroma(audio_path: str, width=11.4):
101
+ os.makedirs(CACHE_DIR, exist_ok=True)
102
  try:
103
  y, sr = librosa.load(audio_path)
104
  chroma_spec = librosa.feature.chroma_stft(y=y, sr=sr)
 
113
  librosa.display.specshow(log_chroma_spec[:, i : i + step])
114
  plt.axis("off")
115
  plt.savefig(
116
+ f"{CACHE_DIR}/chroma_{round(dur, 2)}_{i}.jpg",
117
  bbox_inches="tight",
118
  pad_inches=0.0,
119
  )
 
135
  return transform(img).unsqueeze(0)
136
 
137
 
138
+ def inference(mp3_path, log_name: str, folder_path=CACHE_DIR):
139
  if os.path.exists(folder_path):
140
  shutil.rmtree(folder_path)
141
 
142
  if not mp3_path:
143
+ return None, "Please input an audio!"
144
 
145
  network = EvalNet(log_name)
146
  spec = log_name.split("_")[-1]
 
186
  gr.Interface(
187
  fn=inference,
188
  inputs=[
189
+ gr.Audio(label="Upload MP3", type="filepath"),
190
+ gr.Dropdown(choices=models, label="Select a model", value=models[6]),
 
 
191
  ],
192
  outputs=[
193
+ gr.Textbox(label="Audio filename", show_copy_button=True),
194
+ gr.Textbox(label="Genre recognition", show_copy_button=True),
195
  ],
196
  examples=examples,
197
  cache_examples=False,
198
  allow_flagging="never",
199
+ title="It is recommended to keep the duration of recording within 15s, too long will affect the recognition efficiency.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  )
201
 
202
  demo.launch()
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- librosa
2
  torch
 
 
3
  matplotlib
4
  torchvision
5
- pillow
6
- modelscope==1.15
 
 
1
  torch
2
+ pillow
3
+ librosa
4
  matplotlib
5
  torchvision
6
+ modelscope[framework]==1.18
 
utils.py CHANGED
@@ -32,23 +32,17 @@ def get_modelist(model_dir=MODEL_DIR):
32
  try:
33
  entries = os.listdir(model_dir)
34
  except OSError as e:
35
- print(f"无法访问 {model_dir}: {e}")
36
  return
37
 
38
- # 遍历所有条目
39
  output = []
40
  for entry in entries:
41
- # 获取完整路径
42
  full_path = os.path.join(model_dir, entry)
43
-
44
- # 跳过'.git'文件夹
45
  if entry == ".git" or entry == "examples":
46
- print(f"跳过 .git / examples 文件夹: {full_path}")
47
  continue
48
 
49
- # 检查条目是文件还是目录
50
  if os.path.isdir(full_path):
51
- # 打印目录路径
52
  output.append(os.path.basename(full_path))
53
 
54
  return output
@@ -62,6 +56,6 @@ def download(url: str):
62
  for chunk in response.iter_content(chunk_size=8192):
63
  f.write(chunk)
64
 
65
- print(f"文件已下载到 {os.getcwd()}/{filename}")
66
  else:
67
- print(f"下载失败,状态码:{response.status_code}")
 
32
  try:
33
  entries = os.listdir(model_dir)
34
  except OSError as e:
35
+ print(f"Cannot access {model_dir}: {e}")
36
  return
37
 
 
38
  output = []
39
  for entry in entries:
 
40
  full_path = os.path.join(model_dir, entry)
 
 
41
  if entry == ".git" or entry == "examples":
42
+ print(f"Skip .git / examples dir: {full_path}")
43
  continue
44
 
 
45
  if os.path.isdir(full_path):
 
46
  output.append(os.path.basename(full_path))
47
 
48
  return output
 
56
  for chunk in response.iter_content(chunk_size=8192):
57
  f.write(chunk)
58
 
59
+ print(f"The file has been downloaded to {os.getcwd()}/{filename}")
60
  else:
61
+ print(f"Failed to download, status code: {response.status_code}")