Spaces:
Running
on
L40S
Running
on
L40S
root
committed on
Commit
·
ef01ecd
1
Parent(s):
0a1e140
fix bug
Browse files- app.py +6 -6
- codeclm/models/lm_levo.py +2 -1
- sample/lyric.jsonl +1 -1
- sample/{19_2-又是一天过去,烦恼如影随形10s.wav → prompt.wav} +0 -0
app.py
CHANGED
|
@@ -18,7 +18,7 @@ print("Successful downloaded model.")
|
|
| 18 |
|
| 19 |
from levo_inference import LeVoInference
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
EXAMPLE_DESC = """female, dark, pop, sad, piano and drums, the bpm is 125."""
|
| 24 |
EXAMPLE_LYRICS = """
|
|
@@ -62,10 +62,11 @@ with open('conf/vocab.yaml', 'r', encoding='utf-8') as file:
|
|
| 62 |
|
| 63 |
# 模拟歌曲生成函数
|
| 64 |
def generate_song(description, lyric, prompt_audio=None, cfg_coef=None, temperature=None, top_k=None, progress=gr.Progress(track_tqdm=True)):
|
| 65 |
-
global
|
|
|
|
| 66 |
params = {'cfg_coef':cfg_coef, 'temperature':temperature, 'top_k':top_k}
|
| 67 |
params = {k:v for k,v in params.items() if v is not None}
|
| 68 |
-
sample_rate =
|
| 69 |
|
| 70 |
# 生成过程
|
| 71 |
print(f"Generating song with description: {description}")
|
|
@@ -77,18 +78,17 @@ def generate_song(description, lyric, prompt_audio=None, cfg_coef=None, temperat
|
|
| 77 |
lyric = lyric.replace(f"{s}\n", f"{s} ")
|
| 78 |
lyric = lyric.replace("\n", "")
|
| 79 |
lyric = lyric.replace(". ; ", " ; ")
|
| 80 |
-
print(lyric)
|
| 81 |
|
| 82 |
# 适配prompt
|
| 83 |
if prompt_audio is not None:
|
| 84 |
print("Using prompt audio for generation")
|
| 85 |
else:
|
| 86 |
-
prompt_audio = op.join(APP_DIR, 'sample/
|
| 87 |
|
| 88 |
progress(0.0, "Start Generation")
|
| 89 |
start = time.time()
|
| 90 |
|
| 91 |
-
audio_data =
|
| 92 |
|
| 93 |
end = time.time()
|
| 94 |
|
|
|
|
| 18 |
|
| 19 |
from levo_inference import LeVoInference
|
| 20 |
|
| 21 |
+
MODEL = LeVoInference(op.join(APP_DIR, "conf/infer.yaml"))
|
| 22 |
|
| 23 |
EXAMPLE_DESC = """female, dark, pop, sad, piano and drums, the bpm is 125."""
|
| 24 |
EXAMPLE_LYRICS = """
|
|
|
|
| 62 |
|
| 63 |
# 模拟歌曲生成函数
|
| 64 |
def generate_song(description, lyric, prompt_audio=None, cfg_coef=None, temperature=None, top_k=None, progress=gr.Progress(track_tqdm=True)):
|
| 65 |
+
global MODEL
|
| 66 |
+
global STRUCTS
|
| 67 |
params = {'cfg_coef':cfg_coef, 'temperature':temperature, 'top_k':top_k}
|
| 68 |
params = {k:v for k,v in params.items() if v is not None}
|
| 69 |
+
sample_rate = MODEL.cfg.sample_rate
|
| 70 |
|
| 71 |
# 生成过程
|
| 72 |
print(f"Generating song with description: {description}")
|
|
|
|
| 78 |
lyric = lyric.replace(f"{s}\n", f"{s} ")
|
| 79 |
lyric = lyric.replace("\n", "")
|
| 80 |
lyric = lyric.replace(". ; ", " ; ")
|
|
|
|
| 81 |
|
| 82 |
# 适配prompt
|
| 83 |
if prompt_audio is not None:
|
| 84 |
print("Using prompt audio for generation")
|
| 85 |
else:
|
| 86 |
+
prompt_audio = op.join(APP_DIR, 'sample/prompt.wav')
|
| 87 |
|
| 88 |
progress(0.0, "Start Generation")
|
| 89 |
start = time.time()
|
| 90 |
|
| 91 |
+
audio_data = MODEL(lyric, description, prompt_audio, params).cpu().permute(1, 0).float().numpy()
|
| 92 |
|
| 93 |
end = time.time()
|
| 94 |
|
codeclm/models/lm_levo.py
CHANGED
|
@@ -5,6 +5,7 @@ import random
|
|
| 5 |
import torch.nn as nn
|
| 6 |
import typing as tp
|
| 7 |
import torch.nn.functional as F
|
|
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from codeclm.models.levo import CausalLM, LlamaConfig
|
| 10 |
from codeclm.modules.streaming import StreamingModule
|
|
@@ -421,7 +422,7 @@ class LmModel(StreamingModule):
|
|
| 421 |
with self.streaming():
|
| 422 |
gen_sequence_len = gen_sequence.shape[-1] # gen_sequence shape is [B, K, S]
|
| 423 |
prev_offset = 0
|
| 424 |
-
for offset in range(start_offset_sequence, gen_sequence_len):
|
| 425 |
# get current sequence (note that the streaming API is providing the caching over previous offsets)
|
| 426 |
curr_sequence = gen_sequence[..., prev_offset:offset]
|
| 427 |
curr_mask = mask[None, ..., prev_offset:offset].expand(B, -1, -1)
|
|
|
|
| 5 |
import torch.nn as nn
|
| 6 |
import typing as tp
|
| 7 |
import torch.nn.functional as F
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
from dataclasses import dataclass
|
| 10 |
from codeclm.models.levo import CausalLM, LlamaConfig
|
| 11 |
from codeclm.modules.streaming import StreamingModule
|
|
|
|
| 422 |
with self.streaming():
|
| 423 |
gen_sequence_len = gen_sequence.shape[-1] # gen_sequence shape is [B, K, S]
|
| 424 |
prev_offset = 0
|
| 425 |
+
for offset in tqdm(range(start_offset_sequence, gen_sequence_len)):
|
| 426 |
# get current sequence (note that the streaming API is providing the caching over previous offsets)
|
| 427 |
curr_sequence = gen_sequence[..., prev_offset:offset]
|
| 428 |
curr_mask = mask[None, ..., prev_offset:offset].expand(B, -1, -1)
|
sample/lyric.jsonl
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"idx": "01_节奏蓝调", "descriptions": "female, dark, pop, sad, piano and drums, the bpm is 125.", "gt_lyric": "[intro-short] ; [verse] 夜晚的街灯闪烁.我漫步在熟悉的角落.回忆像潮水般涌来.你的笑容如此清晰.在心头无法抹去.那些曾经的甜蜜.如今只剩我独自回忆 ; [bridge] 手机屏幕亮起.是你发来的消息.简单的几个字.却让我泪流满面.曾经的拥抱温暖.如今却变得遥远.我多想回到从前.重新拥有你的陪伴 ; [chorus] 回忆的温度还在.你却已不在.我的心被爱填满.却又被思念刺痛.R&B的节奏奏响.我的心却在流浪.没有你的日子.我该如何继续向前 ; [outro-short]", "prompt_audio_path": "sample/
|
|
|
|
| 1 |
+
{"idx": "01_节奏蓝调", "descriptions": "female, dark, pop, sad, piano and drums, the bpm is 125.", "gt_lyric": "[intro-short] ; [verse] 夜晚的街灯闪烁.我漫步在熟悉的角落.回忆像潮水般涌来.你的笑容如此清晰.在心头无法抹去.那些曾经的甜蜜.如今只剩我独自回忆 ; [bridge] 手机屏幕亮起.是你发来的消息.简单的几个字.却让我泪流满面.曾经的拥抱温暖.如今却变得遥远.我多想回到从前.重新拥有你的陪伴 ; [chorus] 回忆的温度还在.你却已不在.我的心被爱填满.却又被思念刺痛.R&B的节奏奏响.我的心却在流浪.没有你的日子.我该如何继续向前 ; [outro-short]", "prompt_audio_path": "sample/prompt.wav"}
|
sample/{19_2-又是一天过去,烦恼如影随形10s.wav → prompt.wav}
RENAMED
|
File without changes
|