Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	cn lyrics example
Browse files- app.py +33 -30
 - diffrhythm/infer/infer_utils.py +1 -0
 - src/prompt/rap_cn.wav +0 -0
 - src/prompt/rap_en.wav +0 -0
 
    	
        app.py
    CHANGED
    
    | 
         @@ -29,15 +29,18 @@ device='cuda' 
     | 
|
| 29 | 
         
             
            cfm, tokenizer, muq, vae = prepare_model(device)
         
     | 
| 30 | 
         
             
            cfm = torch.compile(cfm)
         
     | 
| 31 | 
         | 
| 32 | 
         
            -
            @spaces.GPU
         
     | 
| 33 | 
         
             
            def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
         
     | 
| 34 | 
         | 
| 35 | 
         
             
                if randomize_seed:
         
     | 
| 36 | 
         
             
                    seed = random.randint(0, MAX_SEED)
         
     | 
| 37 | 
         
             
                torch.manual_seed(seed)
         
     | 
| 38 | 
         
             
                sway_sampling_coef = -1 if steps < 32 else None
         
     | 
| 39 | 
         
            -
                 
     | 
| 40 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 41 | 
         
             
                negative_style_prompt = get_negative_style_prompt(device)
         
     | 
| 42 | 
         
             
                latent_prompt = get_reference_latent(device, max_frames)
         
     | 
| 43 | 
         
             
                generated_song = inference(cfm_model=cfm, 
         
     | 
| 
         @@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 169 | 
         
             
                        with gr.Row():
         
     | 
| 170 | 
         
             
                            with gr.Column():
         
     | 
| 171 | 
         
             
                                lrc = gr.Textbox(
         
     | 
| 172 | 
         
            -
                                    label=" 
     | 
| 173 | 
         
             
                                    placeholder="Input the full lyrics",
         
     | 
| 174 | 
         
             
                                    lines=12,
         
     | 
| 175 | 
         
             
                                    max_lines=50,
         
     | 
| 
         @@ -181,26 +184,23 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 181 | 
         
             
                            with gr.Column():
         
     | 
| 182 | 
         
             
                                with gr.Accordion("Best Practices Guide", open=True):
         
     | 
| 183 | 
         
             
                                    gr.Markdown("""
         
     | 
| 184 | 
         
            -
             
     | 
| 185 | 
         
            -
             
     | 
| 186 | 
         
            -
             
     | 
| 187 | 
         
            -
             
     | 
| 188 | 
         
            -
             
     | 
| 189 | 
         
            -
             
     | 
| 190 | 
         
            -
             
     | 
| 191 | 
         
            -
             
     | 
| 192 | 
         
            -
             
     | 
| 193 | 
         
            -
             
     | 
| 194 | 
         
            -
             
     | 
| 195 | 
         
            -
             
     | 
| 196 | 
         
            -
             
     | 
| 197 | 
         
            -
             
     | 
| 198 | 
         
            -
             
     | 
| 199 | 
         
            -
             
     | 
| 200 | 
         
            -
             
     | 
| 201 | 
         
            -
                                    4. **Supported Languages**
         
     | 
| 202 | 
         
            -
                                    - **Chinese and English**
         
     | 
| 203 | 
         
            -
                                    - More languages comming soon
         
     | 
| 204 | 
         
             
                                    """)
         
     | 
| 205 | 
         | 
| 206 | 
         
             
                                lyrics_btn = gr.Button("Generate", variant="primary")
         
     | 
| 
         @@ -239,23 +239,26 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 239 | 
         
             
                                ["./src/prompt/classic_en.wav"],
         
     | 
| 240 | 
         
             
                                ["./src/prompt/jazz_cn.wav"],
         
     | 
| 241 | 
         
             
                                ["./src/prompt/jazz_en.wav"],
         
     | 
| 
         | 
|
| 
         | 
|
| 242 | 
         
             
                                ["./src/prompt/default.wav"]
         
     | 
| 243 | 
         
             
                            ],
         
     | 
| 244 | 
         
             
                            inputs=[audio_prompt],  
         
     | 
| 245 | 
         
             
                            label="Audio Examples",
         
     | 
| 246 | 
         
            -
                            examples_per_page= 
     | 
| 247 | 
         
             
                            elem_id="audio-examples-container" 
         
     | 
| 248 | 
         
             
                        )
         
     | 
| 249 | 
         | 
| 250 | 
         
             
                        gr.Examples(
         
     | 
| 251 | 
         
             
                            examples=[
         
     | 
| 252 | 
         
             
                                ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
         
     | 
| 253 | 
         
            -
                                ["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""]
         
     | 
| 
         | 
|
| 254 | 
         
             
                            ],
         
     | 
| 255 | 
         | 
| 256 | 
         
             
                            inputs=[lrc],
         
     | 
| 257 | 
         
             
                            label="Lrc Examples",
         
     | 
| 258 | 
         
            -
                            examples_per_page= 
     | 
| 259 | 
         
             
                            elem_id="lrc-examples-container",
         
     | 
| 260 | 
         
             
                        )
         
     | 
| 261 | 
         | 
| 
         @@ -270,7 +273,7 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 270 | 
         
             
                                    gr.Markdown("### Method 1: Generate from Theme")
         
     | 
| 271 | 
         
             
                                    theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
         
     | 
| 272 | 
         
             
                                    tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
         
     | 
| 273 | 
         
            -
                                    language = gr.Radio([" 
     | 
| 274 | 
         
             
                                    gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
         
     | 
| 275 | 
         | 
| 276 | 
         
             
                                    gr.Examples(
         
     | 
| 
         @@ -283,7 +286,7 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 283 | 
         
             
                                            [
         
     | 
| 284 | 
         
             
                                                "Heroic Epic", 
         
     | 
| 285 | 
         
             
                                                "choir orchestral powerful",
         
     | 
| 286 | 
         
            -
                                                " 
     | 
| 287 | 
         
             
                                            ]
         
     | 
| 288 | 
         
             
                                        ],
         
     | 
| 289 | 
         
             
                                        inputs=[theme, tags_gen, language],
         
     | 
| 
         @@ -321,7 +324,7 @@ with gr.Blocks(css=css) as demo: 
     | 
|
| 321 | 
         | 
| 322 | 
         
             
                            with gr.Column():
         
     | 
| 323 | 
         
             
                                lrc_output = gr.Textbox(
         
     | 
| 324 | 
         
            -
                                    label="Generated LRC 
     | 
| 325 | 
         
             
                                    placeholder="Timed lyrics will appear here",
         
     | 
| 326 | 
         
             
                                    lines=57,
         
     | 
| 327 | 
         
             
                                    elem_classes="lrc-output",
         
     | 
| 
         | 
|
| 29 | 
         
             
            cfm, tokenizer, muq, vae = prepare_model(device)
         
     | 
| 30 | 
         
             
            cfm = torch.compile(cfm)
         
     | 
| 31 | 
         | 
| 32 | 
         
            +
            @spaces.GPU(duration=20)
         
     | 
| 33 | 
         
             
            def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
         
     | 
| 34 | 
         | 
| 35 | 
         
             
                if randomize_seed:
         
     | 
| 36 | 
         
             
                    seed = random.randint(0, MAX_SEED)
         
     | 
| 37 | 
         
             
                torch.manual_seed(seed)
         
     | 
| 38 | 
         
             
                sway_sampling_coef = -1 if steps < 32 else None
         
     | 
| 39 | 
         
            +
                try:
         
     | 
| 40 | 
         
            +
                    lrc_prompt, start_time = get_lrc_token(lrc, tokenizer, device)
         
     | 
| 41 | 
         
            +
                    style_prompt = get_style_prompt(muq, ref_audio_path)
         
     | 
| 42 | 
         
            +
                except Exception as e:
         
     | 
| 43 | 
         
            +
                    raise gr.Error(f"Error: {str(e)}")
         
     | 
| 44 | 
         
             
                negative_style_prompt = get_negative_style_prompt(device)
         
     | 
| 45 | 
         
             
                latent_prompt = get_reference_latent(device, max_frames)
         
     | 
| 46 | 
         
             
                generated_song = inference(cfm_model=cfm, 
         
     | 
| 
         | 
|
| 172 | 
         
             
                        with gr.Row():
         
     | 
| 173 | 
         
             
                            with gr.Column():
         
     | 
| 174 | 
         
             
                                lrc = gr.Textbox(
         
     | 
| 175 | 
         
            +
                                    label="Lyrics",
         
     | 
| 176 | 
         
             
                                    placeholder="Input the full lyrics",
         
     | 
| 177 | 
         
             
                                    lines=12,
         
     | 
| 178 | 
         
             
                                    max_lines=50,
         
     | 
| 
         | 
|
| 184 | 
         
             
                            with gr.Column():
         
     | 
| 185 | 
         
             
                                with gr.Accordion("Best Practices Guide", open=True):
         
     | 
| 186 | 
         
             
                                    gr.Markdown("""
         
     | 
| 187 | 
         
            +
            1. **Lyrics Format Requirements**
         
     | 
| 188 | 
         
            +
                - Each line must follow: `[mm:ss.xx]Lyric content`
         
     | 
| 189 | 
         
            +
                - Example of valid format:
         
     | 
| 190 | 
         
            +
                ``` 
         
     | 
| 191 | 
         
            +
                [00:10.00]Moonlight spills through broken blinds
         
     | 
| 192 | 
         
            +
                [00:13.20]Your shadow dances on the dashboard shrine
         
     | 
| 193 | 
         
            +
                ```
         
     | 
| 194 | 
         
            +
            2. **Generation Duration Limits**
         
     | 
| 195 | 
         
            +
                - Current version supports maximum **95 seconds** of music generation
         
     | 
| 196 | 
         
            +
                - Total timestamps should not exceed 01:35.00 (95 seconds)
         
     | 
| 197 | 
         
            +
            3. **Audio Prompt Requirements**
         
     | 
| 198 | 
         
            +
                - Reference audio should be ≥ 1 second; audio longer than 10 seconds will be randomly clipped to 10 seconds
         
     | 
| 199 | 
         
            +
                - For optimal results, the 10-second clips should be carefully selected
         
     | 
| 200 | 
         
            +
                - Shorter clips may lead to incoherent generation
         
     | 
| 201 | 
         
            +
            4. **Supported Languages**
         
     | 
| 202 | 
         
            +
                - **Chinese and English**
         
     | 
| 203 | 
         
            +
                - More languages coming soon
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 204 | 
         
             
                                    """)
         
     | 
| 205 | 
         | 
| 206 | 
         
             
                                lyrics_btn = gr.Button("Generate", variant="primary")
         
     | 
| 
         | 
|
| 239 | 
         
             
                                ["./src/prompt/classic_en.wav"],
         
     | 
| 240 | 
         
             
                                ["./src/prompt/jazz_cn.wav"],
         
     | 
| 241 | 
         
             
                                ["./src/prompt/jazz_en.wav"],
         
     | 
| 242 | 
         
            +
                                ["./src/prompt/rap_cn.wav"],
         
     | 
| 243 | 
         
            +
                                ["./src/prompt/rap_en.wav"],
         
     | 
| 244 | 
         
             
                                ["./src/prompt/default.wav"]
         
     | 
| 245 | 
         
             
                            ],
         
     | 
| 246 | 
         
             
                            inputs=[audio_prompt],  
         
     | 
| 247 | 
         
             
                            label="Audio Examples",
         
     | 
| 248 | 
         
            +
                            examples_per_page=13,
         
     | 
| 249 | 
         
             
                            elem_id="audio-examples-container" 
         
     | 
| 250 | 
         
             
                        )
         
     | 
| 251 | 
         | 
| 252 | 
         
             
                        gr.Examples(
         
     | 
| 253 | 
         
             
                            examples=[
         
     | 
| 254 | 
         
             
                                ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
         
     | 
| 255 | 
         
            +
                                ["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""],
         
     | 
| 256 | 
         
            +
                                ["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""]
         
     | 
| 257 | 
         
             
                            ],
         
     | 
| 258 | 
         | 
| 259 | 
         
             
                            inputs=[lrc],
         
     | 
| 260 | 
         
             
                            label="Lrc Examples",
         
     | 
| 261 | 
         
            +
                            examples_per_page=3,
         
     | 
| 262 | 
         
             
                            elem_id="lrc-examples-container",
         
     | 
| 263 | 
         
             
                        )
         
     | 
| 264 | 
         | 
| 
         | 
|
| 273 | 
         
             
                                    gr.Markdown("### Method 1: Generate from Theme")
         
     | 
| 274 | 
         
             
                                    theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
         
     | 
| 275 | 
         
             
                                    tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
         
     | 
| 276 | 
         
            +
                                    language = gr.Radio(["cn", "en"], label="Language", value="en")
         
     | 
| 277 | 
         
             
                                    gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
         
     | 
| 278 | 
         | 
| 279 | 
         
             
                                    gr.Examples(
         
     | 
| 
         | 
|
| 286 | 
         
             
                                            [
         
     | 
| 287 | 
         
             
                                                "Heroic Epic", 
         
     | 
| 288 | 
         
             
                                                "choir orchestral powerful",
         
     | 
| 289 | 
         
            +
                                                "cn"
         
     | 
| 290 | 
         
             
                                            ]
         
     | 
| 291 | 
         
             
                                        ],
         
     | 
| 292 | 
         
             
                                        inputs=[theme, tags_gen, language],
         
     | 
| 
         | 
|
| 324 | 
         | 
| 325 | 
         
             
                            with gr.Column():
         
     | 
| 326 | 
         
             
                                lrc_output = gr.Textbox(
         
     | 
| 327 | 
         
            +
                                    label="Generated LRC",
         
     | 
| 328 | 
         
             
                                    placeholder="Timed lyrics will appear here",
         
     | 
| 329 | 
         
             
                                    lines=57,
         
     | 
| 330 | 
         
             
                                    elem_classes="lrc-output",
         
     | 
    	
        diffrhythm/infer/infer_utils.py
    CHANGED
    
    | 
         @@ -56,6 +56,7 @@ def get_style_prompt(model, wav_path): 
     | 
|
| 56 | 
         
             
                audio, _ = librosa.load(wav_path, sr=24000)
         
     | 
| 57 | 
         
             
                audio_len = librosa.get_duration(y=audio, sr=24000)
         
     | 
| 58 | 
         | 
| 
         | 
|
| 59 | 
         
             
                assert audio_len >= 1, "Input audio length shorter than 1 second"
         
     | 
| 60 | 
         | 
| 61 | 
         
             
                if audio_len > 10:
         
     | 
| 
         | 
|
| 56 | 
         
             
                audio, _ = librosa.load(wav_path, sr=24000)
         
     | 
| 57 | 
         
             
                audio_len = librosa.get_duration(y=audio, sr=24000)
         
     | 
| 58 | 
         | 
| 59 | 
         
            +
             
     | 
| 60 | 
         
             
                assert audio_len >= 1, "Input audio length shorter than 1 second"
         
     | 
| 61 | 
         | 
| 62 | 
         
             
                if audio_len > 10:
         
     | 
    	
        src/prompt/rap_cn.wav
    ADDED
    
    | 
         Binary file (441 kB). View file 
     | 
| 
         | 
    	
        src/prompt/rap_en.wav
    ADDED
    
    | 
         Binary file (882 kB). View file 
     | 
| 
         |