Spaces:
Paused
Update app.py
app.py
CHANGED
@@ -28,11 +28,8 @@ if not os.path.exists(repo_dir_name):
 # --- STEP 2: Path setup ---
 # Change into the repository directory and add it to Python's path.
 
-# Change into the repository directory. ESSENTIAL for relative file paths.
 os.chdir(repo_dir_name)
 print(f"Working directory changed to: {os.getcwd()}")
-
-# Add the directory to sys.path. ESSENTIAL for module imports.
 sys.path.insert(0, os.path.abspath('.'))
 print(f"Current directory added to sys.path for imports.")
 
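Note: the two calls kept by this hunk do different jobs, which is why only the redundant comments were dropped: os.chdir fixes relative file paths, and sys.path.insert fixes module imports. A minimal sketch of the pattern (the repo directory name is assumed):

    import os, sys

    os.chdir("SeedVR")                        # relative paths like './ckpts' now resolve inside the repo
    sys.path.insert(0, os.path.abspath("."))  # 'from projects.video_diffusion_sr.color_fix import ...' now resolves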
@@ -43,7 +40,6 @@ import torch
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
-import shlex
 
 # Download helper from the original repo
 def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
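Note: the body of load_file_from_url lies outside this hunk, and the removed `import shlex` is presumably unused now. For orientation, a minimal sketch of what a helper with this signature typically does, built only on torch.hub.download_url_to_file (an assumption, not the repo's exact code):

    import os
    from urllib.parse import urlparse
    from torch.hub import download_url_to_file

    def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
        # Resolve the target path, then download only when not already cached
        model_dir = model_dir or '.'
        os.makedirs(model_dir, exist_ok=True)
        file_name = file_name or os.path.basename(urlparse(url).path)
        cached_file = os.path.abspath(os.path.join(model_dir, file_name))
        if not os.path.exists(cached_file):
            download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
        return cached_file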
@@ -72,7 +68,6 @@ pretrain_model_url = {
 # Create the checkpoint directory and download the models
 ckpt_dir = Path('./ckpts')
 ckpt_dir.mkdir(exist_ok=True)
-
 for key, url in pretrain_model_url.items():
     filename = os.path.basename(url)
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
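Note: the rest of the loop body falls outside the hunk; presumably it hands each URL to the helper above, along these lines (a hypothetical reconstruction). The routing sends the 'vae' and 'dit' weights to ./ckpts and everything else to the repository root.

    for key, url in pretrain_model_url.items():
        filename = os.path.basename(url)
        model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
        load_file_from_url(url, model_dir=model_dir, progress=True, file_name=filename)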
@@ -84,23 +79,27 @@ for key, url in pretrain_model_url.items():
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
-torch.hub.download_url_to_file('https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl', 'apex-0.1-cp310-cp310-linux_x86_64.whl')
 
-#
+# --- REFINEMENT: compile dependencies from source for the L40S GPU (Ada Lovelace) ---
 python_executable = sys.executable
-subprocess.run([python_executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"], env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, check=True)
 
-
-
-
-subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", apex_wheel_path], check=True)
-print("✅ Apex setup complete.")
-else:
-    print(f"WARNING: the Apex wheel '{apex_wheel_path}' was not found in the cloned repository.")
+print("Installing flash-attn by compiling it from source...")
+# Force a clean reinstall so the build targets the current GPU
+subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", "flash-attn"], check=True)
 
-
-
+print("Cloning and compiling Apex from source...")
+if not os.path.exists("apex"):
+    subprocess.run("git clone https://github.com/NVIDIA/apex", shell=True, check=True)
 
+# Install Apex from the cloned source, which forces compilation for the L40S GPU
+# The --cpp_ext and --cuda_ext flags are essential for the build
+subprocess.run(
+    [python_executable, "-m", "pip", "install", "-v", "--disable-pip-version-check", "--no-cache-dir", "--global-option=--cpp_ext", "--global-option=--cuda_ext", "./apex"],
+    check=True
+)
+print("✅ Apex setup complete.")
+
+# --- STEP 4: Run the main application code ---
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
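Note: swapping the prebuilt cp310 Apex wheel and the FLASH_ATTENTION_SKIP_CUDA_BUILD shortcut for source builds is what lets pip compile against the GPU that is actually present. A sketch, not part of the commit, that pins the target architecture explicitly before those pip calls via PyTorch's standard TORCH_CUDA_ARCH_LIST variable (the L40S reports compute capability 8.9):

    import os
    import torch

    # Query the current card's compute capability, e.g. (8, 9) on an L40S
    major, minor = torch.cuda.get_device_capability()
    # PyTorch's C++/CUDA extension builder honors this when compiling from source
    os.environ["TORCH_CUDA_ARCH_LIST"] = f"{major}.{minor}"
    print(f"CUDA extensions will target sm_{major}{minor}")

Worth flagging as well: newer pip releases deprecate --global-option, and the Apex README recommends --config-settings "--build-option=--cpp_ext" (and likewise for --cuda_ext) on pip >= 23.1, so the flags above may need adjusting to the image's pip version.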
@@ -130,6 +129,8 @@ os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
+# Add an environment variable that can help PyTorch debug CUDA errors
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 
 if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
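Note: CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous, so a CUDA error surfaces at the Python line that triggered it instead of at a later synchronization point; it also serializes GPU work and slows inference, so it is a debugging aid rather than a production default. The variable is read when the CUDA context initializes, so it must be set before the first CUDA call, as in this sketch (the DEBUG_CUDA gate is hypothetical):

    import os

    if os.environ.get("DEBUG_CUDA") == "1":
        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before CUDA initializes

    import torch
    x = torch.ones(4, device="cuda") * 2  # a failure here is now reported on this exact line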
@@ -158,122 +159,80 @@ def configure_runner(sp_size):
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x):
         return [i.to(torch.device("cuda")) for i in x]
-
     noises = [torch.randn_like(latent) for latent in cond_latents]
     aug_noises = [torch.randn_like(latent) for latent in cond_latents]
     noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
     noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
-
     def _add_noise(x, aug_noise):
         t = torch.tensor([1000.0], device=torch.device("cuda")) * 0.1
         shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
         t = runner.timestep_transform(t, shape)
         return runner.schedule.forward(x, aug_noise, t)
-
     conditions = [runner.get_condition(noise, task="sr", latent_blur=_add_noise(latent_blur, aug_noise)) for noise, aug_noise, latent_blur in zip(noises, aug_noises, cond_latents)]
-
     with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
         video_tensors = runner.inference(noises=noises, conditions=conditions, dit_offload=False, **text_embeds_dict)
-
     return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
 
-
+@spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
-    if video_path is None:
-        return None, None, None
-
+    if video_path is None: return None, None, None
     runner = configure_runner(1)
-
     def _extract_text_embeds():
         positive_prompts_embeds = []
         for _ in original_videos_local:
-            positive_prompts_embeds.append({
-                "texts_pos": [torch.load('pos_emb.pt')],
-                "texts_neg": [torch.load('neg_emb.pt')]
-            })
+            positive_prompts_embeds.append({"texts_pos": [torch.load('pos_emb.pt')], "texts_neg": [torch.load('neg_emb.pt')]})
        gc.collect(); torch.cuda.empty_cache()
         return positive_prompts_embeds
-
-    runner.config.diffusion.cfg.scale = cfg_scale
-    runner.config.diffusion.cfg.rescale = cfg_rescale
-    runner.config.diffusion.timesteps.sampling.steps = sample_steps
+    runner.config.diffusion.cfg.scale, runner.config.diffusion.cfg.rescale, runner.config.diffusion.timesteps.sampling.steps = cfg_scale, cfg_rescale, sample_steps
     runner.configure_diffusion()
     set_seed(int(seed) % (2**32), same_across_ranks=True)
     os.makedirs("output", exist_ok=True)
-
     original_videos = [os.path.basename(video_path)]
     original_videos_local = partition_by_size(original_videos, batch_size)
     positive_prompts_embeds = _extract_text_embeds()
-
-    video_transform = Compose([
-        NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
-        Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
-    ])
-
+    video_transform = Compose([NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
     for videos, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
         media_type, _ = mimetypes.guess_type(video_path)
         is_video = media_type and media_type.startswith("video")
-
         if is_video:
-            video, _, _ = read_video(video_path, output_format="TCHW")
-
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
-        else:  # Assume it is an image
-            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
-
+            video, _, _ = read_video(video_path, output_format="TCHW"); video = video[:121] / 255.0; output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
+        else:
+            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0); output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
         cond_latents = [video_transform(video.to("cuda"))]
         ori_lengths = [v.size(1) for v in cond_latents]
         cond_latents = runner.vae_encode(cond_latents)
-
         for key in ["texts_pos", "texts_neg"]:
-            for i, emb in enumerate(text_embeds[key]):
-                text_embeds[key][i] = emb.to("cuda")
-
+            for i, emb in enumerate(text_embeds[key]): text_embeds[key][i] = emb.to("cuda")
         samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
         del cond_latents
-
         for sample, ori_length in zip(samples, ori_lengths):
             sample = sample[:ori_length].to("cpu")
             sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round().to(torch.uint8).numpy()
-
-            if is_video:
-                mediapy.write_video(output_dir, sample, fps=fps_out)
-            else:
-                mediapy.write_image(output_dir, sample[0])
-
+            if is_video: mediapy.write_video(output_dir, sample, fps=fps_out)
+            else: mediapy.write_image(output_dir, sample[0])
         gc.collect(); torch.cuda.empty_cache()
     return (None, output_dir, output_dir) if is_video else (output_dir, None, output_dir)
 
 with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     gr.HTML(f"""
-
-    <img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;' alt='SeedVR logo'/>
-    </div>
+
     <p><b>Official Gradio demo</b> for
     <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
     <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
     🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.
     </p>
     """)
-
     with gr.Row():
         input_file = gr.File(label="Upload image or video")
         with gr.Column():
             seed = gr.Number(label="Seed", value=666)
             fps = gr.Number(label="Output FPS (for video)", value=24)
-
     run_button = gr.Button("Run")
-
     with gr.Row():
         output_image = gr.Image(label="Output Image")
         output_video = gr.Video(label="Output Video")
-
     download_link = gr.File(label="Download the result")
-
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
-
     gr.Examples(
         examples=[
             ["01.mp4", 4, 24],
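Note: two additions in this hunk deserve a gloss. The new `video = video[:121] / 255.0` caps input clips at 121 frames and rescales the uint8 frames returned by read_video to the [0, 1] range the transform pipeline expects. And @spaces.GPU comes from Hugging Face's spaces package: on ZeroGPU hardware it attaches a GPU only for the duration of the decorated call, while on dedicated GPUs it is effectively a no-op. A minimal usage sketch (function name and duration are illustrative):

    import spaces
    import torch

    @spaces.GPU(duration=120)  # seconds the call may hold the GPU; bare @spaces.GPU also works
    def restore(path: str) -> str:
        assert torch.cuda.is_available()  # CUDA is attached inside the decorated call
        return path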
@@ -282,7 +241,6 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
         ],
         inputs=[input_file, seed, fps]
     )
-
     gr.HTML("""
     <hr>
     <p>If you found SeedVR useful, please ⭐ the
@@ -296,5 +254,4 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     <h4>Limitations</h4>
     <p>It can fail on heavy degradations or on AIGC clips with little motion, causing over-sharpening or unsatisfactory restoration.</p>
     """)
-
 demo.queue().launch(share=True)