Update app.py

app.py
CHANGED
@@ -16,23 +16,28 @@ import subprocess
 import os
 import sys
 
-# --- Setup: Clone repository and
-#
+# --- Setup: Clone repository, Change Directory, and Update Python Path ---
+# This is the definitive approach to fixing all of the path problems.
 
-# 1. Clone
-subprocess.run("git lfs install", shell=True, check=True)
+# 1. Clone the repository
 repo_dir_name = "SeedVR2-3B"
 if not os.path.exists(repo_dir_name):
-    print(f"
+    print(f"Cloning the {repo_dir_name} repository...")
     subprocess.run(f"git clone https://huggingface.co/spaces/ByteDance-Seed/{repo_dir_name}", shell=True, check=True)
 
-# 2.
-#
+# 2. Change the current working directory to the repository root.
+# This fixes relative file access (e.g. loading config.yaml).
 os.chdir(repo_dir_name)
-print(f"
+print(f"Working directory changed to: {os.getcwd()}")
 
-#
-#
+# 3. Explicitly add the new working directory to Python's module search path.
+# This fixes module imports (e.g. `from data...`).
+sys.path.insert(0, os.path.abspath('.'))
+print(f"Current directory added to sys.path: {os.path.abspath('.')}")
+
+
+# --- Main Application Code ---
+# All imports and file loads should now work correctly.
 
 import torch
 import mediapy
@@ -51,7 +56,7 @@ import torchvision.transforms as T
 from torchvision.transforms import Compose, Lambda, Normalize
 from torchvision.io.video import read_video
 
-#
+# Imports from the repository (these will now work)
 from data.image.transforms.divisible_crop import DivisibleCrop
 from data.image.transforms.na_resize import NaResize
 from data.video.transforms.rearrange import Rearrange
@@ -63,21 +68,21 @@ from common.partition import partition_by_size
 from projects.video_diffusion_sr.infer import VideoDiffusionInfer
 from common.distributed.ops import sync_data
 
-#
+# Check for the color_fix utility (using a relative path)
 if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
     use_colorfix = True
 else:
     use_colorfix = False
-    print('
+    print('Warning: color fix is not available!')
 
-# ---
+# --- Environment and Dependency Setup ---
 os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
 
-# Use sys.executable
+# Use sys.executable to make sure we are using the correct pip
 python_executable = sys.executable
 subprocess.run(
     [python_executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
@@ -88,20 +93,20 @@ subprocess.run(
 apex_wheel_path = "apex-0.1-cp310-cp310-linux_x86_64.whl"
 if os.path.exists(apex_wheel_path):
     subprocess.run([python_executable, "-m", "pip", "install", apex_wheel_path], check=True)
-    print("✅ Apex
+    print("✅ Apex setup complete.")
 
-# ---
+# --- Core Functions ---
 
 def configure_sequence_parallel(sp_size):
     if sp_size > 1:
         init_sequence_parallel(sp_size)
 
 def configure_runner(sp_size):
-    #
+    # Paths are now simple and relative to the repository root
     config_path = 'configs_3b/main.yaml'
     checkpoint_path = 'ckpts/seedvr2_ema_3b.pth'
 
-    config = load_config(config_path) #
+    config = load_config(config_path)  # This will now work correctly
     runner = VideoDiffusionInfer(config)
     OmegaConf.set_readonly(runner.config, False)
 
@@ -120,7 +125,7 @@ def generation_step(runner, text_embeds_dict, cond_latents):
 
     noises = [torch.randn_like(latent) for latent in cond_latents]
    aug_noises = [torch.randn_like(latent) for latent in cond_latents]
-    print(f"
+    print(f"Generating with noise shape: {noises[0].size()}.")
    noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
    noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
    cond_noise_scale = 0.1
@@ -129,7 +134,7 @@ def generation_step(runner, text_embeds_dict, cond_latents):
         t = torch.tensor([1000.0], device=torch.device("cuda")) * cond_noise_scale
         shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
         t = runner.timestep_transform(t, shape)
-        print(f"Timestep
+        print(f"Timestep shifting from {1000.0 * cond_noise_scale} to {t}.")
         x = runner.schedule.forward(x, aug_noise, t)
         return x
 
@@ -157,7 +162,7 @@ def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.
     def _extract_text_embeds():
         positive_prompts_embeds = []
         for _ in original_videos_local:
-            #
+            # Paths are now simple
             text_pos_embeds = torch.load('pos_emb.pt')
             text_neg_embeds = torch.load('neg_emb.pt')
             positive_prompts_embeds.append({"texts_pos": [text_pos_embeds], "texts_neg": [text_neg_embeds]})
@@ -218,16 +223,16 @@ def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.
             video = video / 255.0
             if video.size(0) > 121:
                 video = video[:121]
-            print(f"
+            print(f"Read video size: {video.size()}")
             output_dir = os.path.join(output_base_dir, f"{uuid.uuid4()}.mp4")
         elif is_image:
             img = Image.open(video_path).convert("RGB")
             img_tensor = T.ToTensor()(img).unsqueeze(0)
             video = img_tensor
-            print(f"
+            print(f"Read image size: {video.size()}")
             output_dir = os.path.join(output_base_dir, f"{uuid.uuid4()}.png")
         else:
-            raise ValueError("
+            raise ValueError("Unsupported file type")
 
         cond_latents.append(video_transform(video.to(torch.device("cuda"))))
 
@@ -236,7 +241,7 @@ def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.
     if is_video:
         cond_latents = [cut_videos(v, sp_size) for v in cond_latents]
 
-    print(f"
+    print(f"Encoding videos: {[v.size() for v in cond_latents]}")
     cond_latents = runner.vae_encode(cond_latents)
 
     for i, emb in enumerate(text_embeds["texts_pos"]):
@@ -273,41 +278,43 @@ def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.
     else:
         return None, output_dir, output_dir
 
-# --- Gradio
+# --- Gradio UI ---
 
-with gr.Blocks(title="SeedVR2:
-    # Use
+with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
+    # Use an absolute path for the logo file, for safety with Gradio
     logo_path = os.path.abspath("assets/seedvr_logo.png")
     gr.HTML(f"""
-
-
-
+        <div style='text-align:center; margin-bottom: 10px;'>
+            <img src='file/{logo_path}' style='height:40px;' alt='SeedVR logo'/>
+        </div>
+        <p><b>Official Gradio demo</b> for <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'><b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
+        🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.</p>
     """)
 
     with gr.Row():
-        input_file = gr.File(label="
+        input_file = gr.File(label="Upload image or video", type="filepath")
         with gr.Column():
             seed = gr.Number(label="Seed", value=666)
-            fps = gr.Number(label="
+            fps = gr.Number(label="Output FPS (for video)", value=24)
 
-    run_button = gr.Button("
+    run_button = gr.Button("Run")
 
     with gr.Row():
-        output_image = gr.Image(label="
-        output_video = gr.Video(label="
+        output_image = gr.Image(label="Output Image")
+        output_video = gr.Video(label="Output Video")
 
-    download_link = gr.File(label="
+    download_link = gr.File(label="Download the result")
 
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 
     gr.HTML("""
         <hr>
-        <p>
+        <p>If you find SeedVR useful, please ⭐ the <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>GitHub repository</a>:
         <a href="https://github.com/ByteDance-Seed/SeedVR" target="_blank"><img src="https://img.shields.io/github/stars/ByteDance-Seed/SeedVR?style=social" alt="GitHub Stars"></a></p>
-        <h4>
-        <p>
-        <h4>
-        <p>
+        <h4>Notice</h4>
+        <p>This demo supports up to <b>720p and 121 frames for videos, or 2k images</b>. For other use cases, check the <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>GitHub repository</a>.</p>
+        <h4>Limitations</h4>
+        <p>It may fail on heavy degradations or AIGC clips with little motion, causing over-sharpening or inadequate restoration.</p>
     """)
 
 demo.queue().launch(share=True)
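Note: the path bootstrap at the top of the diff combines two distinct fixes that are easy to conflate. Below is a minimal standalone sketch of why both calls are needed; it is not part of the commit, and the repo path is illustrative.

import os
import sys

# Assumption for illustration: the repo has already been cloned next to this script.
repo_root = os.path.abspath("SeedVR2-3B")

# os.chdir() only fixes *file* access: relative paths like
# open("configs_3b/main.yaml") now resolve against the repo root.
os.chdir(repo_root)

# Imports are unaffected by the working directory: Python resolves
# `from data... import ...` against sys.path, so the repo root must
# be added there explicitly.
sys.path.insert(0, repo_root)

print(os.getcwd())   # .../SeedVR2-3B
print(sys.path[0])   # .../SeedVR2-3B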
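Similarly, the MASTER_ADDR / MASTER_PORT / RANK / WORLD_SIZE variables are set so that torch.distributed can initialize via its default env:// rendezvous even though the Space runs a single process. A minimal sketch of that mechanism follows; it is not part of the commit, and the gloo backend is chosen only so the check runs without a GPU.

import os
import torch.distributed as dist

# Single-process "cluster": rank 0 of world size 1, rendezvous on localhost.
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = "12355"
os.environ["RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"

# init_process_group() with the default init_method="env://" reads the
# four variables above to form the process group.
dist.init_process_group(backend="gloo")
print(dist.get_rank(), dist.get_world_size())  # 0 1
dist.destroy_process_group()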