Aduc-sdr committed
Commit 7af19da (verified)
Parent(s): 5c10f27

Update app.py

Files changed (1): app.py (+30, -73)
app.py CHANGED
@@ -28,11 +28,8 @@ if not os.path.exists(repo_dir_name):
 # --- STEP 2: Path Configuration ---
 # Change into the repository directory and add it to the Python path.
 
-# Change into the repository directory. ESSENTIAL for relative file paths.
 os.chdir(repo_dir_name)
 print(f"Working directory changed to: {os.getcwd()}")
-
-# Add the directory to sys.path. ESSENTIAL for module imports.
 sys.path.insert(0, os.path.abspath('.'))
 print(f"Current directory added to sys.path for imports.")
 
@@ -43,7 +40,6 @@ import torch
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
-import shlex
 
 # Download function from the original repo
 def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
@@ -72,7 +68,6 @@ pretrain_model_url = {
 # Create the checkpoints directory and download the models
 ckpt_dir = Path('./ckpts')
 ckpt_dir.mkdir(exist_ok=True)
-
 for key, url in pretrain_model_url.items():
     filename = os.path.basename(url)
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
@@ -84,23 +79,27 @@ for key, url in pretrain_model_url.items():
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
-torch.hub.download_url_to_file('https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl', 'apex-0.1-cp310-cp310-linux_x86_64.whl')
 
-# Install dependencies robustly
+# --- REFINEMENT: Compile dependencies from source for the L40S GPU (Ada Lovelace) ---
 python_executable = sys.executable
-subprocess.run([python_executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"], env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, check=True)
 
-apex_wheel_path = "apex-0.1-cp310-cp310-linux_x86_64.whl"
-if os.path.exists(apex_wheel_path):
-    print("Installing Apex from the wheel file...")
-    subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", apex_wheel_path], check=True)
-    print("✅ Apex setup complete.")
-else:
-    print(f"WARNING: The Apex wheel file '{apex_wheel_path}' was not found in the cloned repository.")
-
-# --- STEP 4: Run the Main Application Code ---
-# Now that the environment is set up, we import and run the rest of the script.
+print("Installing flash-attn, compiling from source...")
+# Force a clean reinstall so it is compiled for the current GPU
+subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", "flash-attn"], check=True)
+
+print("Cloning and compiling Apex from source...")
+if not os.path.exists("apex"):
+    subprocess.run("git clone https://github.com/NVIDIA/apex", shell=True, check=True)
+
+# Install Apex from the cloned source, which forces compilation for the L40S GPU
+# The --cpp_ext and --cuda_ext flags are essential for the build
+subprocess.run(
+    [python_executable, "-m", "pip", "install", "-v", "--disable-pip-version-check", "--no-cache-dir", "--global-option=--cpp_ext", "--global-option=--cuda_ext", "./apex"],
+    check=True
+)
+print("✅ Apex setup complete.")
+
+# --- STEP 4: Run the Main Application Code ---
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
@@ -130,6 +129,8 @@ os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
+# Environment variable that can help PyTorch surface CUDA errors at the failing call
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 
 if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
@@ -158,122 +159,80 @@ def configure_runner(sp_size):
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x):
         return [i.to(torch.device("cuda")) for i in x]
-
     noises = [torch.randn_like(latent) for latent in cond_latents]
     aug_noises = [torch.randn_like(latent) for latent in cond_latents]
     noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
     noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
-
     def _add_noise(x, aug_noise):
         t = torch.tensor([1000.0], device=torch.device("cuda")) * 0.1
         shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
         t = runner.timestep_transform(t, shape)
         return runner.schedule.forward(x, aug_noise, t)
-
     conditions = [runner.get_condition(noise, task="sr", latent_blur=_add_noise(latent_blur, aug_noise)) for noise, aug_noise, latent_blur in zip(noises, aug_noises, cond_latents)]
-
     with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
         video_tensors = runner.inference(noises=noises, conditions=conditions, dit_offload=False, **text_embeds_dict)
-
     return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
 
-
+@spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
-    if video_path is None:
-        return None, None, None
-
+    if video_path is None: return None, None, None
     runner = configure_runner(1)
-
     def _extract_text_embeds():
         positive_prompts_embeds = []
         for _ in original_videos_local:
-            positive_prompts_embeds.append({
-                "texts_pos": [torch.load('pos_emb.pt')],
-                "texts_neg": [torch.load('neg_emb.pt')]
-            })
+            positive_prompts_embeds.append({"texts_pos": [torch.load('pos_emb.pt')], "texts_neg": [torch.load('neg_emb.pt')]})
         gc.collect(); torch.cuda.empty_cache()
         return positive_prompts_embeds
-
-    runner.config.diffusion.cfg.scale = cfg_scale
-    runner.config.diffusion.cfg.rescale = cfg_rescale
-    runner.config.diffusion.timesteps.sampling.steps = sample_steps
+    runner.config.diffusion.cfg.scale, runner.config.diffusion.cfg.rescale, runner.config.diffusion.timesteps.sampling.steps = cfg_scale, cfg_rescale, sample_steps
     runner.configure_diffusion()
     set_seed(int(seed) % (2**32), same_across_ranks=True)
    os.makedirs("output", exist_ok=True)
-
    original_videos = [os.path.basename(video_path)]
    original_videos_local = partition_by_size(original_videos, batch_size)
    positive_prompts_embeds = _extract_text_embeds()
-
-    video_transform = Compose([
-        NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
-        Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
-    ])
-
+    video_transform = Compose([NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
    for videos, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
        media_type, _ = mimetypes.guess_type(video_path)
        is_video = media_type and media_type.startswith("video")
-
        if is_video:
-            video, _, _ = read_video(video_path, output_format="TCHW")
-            video = video[:121] / 255.0
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
-        else:  # We assume it is an image
-            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
-
+            video, _, _ = read_video(video_path, output_format="TCHW"); video = video[:121] / 255.0; output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
+        else:
+            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0); output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
        cond_latents = [video_transform(video.to("cuda"))]
        ori_lengths = [v.size(1) for v in cond_latents]
        cond_latents = runner.vae_encode(cond_latents)
-
        for key in ["texts_pos", "texts_neg"]:
-            for i, emb in enumerate(text_embeds[key]):
-                text_embeds[key][i] = emb.to("cuda")
-
+            for i, emb in enumerate(text_embeds[key]): text_embeds[key][i] = emb.to("cuda")
        samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
        del cond_latents
-
        for sample, ori_length in zip(samples, ori_lengths):
            sample = sample[:ori_length].to("cpu")
            sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round().to(torch.uint8).numpy()
-
-            if is_video:
-                mediapy.write_video(output_dir, sample, fps=fps_out)
-            else:
-                mediapy.write_image(output_dir, sample[0])
-
+            if is_video: mediapy.write_video(output_dir, sample, fps=fps_out)
+            else: mediapy.write_image(output_dir, sample[0])
        gc.collect(); torch.cuda.empty_cache()
    return (None, output_dir, output_dir) if is_video else (output_dir, None, output_dir)
 
 with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     gr.HTML(f"""
-    <div style='text-align:center; margin-bottom: 10px;'>
-        <img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;' alt='SeedVR logo'/>
-    </div>
+
     <p><b>Official Gradio demo</b> for
     <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
     <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
     🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.
     </p>
     """)
-
     with gr.Row():
         input_file = gr.File(label="Upload an image or video")
         with gr.Column():
             seed = gr.Number(label="Seed", value=666)
             fps = gr.Number(label="Output FPS (for video)", value=24)
-
     run_button = gr.Button("Run")
-
     with gr.Row():
         output_image = gr.Image(label="Output Image")
         output_video = gr.Video(label="Output Video")
-
     download_link = gr.File(label="Download the result")
-
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
-
     gr.Examples(
         examples=[
             ["01.mp4", 4, 24],
@@ -282,7 +241,6 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
         ],
         inputs=[input_file, seed, fps]
     )
-
     gr.HTML("""
         <hr>
         <p>If you found SeedVR useful, please ⭐ the
@@ -296,5 +254,4 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     <h4>Limitations</h4>
     <p>It may fail on heavy degradations or on AIGC clips with little motion, causing over-sharpening or inadequate restoration.</p>
     """)
-
 demo.queue().launch(share=True)
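
Since the point of this commit is to force flash-attn and Apex to be compiled for the local GPU, a quick post-install check can catch a silent CPU-only build before the demo launches. A minimal sketch, not part of the commit, assuming the packages' conventional import names (`flash_attn`, `apex`, and Apex's `fused_layer_norm_cuda` extension, which only exists when the --cuda_ext build succeeded):

    import importlib

    # Hypothetical post-install check; module names are the packages' usual ones,
    # not something this commit verifies itself.
    for mod in ("flash_attn", "apex", "fused_layer_norm_cuda"):
        try:
            importlib.import_module(mod)
            print(f"OK: {mod}")
        except ImportError as exc:
            print(f"MISSING: {mod} ({exc})")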
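The MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE variables the script exports are exactly what torch.distributed's default env:// rendezvous reads, so the single-process setup reduces to the sketch below (init_process_group and env:// are standard PyTorch API; the gloo backend is an illustrative stand-in for whatever backend the runner actually selects):

    import os
    import torch.distributed as dist

    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "12355"
    os.environ["RANK"] = "0"
    os.environ["WORLD_SIZE"] = "1"

    # env:// (the default) picks up the four variables above
    dist.init_process_group(backend="gloo")
    print(dist.get_rank(), dist.get_world_size())  # -> 0 1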
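The @spaces.GPU decorator added to generation_loop comes from Hugging Face's `spaces` package: on ZeroGPU Spaces it attaches a GPU only for the duration of the decorated call, which is why it wraps the inference entry point rather than the module-level setup. A minimal sketch of the pattern (the function body is illustrative):

    import spaces
    import torch

    @spaces.GPU  # a GPU is allocated while this call runs, then released
    def infer(prompt: str) -> str:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return f"ran on {device}: {prompt}"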
 