Aduc-sdr committed · verified
Commit 77577e4 · 1 Parent(s): 1073841

Update app.py

Files changed (1):
  app.py (+44 -25)
app.py CHANGED
@@ -32,12 +32,10 @@ print(f"Current directory added to sys.path.")
 # --- STEP 3: Install Dependencies Correctly ---
 python_executable = sys.executable
 
-# FIX: Force a NumPy version < 2.0 to avoid compatibility conflicts.
 print("Installing compatible NumPy...")
 subprocess.run([python_executable, "-m", "pip", "install", "numpy<2.0"], check=True)
 
-# Filter requirements.txt to avoid conflicts with the preinstalled torch/torchvision
-print("Filtering requirements.txt...")
+print("Filtering requirements.txt to avoid version conflicts...")
 with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
     for line in f_in:
         if not line.strip().startswith(('torch', 'torchvision')):
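Note: the hunk cuts off at the filter condition, so the write-back and the install of the filtered file are outside the visible context. A minimal sketch of how that step presumably completes (only the filenames and the `startswith()` filter are confirmed by the diff; the trailing `pip install -r` call is an assumption):

```python
# Sketch (assumption): how the filtering step presumably completes.
import subprocess, sys

python_executable = sys.executable
with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
    for line in f_in:
        # Skip pins that would clobber the torch/torchvision preinstalled on the Space
        if not line.strip().startswith(('torch', 'torchvision')):
            f_out.write(line)
subprocess.run([python_executable, "-m", "pip", "install", "-r", "filtered_requirements.txt"],
               check=True)
```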
@@ -52,6 +50,7 @@ subprocess.run([python_executable, "-m", "pip", "install", "flash-attn==2.5.9.po
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
+import torch
 
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
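Only the first line of `load_file_from_url` is visible in this hunk. For reference, a typical body built from the imports shown here (`urlparse`, `download_url_to_file`) looks roughly like this; the actual implementation in app.py may differ:

```python
# Rough sketch (assumption) of the downloader body, based on the imports in this hunk.
import os
from urllib.parse import urlparse
from torch.hub import download_url_to_file

def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
    os.makedirs(model_dir, exist_ok=True)
    filename = file_name or os.path.basename(urlparse(url).path)
    cached_file = os.path.abspath(os.path.join(model_dir, filename))
    if not os.path.exists(cached_file):
        print(f'Downloading: "{url}" to {cached_file}')
        download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
```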
@@ -72,8 +71,6 @@ print("✅ Apex setup complete.")
 
 # --- STEP 4: Download the Pretrained Models ---
 print("Downloading pretrained models...")
-import torch
-
 pretrain_model_url = {
     'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
     'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
@@ -87,7 +84,8 @@ for key, url in pretrain_model_url.items():
     load_file_from_url(url=url, model_dir=model_dir)
 
 
-# --- STEP 5: Run the Main Application ---
+# --- STEP 5: Global Model Initialization (DONE ONLY ONCE) ---
+print("Initializing the model and the distributed environment (only once)...")
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
@@ -124,6 +122,7 @@ def configure_runner():
     config = load_config('configs_3b/main.yaml')
     runner = VideoDiffusionInfer(config)
     OmegaConf.set_readonly(runner.config, False)
+    # The critical initialization call happens here
    init_torch(cudnn_benchmark=False, timeout=datetime.timedelta(seconds=3600))
     runner.configure_dit_model(device="cuda", checkpoint='ckpts/seedvr2_ema_3b.pth')
     runner.configure_vae_model()
@@ -131,6 +130,13 @@ def configure_runner():
     runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
     return runner
 
+# Create the runner globally, ONLY ONCE
+GLOBAL_RUNNER = configure_runner()
+print("✅ Setup complete. Application ready to serve requests.")
+
+
+# --- STEP 6: Inference Functions and Gradio UI ---
+
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
     noises, aug_noises = [torch.randn_like(l) for l in cond_latents], [torch.randn_like(l) for l in cond_latents]
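Hoisting `configure_runner()` to module scope means the checkpoints are loaded once at startup instead of on every request. If eager loading at import time ever becomes undesirable (e.g. to speed up cold starts), a lazy singleton is a common alternative; this is a generic pattern sketch, not part of the commit:

```python
# Generic lazy-singleton alternative (not in this commit): defer the expensive
# configure_runner() call until the first request, then cache the result.
_RUNNER = None

def get_runner():
    global _RUNNER
    if _RUNNER is None:
        _RUNNER = configure_runner()  # assumed to be the same function defined above
    return _RUNNER
```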
@@ -146,11 +152,27 @@ def generation_step(runner, text_embeds_dict, cond_latents):
     video_tensors = runner.inference(noises=noises, conditions=conditions, **text_embeds_dict)
     return [rearrange(v, "c t h w -> t c h w") for v in video_tensors]
 
+def cut_videos(videos, sp_size=1):
+    t = videos.size(1)
+    if t > 121:
+        videos = videos[:, :121]
+        t = 121
+    if (t - 1) % (4 * sp_size) == 0:
+        return videos
+    else:
+        padding_needed = 4 * sp_size - ((t - 1) % (4 * sp_size))
+        last_frame = videos[:, -1].unsqueeze(1)
+        padding = last_frame.repeat(1, padding_needed, 1, 1)
+        videos = torch.cat([videos, padding], dim=1)
+        assert (videos.size(1) - 1) % (4 * sp_size) == 0
+        return videos
+
 @spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24):
     if video_path is None: return None, None, None
-    runner = configure_runner()
-    # Added `weights_only=True` for safety and to suppress the warning
+    # FIX: Use the global runner instead of creating a new one
+    runner = GLOBAL_RUNNER
+
     text_embeds = {
         "texts_pos": [torch.load('pos_emb.pt', weights_only=True).to("cuda")],
         "texts_neg": [torch.load('neg_emb.pt', weights_only=True).to("cuda")]
@@ -159,31 +181,36 @@ def generation_loop(video_path, seed=666, fps_out=24):
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
 
-    # FIX: Provide the missing arguments for NaResize.
     res_h, res_w = 1280, 720
     transform = Compose([
         NaResize(resolution=(res_h * res_w)**0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)),
-        Normalize(0.5, 0.5),
-        Rearrange("t c h w -> c t h w")
+        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")
     ])
 
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
 
     if is_video:
-        video, _, _ = read_video(video_path, output_format="TCHW")
-        video = video[:121] / 255.0
+        video, _, _ = read_video(video_path, output_format="TCHW", pts_unit="sec")
+        video = video / 255.0
         output_path = os.path.join("output", f"{uuid.uuid4()}.mp4")
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
 
-    cond_latents = [transform(video.to("cuda"))]
-    ori_length = cond_latents[0].size(2)
+    transformed_video = transform(video.to("cuda"))
+    ori_length = transformed_video.size(1)
+
+    if is_video:
+        padded_video = cut_videos(transformed_video)
+        cond_latents = [padded_video]
+    else:
+        cond_latents = [transformed_video]
+
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
+
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
 
@@ -195,14 +222,7 @@
     return output_path, None, output_path
 
 with gr.Blocks(title="SeedVR") as demo:
-    gr.HTML(f"""
-
-    <p><b>Official Gradio demo</b> for
-    <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
-    <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
-    🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.
-    </p>
-    """)
+    gr.HTML(f"""<div style='text-align:center; margin-bottom: 10px;'><img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;'/></div>...""")
     with gr.Row():
         input_file = gr.File(label="Upload Image or Video")
     with gr.Column():
@@ -214,5 +234,4 @@ with gr.Blocks(title="SeedVR") as demo:
         download_link = gr.File(label="Download Result")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 
-
 demo.queue().launch(share=True)
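One caveat with the new header: the logo is referenced through the `file/` URL prefix, and recent Gradio versions only serve local files from whitelisted directories. If the image fails to load, passing `allowed_paths` at launch is the usual fix (a hedged suggestion, not part of this commit; `assets` is the directory assumed by the `src` above):

```python
# Hedged suggestion (not in this commit): whitelist the assets directory so
# Gradio will serve the logo referenced via the `file/` URL prefix.
demo.queue().launch(share=True, allowed_paths=["assets"])
```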
 