Spaces:
Paused
Update app.py
app.py
CHANGED
@@ -28,11 +28,8 @@ if not os.path.exists(repo_dir_name):
 # --- STEP 2: Path setup ---
 # Change into the repository directory and add it to Python's path.
 
-# Change into the repository directory. ESSENTIAL for relative file paths.
 os.chdir(repo_dir_name)
 print(f"Working directory changed to: {os.getcwd()}")
-
-# Add the directory to sys.path. ESSENTIAL for module imports.
 sys.path.insert(0, os.path.abspath('.'))
 print(f"Current directory added to sys.path for imports.")
 
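Note: the two calls kept by this hunk do different jobs, which is why only the redundant comments were dropped: os.chdir fixes relative file paths, and sys.path.insert fixes module imports. A minimal sketch of the pattern (the repo directory name is assumed):

    import os, sys

    os.chdir("SeedVR")                        # relative paths like './ckpts' now resolve inside the repo
    sys.path.insert(0, os.path.abspath("."))  # 'from projects.video_diffusion_sr.color_fix import ...' now resolves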
@@ -43,7 +40,6 @@ import torch
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
-import shlex
 
 # Download helper from the original repo
 def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
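Note: the body of load_file_from_url lies outside this hunk, and the removed `import shlex` is presumably unused now. For orientation, a minimal sketch of what a helper with this signature typically does, built only on torch.hub.download_url_to_file (an assumption, not the repo's exact code):

    import os
    from urllib.parse import urlparse
    from torch.hub import download_url_to_file

    def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
        # Resolve the target path, then download only when not already cached
        model_dir = model_dir or '.'
        os.makedirs(model_dir, exist_ok=True)
        file_name = file_name or os.path.basename(urlparse(url).path)
        cached_file = os.path.abspath(os.path.join(model_dir, file_name))
        if not os.path.exists(cached_file):
            download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
        return cached_file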
@@ -72,7 +68,6 @@ pretrain_model_url = {
 # Create the checkpoint directory and download the models
 ckpt_dir = Path('./ckpts')
 ckpt_dir.mkdir(exist_ok=True)
-
 for key, url in pretrain_model_url.items():
     filename = os.path.basename(url)
     model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
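Note: the rest of the loop body falls outside the hunk; presumably it hands each URL to the helper above, along these lines (a hypothetical reconstruction). The routing sends the 'vae' and 'dit' weights to ./ckpts and everything else to the repository root.

    for key, url in pretrain_model_url.items():
        filename = os.path.basename(url)
        model_dir = './ckpts' if key in ['vae', 'dit'] else '.'
        load_file_from_url(url, model_dir=model_dir, progress=True, file_name=filename)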
@@ -84,23 +79,27 @@ for key, url in pretrain_model_url.items():
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/23_1_lq.mp4', '01.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/28_1_lq.mp4', '02.mp4')
 torch.hub.download_url_to_file('https://huggingface.co/datasets/Iceclear/SeedVR_VideoDemos/resolve/main/seedvr_videos_crf23/aigc1k/2_1_lq.mp4', '03.mp4')
-torch.hub.download_url_to_file('https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl', 'apex-0.1-cp310-cp310-linux_x86_64.whl')
 
-#
+# --- REFINEMENT: compile dependencies from source for the L40S GPU (Ada Lovelace) ---
 python_executable = sys.executable
-subprocess.run([python_executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"], env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, check=True)
 
-
-
-
-subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", apex_wheel_path], check=True)
-print("✅ Apex setup complete.")
-else:
-    print(f"WARNING: the Apex wheel '{apex_wheel_path}' was not found in the cloned repository.")
+print("Installing flash-attn by compiling it from source...")
+# Force a clean reinstall so the build targets the current GPU
+subprocess.run([python_executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", "flash-attn"], check=True)
 
-
-
+print("Cloning and compiling Apex from source...")
+if not os.path.exists("apex"):
+    subprocess.run("git clone https://github.com/NVIDIA/apex", shell=True, check=True)
 
+# Install Apex from the cloned source, which forces compilation for the L40S GPU
+# The --cpp_ext and --cuda_ext flags are essential for the build
+subprocess.run(
+    [python_executable, "-m", "pip", "install", "-v", "--disable-pip-version-check", "--no-cache-dir", "--global-option=--cpp_ext", "--global-option=--cuda_ext", "./apex"],
+    check=True
+)
+print("✅ Apex setup complete.")
+
+# --- STEP 4: Run the main application code ---
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
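Note: swapping the prebuilt cp310 Apex wheel and the FLASH_ATTENTION_SKIP_CUDA_BUILD shortcut for source builds is what lets pip compile against the GPU that is actually present. A sketch, not part of the commit, that pins the target architecture explicitly before those pip calls via PyTorch's standard TORCH_CUDA_ARCH_LIST variable (the L40S reports compute capability 8.9):

    import os
    import torch

    # Query the current card's compute capability, e.g. (8, 9) on an L40S
    major, minor = torch.cuda.get_device_capability()
    # PyTorch's C++/CUDA extension builder honors this when compiling from source
    os.environ["TORCH_CUDA_ARCH_LIST"] = f"{major}.{minor}"
    print(f"CUDA extensions will target sm_{major}{minor}")

Worth flagging as well: newer pip releases deprecate --global-option, and the Apex README recommends --config-settings "--build-option=--cpp_ext" (and likewise for --cuda_ext) on pip >= 23.1, so the flags above may need adjusting to the image's pip version.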
@@ -130,6 +129,8 @@ os.environ["MASTER_ADDR"] = "127.0.0.1"
 os.environ["MASTER_PORT"] = "12355"
 os.environ["RANK"] = str(0)
 os.environ["WORLD_SIZE"] = str(1)
+# Add an environment variable that can help PyTorch debug CUDA errors
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 
 if os.path.exists("projects/video_diffusion_sr/color_fix.py"):
     from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
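Note: CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous, so a CUDA error surfaces at the Python line that triggered it instead of at a later synchronization point; it also serializes GPU work and slows inference, so it is a debugging aid rather than a production default. The variable is read when the CUDA context initializes, so it must be set before the first CUDA call, as in this sketch (the DEBUG_CUDA gate is hypothetical):

    import os

    if os.environ.get("DEBUG_CUDA") == "1":
        os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before CUDA initializes

    import torch
    x = torch.ones(4, device="cuda") * 2  # a failure here is now reported on this exact line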
@@ -158,122 +159,80 @@ def configure_runner(sp_size):
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x):
         return [i.to(torch.device("cuda")) for i in x]
-
     noises = [torch.randn_like(latent) for latent in cond_latents]
     aug_noises = [torch.randn_like(latent) for latent in cond_latents]
     noises, aug_noises, cond_latents = sync_data((noises, aug_noises, cond_latents), 0)
     noises, aug_noises, cond_latents = list(map(_move_to_cuda, (noises, aug_noises, cond_latents)))
-
     def _add_noise(x, aug_noise):
         t = torch.tensor([1000.0], device=torch.device("cuda")) * 0.1
         shape = torch.tensor(x.shape[1:], device=torch.device("cuda"))[None]
         t = runner.timestep_transform(t, shape)
         return runner.schedule.forward(x, aug_noise, t)
-
     conditions = [runner.get_condition(noise, task="sr", latent_blur=_add_noise(latent_blur, aug_noise)) for noise, aug_noise, latent_blur in zip(noises, aug_noises, cond_latents)]
-
     with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
         video_tensors = runner.inference(noises=noises, conditions=conditions, dit_offload=False, **text_embeds_dict)
-
     return [rearrange(video, "c t h w -> t c h w") for video in video_tensors]
 
-
+@spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24, batch_size=1, cfg_scale=1.0, cfg_rescale=0.0, sample_steps=1, res_h=1280, res_w=720, sp_size=1):
-    if video_path is None:
-        return None, None, None
-
+    if video_path is None: return None, None, None
     runner = configure_runner(1)
-
     def _extract_text_embeds():
         positive_prompts_embeds = []
         for _ in original_videos_local:
-            positive_prompts_embeds.append({
-                "texts_pos": [torch.load('pos_emb.pt')],
-                "texts_neg": [torch.load('neg_emb.pt')]
-            })
+            positive_prompts_embeds.append({"texts_pos": [torch.load('pos_emb.pt')], "texts_neg": [torch.load('neg_emb.pt')]})
        gc.collect(); torch.cuda.empty_cache()
         return positive_prompts_embeds
-
-    runner.config.diffusion.cfg.scale = cfg_scale
-    runner.config.diffusion.cfg.rescale = cfg_rescale
-    runner.config.diffusion.timesteps.sampling.steps = sample_steps
+    runner.config.diffusion.cfg.scale, runner.config.diffusion.cfg.rescale, runner.config.diffusion.timesteps.sampling.steps = cfg_scale, cfg_rescale, sample_steps
     runner.configure_diffusion()
     set_seed(int(seed) % (2**32), same_across_ranks=True)
     os.makedirs("output", exist_ok=True)
-
     original_videos = [os.path.basename(video_path)]
     original_videos_local = partition_by_size(original_videos, batch_size)
     positive_prompts_embeds = _extract_text_embeds()
-
-    video_transform = Compose([
-        NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
-        Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
-    ])
-
+    video_transform = Compose([NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")])
     for videos, text_embeds in tqdm(zip(original_videos_local, positive_prompts_embeds)):
         media_type, _ = mimetypes.guess_type(video_path)
         is_video = media_type and media_type.startswith("video")
-
         if is_video:
-            video, _, _ = read_video(video_path, output_format="TCHW")
-
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
-        else:  # Assume it is an image
-            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
-            output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
-
+            video, _, _ = read_video(video_path, output_format="TCHW"); video = video[:121] / 255.0; output_dir = os.path.join("output", f"{uuid.uuid4()}.mp4")
+        else:
+            video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0); output_dir = os.path.join("output", f"{uuid.uuid4()}.png")
         cond_latents = [video_transform(video.to("cuda"))]
         ori_lengths = [v.size(1) for v in cond_latents]
         cond_latents = runner.vae_encode(cond_latents)
-
         for key in ["texts_pos", "texts_neg"]:
-            for i, emb in enumerate(text_embeds[key]):
-                text_embeds[key][i] = emb.to("cuda")
-
+            for i, emb in enumerate(text_embeds[key]): text_embeds[key][i] = emb.to("cuda")
         samples = generation_step(runner, text_embeds, cond_latents=cond_latents)
         del cond_latents
-
         for sample, ori_length in zip(samples, ori_lengths):
             sample = sample[:ori_length].to("cpu")
             sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round().to(torch.uint8).numpy()
-
-            if is_video:
-                mediapy.write_video(output_dir, sample, fps=fps_out)
-            else:
-                mediapy.write_image(output_dir, sample[0])
-
+            if is_video: mediapy.write_video(output_dir, sample, fps=fps_out)
+            else: mediapy.write_image(output_dir, sample[0])
         gc.collect(); torch.cuda.empty_cache()
     return (None, output_dir, output_dir) if is_video else (output_dir, None, output_dir)
 
 with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     gr.HTML(f"""
-
-    <img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;' alt='SeedVR logo'/>
-    </div>
+
     <p><b>Official Gradio demo</b> for
     <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
     <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
     🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.
     </p>
     """)
-
     with gr.Row():
         input_file = gr.File(label="Upload image or video")
         with gr.Column():
             seed = gr.Number(label="Seed", value=666)
             fps = gr.Number(label="Output FPS (for video)", value=24)
-
     run_button = gr.Button("Run")
-
     with gr.Row():
         output_image = gr.Image(label="Output Image")
         output_video = gr.Video(label="Output Video")
-
     download_link = gr.File(label="Download the result")
-
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
-
     gr.Examples(
         examples=[
             ["01.mp4", 4, 24],
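Note: two additions in this hunk deserve a gloss. The new `video = video[:121] / 255.0` caps input clips at 121 frames and rescales the uint8 frames returned by read_video to the [0, 1] range the transform pipeline expects. And @spaces.GPU comes from Hugging Face's spaces package: on ZeroGPU hardware it attaches a GPU only for the duration of the decorated call, while on dedicated GPUs it is effectively a no-op. A minimal usage sketch (function name and duration are illustrative):

    import spaces
    import torch

    @spaces.GPU(duration=120)  # seconds the call may hold the GPU; bare @spaces.GPU also works
    def restore(path: str) -> str:
        assert torch.cuda.is_available()  # CUDA is attached inside the decorated call
        return path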
@@ -282,7 +241,6 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
         ],
         inputs=[input_file, seed, fps]
     )
-
     gr.HTML("""
     <hr>
     <p>If you found SeedVR useful, please ⭐ the
@@ -296,5 +254,4 @@ with gr.Blocks(title="SeedVR2: One-Step Video Restoration") as demo:
     <h4>Limitations</h4>
     <p>It can fail on heavy degradations or on AIGC clips with little motion, causing over-sharpening or unsatisfactory restoration.</p>
     """)
-
 demo.queue().launch(share=True)