Update app.py
app.py
CHANGED
@@ -32,12 +32,10 @@ print(f"Current directory added to sys.path.")
 # --- STEP 3: Install Dependencies Correctly ---
 python_executable = sys.executable
 
-# FIX: Force a NumPy version < 2.0 to avoid compatibility conflicts.
 print("Installing compatible NumPy...")
 subprocess.run([python_executable, "-m", "pip", "install", "numpy<2.0"], check=True)
 
-
-print("Filtering requirements.txt...")
+print("Filtering requirements.txt to avoid version conflicts...")
 with open("requirements.txt", "r") as f_in, open("filtered_requirements.txt", "w") as f_out:
     for line in f_in:
         if not line.strip().startswith(('torch', 'torchvision')):
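The hunk above pins numpy<2.0 and writes out a requirements list with the torch/torchvision lines stripped; the install of that filtered list happens outside the hunk. A minimal sketch of the presumed follow-up step, reusing the same interpreter as the script (the exact pip invocation is an assumption, not shown in this diff):

import subprocess
import sys

python_executable = sys.executable
# Assumed follow-up: install everything except torch/torchvision from the filtered list.
subprocess.run(
    [python_executable, "-m", "pip", "install", "-r", "filtered_requirements.txt"],
    check=True,
)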
@@ -52,6 +50,7 @@ subprocess.run([python_executable, "-m", "pip", "install", "flash-attn==2.5.9.po
 from pathlib import Path
 from urllib.parse import urlparse
 from torch.hub import download_url_to_file, get_dir
+import torch
 
 def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
     os.makedirs(model_dir, exist_ok=True)
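Only the signature and first line of load_file_from_url appear in this hunk. For reference, a minimal sketch of how such a helper can be written with the imports shown above (torch.hub.download_url_to_file and urllib.parse.urlparse are the real APIs; this body is an illustration, not the file's actual implementation):

import os
from urllib.parse import urlparse
from torch.hub import download_url_to_file

def load_file_from_url(url, model_dir='.', progress=True, file_name=None):
    os.makedirs(model_dir, exist_ok=True)
    # Derive the target file name from the URL path unless one is given explicitly.
    if file_name is None:
        file_name = os.path.basename(urlparse(url).path)
    cached_file = os.path.abspath(os.path.join(model_dir, file_name))
    # Download only when the checkpoint is not already cached locally.
    if not os.path.exists(cached_file):
        download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file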
@@ -72,8 +71,6 @@ print("✅ Apex configuration complete.")
 
 # --- STEP 4: Download the Pretrained Models ---
 print("Downloading pretrained models...")
-import torch
-
 pretrain_model_url = {
     'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
     'dit': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
@@ -87,7 +84,8 @@ for key, url in pretrain_model_url.items():
     load_file_from_url(url=url, model_dir=model_dir)
 
 
-# --- STEP 5:
+# --- STEP 5: Global Model Initialization (DONE ONLY ONCE) ---
+print("Initializing the model and the distributed environment (only once)...")
 import mediapy
 from einops import rearrange
 from omegaconf import OmegaConf
@@ -124,6 +122,7 @@ def configure_runner():
     config = load_config('configs_3b/main.yaml')
     runner = VideoDiffusionInfer(config)
     OmegaConf.set_readonly(runner.config, False)
+    # The critical initialization call is made here
    init_torch(cudnn_benchmark=False, timeout=datetime.timedelta(seconds=3600))
    runner.configure_dit_model(device="cuda", checkpoint='ckpts/seedvr2_ema_3b.pth')
    runner.configure_vae_model()
@@ -131,6 +130,13 @@ def configure_runner():
     runner.vae.set_memory_limit(**runner.config.vae.memory_limit)
     return runner
 
+# We create the runner globally, ONLY ONCE
+GLOBAL_RUNNER = configure_runner()
+print("✅ Setup complete. Application ready to receive requests.")
+
+
+# --- STEP 6: Inference Functions and Gradio UI ---
+
 def generation_step(runner, text_embeds_dict, cond_latents):
     def _move_to_cuda(x): return [i.to("cuda") for i in x]
     noises, aug_noises = [torch.randn_like(l) for l in cond_latents], [torch.randn_like(l) for l in cond_latents]
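This hunk moves model construction out of the request path: GLOBAL_RUNNER is built once at import time, so the @spaces.GPU-decorated handler no longer rebuilds the runner on every call. A small sketch of that load-once pattern in isolation (build_model and handle are illustrative names, not from this diff):

import spaces
import torch

def build_model():
    # Stand-in for configure_runner(): any expensive one-time setup.
    return torch.nn.Linear(8, 8).to("cuda")

MODEL = build_model()  # runs once per process start, not once per request

@spaces.GPU
def handle(x: torch.Tensor) -> torch.Tensor:
    # Each request reuses the already-initialized MODEL.
    return MODEL(x.to("cuda"))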
@@ -146,11 +152,27 @@ def generation_step(runner, text_embeds_dict, cond_latents):
     video_tensors = runner.inference(noises=noises, conditions=conditions, **text_embeds_dict)
     return [rearrange(v, "c t h w -> t c h w") for v in video_tensors]
 
+def cut_videos(videos, sp_size=1):
+    t = videos.size(1)
+    if t > 121:
+        videos = videos[:, :121]
+        t = 121
+    if (t - 1) % (4 * sp_size) == 0:
+        return videos
+    else:
+        padding_needed = 4 * sp_size - ((t - 1) % (4 * sp_size))
+        last_frame = videos[:, -1].unsqueeze(1)
+        padding = last_frame.repeat(1, padding_needed, 1, 1)
+        videos = torch.cat([videos, padding], dim=1)
+        assert (videos.size(1) - 1) % (4 * sp_size) == 0
+        return videos
+
 @spaces.GPU
 def generation_loop(video_path, seed=666, fps_out=24):
     if video_path is None: return None, None, None
-    runner
-
+    # FIX: Use the global runner instead of creating a new one
+    runner = GLOBAL_RUNNER
+
     text_embeds = {
         "texts_pos": [torch.load('pos_emb.pt', weights_only=True).to("cuda")],
         "texts_neg": [torch.load('neg_emb.pt', weights_only=True).to("cuda")]
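cut_videos caps clips at 121 frames and pads by repeating the last frame until the length satisfies (t - 1) % (4 * sp_size) == 0, presumably so the VAE's temporal downsampling divides evenly. A quick standalone check of the arithmetic with a dummy tensor, using the cut_videos defined in this hunk (not part of the app itself):

import torch

# Dummy clip in the c t h w layout produced by the transform: 3 channels, 30 frames, 16x16.
clip = torch.zeros(3, 30, 16, 16)
padded = cut_videos(clip, sp_size=1)
# (30 - 1) % 4 == 1, so 3 trailing frames are appended: 30 -> 33, and (33 - 1) % 4 == 0.
print(padded.shape)  # torch.Size([3, 33, 16, 16])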
@@ -159,31 +181,36 @@ def generation_loop(video_path, seed=666, fps_out=24):
     set_seed(int(seed))
     os.makedirs("output", exist_ok=True)
 
-    # FIX: Provide the missing arguments for NaResize.
     res_h, res_w = 1280, 720
     transform = Compose([
         NaResize(resolution=(res_h * res_w)**0.5, mode="area", downsample_only=False),
         Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
-        DivisibleCrop((16, 16)),
-        Normalize(0.5, 0.5),
-        Rearrange("t c h w -> c t h w")
+        DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w")
     ])
 
     media_type, _ = mimetypes.guess_type(video_path)
     is_video = media_type and media_type.startswith("video")
 
     if is_video:
-        video, _, _ = read_video(video_path, output_format="TCHW")
-        video = video
+        video, _, _ = read_video(video_path, output_format="TCHW", pts_unit="sec")
+        video = video / 255.0
         output_path = os.path.join("output", f"{uuid.uuid4()}.mp4")
     else:
         video = T.ToTensor()(Image.open(video_path).convert("RGB")).unsqueeze(0)
         output_path = os.path.join("output", f"{uuid.uuid4()}.png")
 
-
-    ori_length =
+    transformed_video = transform(video.to("cuda"))
+    ori_length = transformed_video.size(1)
+
+    if is_video:
+        padded_video = cut_videos(transformed_video)
+        cond_latents = [padded_video]
+    else:
+        cond_latents = [transformed_video]
+
     cond_latents = runner.vae_encode(cond_latents)
     samples = generation_step(runner, text_embeds, cond_latents)
+
     sample = samples[0][:ori_length].cpu()
     sample = rearrange(sample, "t c h w -> t h w c").clip(-1, 1).add(1).mul(127.5).byte().numpy()
 
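The video branch now scales by 255 so both input paths hand the transform values in [0, 1]: torchvision's read_video returns uint8 frames in 0–255, while T.ToTensor() already rescales a PIL image to [0, 1]. A small standalone check of that equivalence on synthetic data (illustration only):

import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

frame = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)

# Image branch: ToTensor() converts uint8 HWC to float CHW scaled to [0, 1].
img_tensor = T.ToTensor()(Image.fromarray(frame))

# Video branch: uint8 frames permuted to CHW and divided by 255.0 land in the same range.
vid_tensor = torch.from_numpy(frame).permute(2, 0, 1).float() / 255.0

print(torch.allclose(img_tensor, vid_tensor))  # True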
@@ -195,14 +222,7 @@ def generation_loop(video_path, seed=666, fps_out=24):
     return output_path, None, output_path
 
 with gr.Blocks(title="SeedVR") as demo:
-    gr.HTML(f"""
-
-    <p><b>Official Gradio demo</b> for
-    <a href='https://github.com/ByteDance-Seed/SeedVR' target='_blank'>
-    <b>SeedVR2: One-Step Video Restoration via Diffusion Adversarial Post-Training</b></a>.<br>
-    🔥 <b>SeedVR2</b> is a one-step image and video restoration algorithm for real-world and AIGC content.
-    </p>
-    """)
+    gr.HTML(f"""<div style='text-align:center; margin-bottom: 10px;'><img src='file/{os.path.abspath("assets/seedvr_logo.png")}' style='height:40px;'/></div>...""")
     with gr.Row():
         input_file = gr.File(label="Upload Image or Video")
         with gr.Column():
@@ -214,5 +234,4 @@ with gr.Blocks(title="SeedVR") as demo:
         download_link = gr.File(label="Download Result")
     run_button.click(fn=generation_loop, inputs=[input_file, seed, fps], outputs=[output_image, output_video, download_link])
 
-
 demo.queue().launch(share=True)