File size: 14,566 Bytes
3539b1a
 
 
2ac1c2d
691ecd9
77948f7
7f54bbc
3539b1a
 
 
 
aac950a
3539b1a
 
 
 
 
fb76e24
50e4252
3539b1a
 
 
 
 
50e4252
3539b1a
fb76e24
 
50e4252
 
fb76e24
3539b1a
 
 
 
 
 
 
 
 
aac950a
3539b1a
50e4252
3539b1a
 
 
 
 
 
 
50e4252
 
 
f55e443
2ac1c2d
3539b1a
 
 
 
5cbb918
50e4252
 
5cbb918
 
2ac1c2d
5cbb918
50e4252
3539b1a
 
 
2ac1c2d
3539b1a
 
5cbb918
 
50e4252
5cbb918
3539b1a
5cbb918
 
3539b1a
 
 
 
50e4252
3539b1a
50e4252
 
 
3539b1a
50e4252
 
3539b1a
 
 
 
 
 
 
 
 
50e4252
 
3539b1a
50e4252
 
 
 
 
 
 
 
3539b1a
50e4252
 
 
 
 
 
3539b1a
 
 
 
 
50e4252
3539b1a
 
50e4252
 
3539b1a
50e4252
3539b1a
 
 
50e4252
 
3539b1a
50e4252
 
 
 
 
 
3539b1a
 
aac950a
 
50e4252
3539b1a
50e4252
3539b1a
50e4252
3539b1a
 
5cbb918
bc373eb
5cbb918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3539b1a
 
5cbb918
 
aac950a
 
3539b1a
 
aac950a
 
 
3539b1a
aac950a
 
50e4252
3539b1a
aac950a
3539b1a
aac950a
 
3539b1a
56b8892
 
aac950a
5cbb918
aac950a
3539b1a
 
5cbb918
aac950a
5cbb918
3539b1a
aac950a
 
3539b1a
 
 
5cbb918
3539b1a
 
 
aac950a
3539b1a
50e4252
aac950a
 
5cbb918
 
3539b1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50e4252
3539b1a
aac950a
3539b1a
50e4252
3539b1a
 
 
 
 
 
 
 
aac950a
5cbb918
 
 
3539b1a
 
 
 
 
 
 
 
5cbb918
3539b1a
b8291ba
 
50e4252
 
3539b1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aac950a
 
3539b1a
 
 
 
 
 
 
 
5cbb918
50e4252
3539b1a
 
 
 
 
2ac1c2d
3539b1a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# ==============================================================================
# 1. INSTALACIÓN DEL ENTORNO Y DEPENDENCIAS
# ==============================================================================
import os
import shlex
import spaces
import subprocess
import logging

# Configure root logging for debugging: INFO level, each record tagged "Step1X-3D".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - Step1X-3D - %(levelname)s - %(message)s')

def install_dependencies():
    """Install the CUDA toolkit (if absent) and build the native extensions.

    Steps, in order:
      1. If ``/usr/local/cuda`` does not exist, download the CUDA 12.4 runfile
         installer into ``/tmp`` and run it with ``--silent --toolkit``.
      2. Export ``CUDA_HOME``, ``PATH``, ``LD_LIBRARY_PATH`` and
         ``TORCH_CUDA_ARCH_LIST`` for the builds that follow.
      3. Run ``python setup.py install`` inside the differentiable renderer
         directory and pip-install the ``custom_rasterizer`` wheel (resolved
         relative to the current working directory).

    Raises:
        subprocess.CalledProcessError: If the installer download, the renderer
            build or the wheel installation exits with a non-zero status.
    """
    logging.info("Iniciando la instalación de dependencias...")

    cuda_home = "/usr/local/cuda"

    # Install the CUDA toolkit only when it is not already present.
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
    CUDA_TOOLKIT_FILE = f"/tmp/{os.path.basename(CUDA_TOOLKIT_URL)}"
    if not os.path.exists(cuda_home):
        logging.info("Descargando e instalando CUDA Toolkit...")
        # A failed download leaves nothing usable to execute, so fail here
        # with a clear error instead of at the exec step.
        subprocess.run(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE], check=True)
        # Equivalent of `chmod +x` without spawning a process.
        os.chmod(CUDA_TOOLKIT_FILE, os.stat(CUDA_TOOLKIT_FILE).st_mode | 0o111)
        installer = subprocess.run([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
        if installer.returncode != 0:
            # Surface the failure instead of ignoring it; the extension build
            # below (check=True) remains the hard gate.
            logging.error("El instalador de CUDA Toolkit terminó con código %d.", installer.returncode)
    else:
        logging.info("CUDA Toolkit ya está instalado.")

    os.environ["CUDA_HOME"] = cuda_home
    os.environ["PATH"] = f"{cuda_home}/bin:{os.environ['PATH']}"
    # Keep the original `lib` entry and add the standard `lib64` directory.
    # Only append the inherited value when it is non-empty: an empty entry in
    # LD_LIBRARY_PATH is interpreted as the current working directory.
    library_dirs = [f"{cuda_home}/lib", f"{cuda_home}/lib64"]
    inherited_library_path = os.environ.get("LD_LIBRARY_PATH", "")
    if inherited_library_path:
        library_dirs.append(inherited_library_path)
    os.environ["LD_LIBRARY_PATH"] = ":".join(library_dirs)
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

    # Build the custom extensions. `cwd=` replaces the former `cd ... &&`
    # shell string, so no shell is involved.
    logging.info("Compilando extensiones de renderizado...")
    renderer_path = "/home/user/app/step1x3d_texture/differentiable_renderer/"
    subprocess.run(["python", "setup.py", "install"], cwd=renderer_path, check=True)
    subprocess.run(
        ["pip", "install", "custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"],
        check=True,
    )

    logging.info("Instalación completada.")
    # Diagnostic only: print the compiler version; its exit status is not checked.
    os.system('nvcc -V')

# Must run before the torch / pipeline imports that follow in this file.
install_dependencies()

import uuid
import torch
import trimesh
import argparse
import numpy as np
import gradio as gr
from PIL import Image, ImageOps
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler
from step1x3d_geometry.models.pipelines.pipeline import Step1X3DGeometryPipeline
from step1x3d_texture.pipelines.step1x_3d_texture_synthesis_pipeline import Step1X3DTexturePipeline
from step1x3d_geometry.models.pipelines.pipeline_utils import reduce_face, remove_degenerate_face

# ==============================================================================
# 2. CONFIGURACIÓN Y CARGA DE MODELOS
# ==============================================================================

# Command-line options: which Step1X-3D sub-models to load and where generated
# files are cached.
parser = argparse.ArgumentParser()
parser.add_argument("--geometry_model", type=str, default="Step1X-3D-Geometry-Label-1300m")
parser.add_argument("--texture_model", type=str, default="Step1X-3D-Texture")
parser.add_argument("--cache_dir", type=str, default="cache")
args = parser.parse_args()

os.makedirs(args.cache_dir, exist_ok=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is a GPU optimisation. When no CUDA device is available the
# SDXL/ControlNet weights are loaded in float32 instead, so the CPU fallback
# does not depend on float16 CPU kernel support.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

logging.info("Cargando modelos... Este proceso puede tardar varios minutos.")

# Step1X-3D models: geometry pipeline (moved to `device`) and texture pipeline.
logging.info(f"Cargando modelo de geometría: {args.geometry_model}")
geometry_model = Step1X3DGeometryPipeline.from_pretrained(
    "stepfun-ai/Step1X-3D", subfolder=args.geometry_model
).to(device)

logging.info(f"Cargando modelo de textura: {args.texture_model}")
texture_model = Step1X3DTexturePipeline.from_pretrained("stepfun-ai/Step1X-3D", subfolder=args.texture_model)

# SDXL + scribble ControlNet used to pre-process sketches (step 0).
logging.info("Cargando modelos para el pre-procesamiento de bocetos (SDXL + ControlNet)...")
controlnet = ControlNetModel.from_pretrained("xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=torch_dtype)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)
pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
    "sd-community/sdxl-flash", controlnet=controlnet, vae=vae, torch_dtype=torch_dtype
)
# Swap in the Euler-ancestral scheduler, reusing the pipeline's scheduler config.
pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
pipe_control.to(device)
logging.info("Todos los modelos han sido cargados correctamente.")


# ==============================================================================
# 3. FUNCIONES DE GENERACIÓN POR PASOS
# ==============================================================================

def apply_3d_style(prompt: str) -> tuple[str, str]:
    """Wrap *prompt* in the default '3D Model' style.

    Args:
        prompt: The user's object description.

    Returns:
        A ``(styled_prompt, negative_prompt)`` pair: the description embedded
        in the fixed style template, and the style's fixed negative prompt.
    """
    styled = (
        "professional 3d model "
        + prompt
        + " . octane render, highly detailed, volumetric, dramatic lighting"
    )
    negative = "ugly, deformed, noisy, low poly, blurry, painting"
    return styled, negative

@spaces.GPU(duration=60)
def process_sketch(image, prompt, negative_prompt, guidance_scale, num_steps, controlnet_scale):
    """Step 0: turn a sketch into a polished image with the ControlNet pipeline.

    The sketch is converted to RGB, rescaled (aspect ratio kept) so that its
    area is roughly 1024*1024 pixels, colour-inverted and passed to
    ``pipe_control`` together with the styled prompt from ``apply_3d_style``.
    A non-empty ``negative_prompt`` is appended to the style's negative prompt.
    The generated image is written as a PNG into ``args.cache_dir`` under a
    random UUID-based name.

    Args:
        image: Input sketch; must offer ``convert``/``size``/``resize`` (the UI
            passes a PIL image).
        prompt: Description of the object.
        negative_prompt: Optional extra negative prompt.
        guidance_scale: Guidance scale, cast to ``float``.
        num_steps: Number of inference steps, cast to ``int``.
        controlnet_scale: ControlNet conditioning scale, cast to ``float``.

    Returns:
        Path of the saved, processed image.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        raise gr.Error("Por favor, proporciona un boceto de entrada.")

    sketch = image.convert("RGB")

    # Rescale so the pixel count is about 1024*1024, then invert the colours.
    src_w, src_h = sketch.size
    scale = np.sqrt(1024.0 * 1024.0 / (src_w * src_h))
    target_w = int(src_w * scale)
    target_h = int(src_h * scale)
    control_image = ImageOps.invert(sketch.resize((target_w, target_h)))

    styled_prompt, styled_negative = apply_3d_style(prompt)
    if negative_prompt:
        # Append the user's negative prompt to the style default.
        styled_negative = f"{styled_negative}, {negative_prompt}"

    logging.info(f"Mejorando boceto con prompt: '{styled_prompt}'")

    # Fresh random seed on every call.
    rng = torch.Generator(device=device)
    rng = rng.manual_seed(np.random.randint(0, 2**32 - 1))

    pipeline_kwargs = {
        "prompt": styled_prompt,
        "negative_prompt": styled_negative,
        "image": control_image,
        "num_inference_steps": int(num_steps),
        "controlnet_conditioning_scale": float(controlnet_scale),
        "guidance_scale": float(guidance_scale),
        "width": target_w,
        "height": target_h,
        "generator": rng,
    }
    enhanced = pipe_control(**pipeline_kwargs).images[0]

    processed_image_path = f"{args.cache_dir}/{uuid.uuid4()}_processed.png"
    enhanced.save(processed_image_path)

    logging.info(f"Boceto mejorado y guardado en: {processed_image_path}")
    return processed_image_path

@spaces.GPU(duration=180)
def generate_geometry(input_image_path, guidance_scale, inference_steps, max_facenum, symmetry, edge_type):
    """Step 1: generate the geometry mesh from the processed image.

    When ``args.geometry_model`` contains ``"Label"``, the model is called
    with a ``label`` dict (symmetry and edge type) and
    ``octree_resolution=384``; otherwise only the guidance scale, step count
    and face limit are passed. The first returned mesh is exported as a GLB
    into ``args.cache_dir``.

    Args:
        input_image_path: Path of the processed image from step 0.
        guidance_scale: Guidance scale, cast to ``float``.
        inference_steps: Number of inference steps, cast to ``int``.
        max_facenum: Face-count limit, cast to ``int``.
        symmetry: Index into ``("x", "asymmetry")``; used only by label models.
        edge_type: Edge-type label; used only by label models.

    Returns:
        Path of the exported ``*_geometry.glb`` file.

    Raises:
        gr.Error: If ``input_image_path`` is empty or does not exist.
    """
    image_is_available = bool(input_image_path) and os.path.exists(input_image_path)
    if not image_is_available:
        raise gr.Error("Primero debes procesar un boceto o proporcionar una imagen de entrada válida.")

    image_name = os.path.basename(input_image_path)
    logging.info(f"Iniciando generación de geometría desde: {image_name}")

    if "Label" in args.geometry_model:
        symmetry_label = ("x", "asymmetry")[int(symmetry)]
        model_kwargs = {
            "label": {"symmetry": symmetry_label, "edge_type": edge_type},
            "guidance_scale": float(guidance_scale),
            "octree_resolution": 384,
            "max_facenum": int(max_facenum),
            "num_inference_steps": int(inference_steps),
        }
    else:
        model_kwargs = {
            "guidance_scale": float(guidance_scale),
            "num_inference_steps": int(inference_steps),
            "max_facenum": int(max_facenum),
        }
    result = geometry_model(input_image_path, **model_kwargs)

    # Derive the output name from the input: "<id>_processed.png" -> "<id>".
    stem = image_name.replace("_processed.png", "")
    geometry_save_path = f"{args.cache_dir}/{stem}_geometry.glb"
    result.mesh[0].export(geometry_save_path)

    torch.cuda.empty_cache()
    logging.info(f"Geometría guardada en: {geometry_save_path}")
    return geometry_save_path

@spaces.GPU(duration=120)
def generate_texture(input_image_path, geometry_path):
    """Step 2: texture the previously generated geometry.

    Loads the geometry GLB, runs the ``remove_degenerate_face`` and
    ``reduce_face`` clean-up helpers on it, calls ``texture_model`` with the
    processed image and the cleaned mesh, and exports the result as a GLB
    into ``args.cache_dir``.

    Args:
        input_image_path: Path of the processed image from step 0.
        geometry_path: Path of the ``*_geometry.glb`` file from step 1.

    Returns:
        Path of the exported ``*_textured.glb`` file.

    Raises:
        gr.Error: If either path is empty or does not exist.
    """
    if not geometry_path or not os.path.exists(geometry_path):
        raise gr.Error("Por favor, primero genera la geometría antes de texturizar.")
    if not input_image_path or not os.path.exists(input_image_path):
        raise gr.Error("Se necesita la imagen procesada para el texturizado.")

    logging.info(f"Iniciando texturizado para la malla: {os.path.basename(geometry_path)}")
    # GLB files load as a `trimesh.Scene` by default; force a single
    # `Trimesh` so the helpers below receive the same kind of object that
    # step 1 exported.
    geometry_mesh = trimesh.load(geometry_path, force="mesh")

    # Mesh clean-up before texturing.
    geometry_mesh = remove_degenerate_face(geometry_mesh)
    geometry_mesh = reduce_face(geometry_mesh)

    textured_mesh = texture_model(input_image_path, geometry_mesh)

    # "<id>_geometry.glb" -> "<id>_textured.glb"
    save_name = os.path.basename(geometry_path).replace("_geometry.glb", "")
    textured_save_path = f"{args.cache_dir}/{save_name}_textured.glb"
    textured_mesh.export(textured_save_path)

    torch.cuda.empty_cache()
    logging.info(f"Malla texturizada guardada en: {textured_save_path}")
    return textured_save_path

# ==============================================================================
# 4. INTERFAZ DE GRADIO
# ==============================================================================

# Three-step UI: process the sketch, generate geometry, generate texture.
# Each step's button is enabled only after the previous step has succeeded.
with gr.Blocks(title="Step1X-3D", css="footer {display: none !important;} a {text-decoration: none !important;}") as demo:
    gr.Markdown("# Step1X-3D: De Boceto a Malla 3D Texturizada")
    gr.Markdown("Flujo de trabajo en 3 pasos: **0. Procesar Boceto → 1. Generar Geometría → 2. Generar Textura**")
    
    # Per-session state carrying file paths from one step to the next.
    processed_image_path_state = gr.State()
    geometry_path_state = gr.State()

    with gr.Row():
        with gr.Column(scale=2):
            # --- Input panel ---
            input_image = gr.Image(label="Paso 0: Carga tu boceto o imagen", type="pil", image_mode="RGB")
            prompt = gr.Textbox(label="Describe tu objeto", value="a comfortable armchair")

            with gr.Accordion(label="Opciones Avanzadas", open=False):
                gr.Markdown("### Opciones de Procesado de Boceto (Paso 0)")
                neg_prompt_sketch = gr.Textbox(label="Negative Prompt (Boceto)", value="text, signature, watermark")
                guidance_sketch = gr.Slider(0.1, 10.0, label="Guidance Scale (Boceto)", value=5.0, step=0.1)
                steps_sketch = gr.Slider(1, 50, label="Steps (Boceto)", value=25, step=1)
                controlnet_scale = gr.Slider(0.1, 2.0, label="ControlNet Scale", value=0.85, step=0.05)

                gr.Markdown("---")
                gr.Markdown("### Opciones de Generación 3D (Paso 1)")
                guidance_3d = gr.Number(label="Guidance Scale (3D)", value="7.5")
                steps_3d = gr.Slider(label="Inference Steps (3D)", minimum=1, maximum=100, value=50)
                max_facenum = gr.Number(label="Max Face Num", value="200000")
                # type="index": the callback receives the position of the choice.
                symmetry = gr.Radio(choices=["symmetry", "asymmetry"], label="Symmetry", value="symmetry", type="index")
                edge_type = gr.Radio(choices=["sharp", "normal", "smooth"], label="Edge Type", value="sharp", type="value")

            with gr.Row():
                btn_process_sketch = gr.Button("0. Procesar Boceto", variant="secondary")
            with gr.Row():
                # Disabled until the preceding step has produced its output.
                btn_geo = gr.Button("1. Generar Geometría", interactive=False)
                btn_tex = gr.Button("2. Generar Textura", interactive=False)

        with gr.Column(scale=3):
            # --- Output panel ---
            processed_image_preview = gr.Image(label="Resultado del Boceto Procesado", type="filepath", interactive=False, height=400)
            geometry_preview = gr.Model3D(label="Vista Previa de la Geometría", height=400, clear_color=[0.0, 0.0, 0.0, 0.0])
            textured_preview = gr.Model3D(label="Vista Previa del Modelo Texturizado", height=400, clear_color=[0.0, 0.0, 0.0, 0.0])
            
        with gr.Column(scale=1):
            gr.Examples(
                examples=[
                    ["examples/images/000.png", "a futuristic spaceship"],
                    ["examples/images/001.png", "a cartoon style monster"],
                    ["examples/images/004.png", "a red sports car"],
                    ["examples/images/008.png", "a medieval sword"],
                    ["examples/images/028.png", "a vintage camera"],
                    ["examples/images/032.png", "a cute robot"],
                    ["examples/images/061.png", "a delicious hamburger"],
                    ["examples/images/107.png", "a golden trophy"],
                ],
                inputs=[input_image, prompt], cache_examples=False
            )

    # --- Interface logic ---

    def on_sketch_processed(path):
        """Store the processed image path and reset the downstream steps.

        Enables the geometry button, disables the texture button and clears
        both 3D previews so results from an earlier sketch are not shown.
        """
        return {
            processed_image_path_state: path,
            btn_geo: gr.update(interactive=True, variant="primary"),
            btn_tex: gr.update(interactive=False),
            geometry_preview: gr.update(value=None),
            textured_preview: gr.update(value=None),
        }

    def on_geometry_generated(path):
        """Store the geometry path and enable the texture button."""
        return {
            geometry_path_state: path,
            btn_tex: gr.update(interactive=True, variant="primary"),
        }

    # `.success()` (not `.then()`): the follow-up must run only when the step
    # finished without raising; otherwise a failed step would still enable
    # the next button with a missing or stale path.
    btn_process_sketch.click(
        fn=process_sketch,
        inputs=[input_image, prompt, neg_prompt_sketch, guidance_sketch, steps_sketch, controlnet_scale],
        outputs=[processed_image_preview]
    ).success(
        fn=on_sketch_processed,
        inputs=[processed_image_preview],
        outputs=[processed_image_path_state, btn_geo, btn_tex, geometry_preview, textured_preview]
    )
    
    btn_geo.click(
        fn=generate_geometry,
        inputs=[processed_image_path_state, guidance_3d, steps_3d, max_facenum, symmetry, edge_type],
        outputs=[geometry_preview]
    ).success(
        fn=on_geometry_generated,
        inputs=[geometry_preview],
        outputs=[geometry_path_state, btn_tex]
    )
    
    btn_tex.click(
        fn=generate_texture,
        inputs=[processed_image_path_state, geometry_path_state],
        outputs=[textured_preview],
    )

demo.launch(ssr_mode=False)