# Hugging Face Space status (page header captured with the file): Running on Zero
# ==============================================================================
# 1. ENVIRONMENT AND DEPENDENCY INSTALLATION
# ==============================================================================
import os | |
import shlex | |
import spaces | |
import subprocess | |
import logging | |
# Root logger setup used for debugging: INFO level, with a fixed
# "Step1X-3D" tag embedded in every record.
logging.basicConfig(
    format="%(asctime)s - Step1X-3D - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def _prepend_env_path(name, entry):
    """Put ``entry`` at the front of the colon-separated variable ``name``.

    Empty segments are skipped so the result never starts or ends with a
    stray ``:``; an empty entry in a search path is treated as the current
    directory by the shell and the dynamic loader.
    """
    current = os.environ.get(name, "")
    os.environ[name] = ":".join(part for part in (entry, current) if part)


def install_dependencies():
    """Install the CUDA toolkit and build the required C++/CUDA extensions.

    Steps, in order:
      1. Download and run the CUDA 12.4 runfile installer unless
         ``/usr/local/cuda`` already exists.
      2. Export ``CUDA_HOME``, ``PATH``, ``LD_LIBRARY_PATH`` and
         ``TORCH_CUDA_ARCH_LIST`` for this process and its children.
      3. Build the differentiable renderer and install the prebuilt
         ``custom_rasterizer`` wheel with the running interpreter.
      4. Print the ``nvcc`` version as a sanity check.

    Raises:
        subprocess.CalledProcessError: if the download, the renderer build
            or the wheel installation exits with a non-zero status.
    """
    import sys  # local import: only needed to locate the running interpreter

    logging.info("Iniciando la instalación de dependencias...")

    cuda_home = "/usr/local/cuda"
    toolkit_url = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
    toolkit_file = f"/tmp/{os.path.basename(toolkit_url)}"

    if not os.path.exists(cuda_home):
        logging.info("Descargando e instalando CUDA Toolkit...")
        # A failed download must stop here; otherwise the next step would try
        # to execute an empty or partial file.
        subprocess.run(["wget", "-q", toolkit_url, "-O", toolkit_file], check=True)
        # Equivalent of `chmod +x` without spawning a process.
        os.chmod(toolkit_file, os.stat(toolkit_file).st_mode | 0o111)
        # The installer stays best-effort (as before), but its exit status is
        # now reported instead of being silently discarded.
        installer = subprocess.run([toolkit_file, "--silent", "--toolkit"])
        if installer.returncode != 0:
            logging.error(
                "El instalador de CUDA terminó con código %d.", installer.returncode
            )
    else:
        logging.info("CUDA Toolkit ya está instalado.")

    os.environ["CUDA_HOME"] = cuda_home
    _prepend_env_path("PATH", f"{cuda_home}/bin")
    # Keep the original `lib` entry and add `lib64`, which is where the Linux
    # x86_64 toolkit places its shared libraries.
    _prepend_env_path("LD_LIBRARY_PATH", f"{cuda_home}/lib")
    _prepend_env_path("LD_LIBRARY_PATH", f"{cuda_home}/lib64")
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

    # Build the custom extensions.
    logging.info("Compilando extensiones de renderizado...")
    renderer_path = "/home/user/app/step1x3d_texture/differentiable_renderer/"
    # `cwd=` replaces the former `cd ... &&` shell string, and sys.executable
    # guarantees the build targets the interpreter that runs this app.
    subprocess.run([sys.executable, "setup.py", "install"], cwd=renderer_path, check=True)
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"],
        check=True,
    )
    logging.info("Instalación completada.")

    # Sanity check only: a missing nvcc is reported, not fatal.
    try:
        subprocess.run(["nvcc", "-V"])
    except OSError:
        logging.warning("No se encontró 'nvcc' en el PATH.")
# Runs at import time, ahead of the imports that follow it in this file --
# presumably because some of those modules need the CUDA toolchain and the
# compiled extensions to be in place first (confirm before reordering).
install_dependencies()
import uuid | |
import torch | |
import trimesh | |
import argparse | |
import numpy as np | |
import gradio as gr | |
from PIL import Image, ImageOps | |
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler | |
from step1x3d_geometry.models.pipelines.pipeline import Step1X3DGeometryPipeline | |
from step1x3d_texture.pipelines.step1x_3d_texture_synthesis_pipeline import Step1X3DTexturePipeline | |
from step1x3d_geometry.models.pipelines.pipeline_utils import reduce_face, remove_degenerate_face | |
# ==============================================================================
# 2. CONFIGURATION AND MODEL LOADING
# ==============================================================================
# Command-line options: which Step1X-3D sub-models to load and where the
# generated files are written.
parser = argparse.ArgumentParser()
parser.add_argument("--geometry_model", type=str, default="Step1X-3D-Geometry-Label-1300m")
parser.add_argument("--texture_model", type=str, default="Step1X-3D-Texture")
parser.add_argument("--cache_dir", type=str, default="cache")
args = parser.parse_args()

os.makedirs(args.cache_dir, exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only used on the GPU; the CPU fallback stays in float32
# because many CPU kernels lack float16 support.
torch_dtype = torch.float16 if device == "cuda" else torch.float32
# NOTE(review): `spaces` is imported at the top of this file, but none of the
# GPU entry points is wrapped in `spaces.GPU`. If this app is deployed on
# ZeroGPU hardware, confirm whether that decorator is required.

logging.info("Cargando modelos... Este proceso puede tardar varios minutos.")

# Step1X-3D models.
logging.info(f"Cargando modelo de geometría: {args.geometry_model}")
geometry_model = Step1X3DGeometryPipeline.from_pretrained(
    "stepfun-ai/Step1X-3D", subfolder=args.geometry_model
).to(device)

logging.info(f"Cargando modelo de textura: {args.texture_model}")
# Unlike the other pipelines, this one is not moved to `device` here.
texture_model = Step1X3DTexturePipeline.from_pretrained("stepfun-ai/Step1X-3D", subfolder=args.texture_model)

# ControlNet models used to pre-process sketches.
logging.info("Cargando modelos para el pre-procesamiento de bocetos (SDXL + ControlNet)...")
controlnet = ControlNetModel.from_pretrained("xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=torch_dtype)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)
pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
    "sd-community/sdxl-flash", controlnet=controlnet, vae=vae, torch_dtype=torch_dtype
)
# Swap in the Euler-ancestral scheduler, reusing the pipeline's own config.
pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
pipe_control.to(device)

logging.info("Todos los modelos han sido cargados correctamente.")
# ==============================================================================
# 3. STEP-BY-STEP GENERATION FUNCTIONS
# ==============================================================================
def apply_3d_style(prompt: str) -> tuple[str, str]:
    """Wrap ``prompt`` in the default '3D Model' style.

    Returns:
        A ``(styled_prompt, negative_prompt)`` pair: the style template with
        ``prompt`` inserted at its placeholder, and the fixed negative prompt
        that goes with the style.
    """
    template = "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting"
    negative = "ugly, deformed, noisy, low poly, blurry, painting"
    # Split around the placeholder and splice the user text in between.
    head, _placeholder, tail = template.partition("{prompt}")
    return head + prompt + tail, negative
def process_sketch(image, prompt, negative_prompt, guidance_scale, num_steps, controlnet_scale):
    """Step 0: turn a sketch into a high-quality image using ControlNet.

    Args:
        image: Input sketch; must provide ``convert``, ``size`` and ``resize``
            (the UI passes a PIL image).
        prompt: Object description, wrapped by ``apply_3d_style``.
        negative_prompt: Optional extra negative terms appended to the
            style's own negative prompt.
        guidance_scale: Guidance scale, cast to ``float``.
        num_steps: Number of inference steps, cast to ``int``.
        controlnet_scale: ControlNet conditioning scale, cast to ``float``.

    Returns:
        Path of the PNG written to ``args.cache_dir``.

    Raises:
        gr.Error: if ``image`` is ``None``.
    """
    if image is None:
        raise gr.Error("Por favor, proporciona un boceto de entrada.")

    sketch = image.convert("RGB")

    # Rescale to roughly 1024*1024 pixels in total while keeping the aspect
    # ratio, then invert the colors.
    src_w, src_h = sketch.size
    scale = np.sqrt(1024.0 * 1024.0 / (src_w * src_h))
    target_w = int(src_w * scale)
    target_h = int(src_h * scale)
    sketch = ImageOps.invert(sketch.resize((target_w, target_h)))

    styled_prompt, styled_negative = apply_3d_style(prompt)
    if negative_prompt:
        # Append the user's negative prompt when one was given.
        styled_negative = f"{styled_negative}, {negative_prompt}"
    logging.info("Mejorando boceto con prompt: '%s'", styled_prompt)

    # A fresh random seed on every call.
    seed = np.random.randint(0, 2**32 - 1)
    rng = torch.Generator(device=device).manual_seed(seed)

    result = pipe_control(
        prompt=styled_prompt,
        negative_prompt=styled_negative,
        image=sketch,
        num_inference_steps=int(num_steps),
        controlnet_conditioning_scale=float(controlnet_scale),
        guidance_scale=float(guidance_scale),
        width=target_w,
        height=target_h,
        generator=rng,
    )
    enhanced = result.images[0]

    # A unique file name per run avoids collisions inside the cache directory.
    out_path = f"{args.cache_dir}/{uuid.uuid4()}_processed.png"
    enhanced.save(out_path)
    logging.info("Boceto mejorado y guardado en: %s", out_path)
    return out_path
def generate_geometry(input_image_path, guidance_scale, inference_steps, max_facenum, symmetry, edge_type):
    """Step 1: generate the geometry from the processed image.

    Args:
        input_image_path: Path of the processed image; must exist on disk.
        guidance_scale: Guidance scale, cast to ``float``.
        inference_steps: Number of inference steps, cast to ``int``.
        max_facenum: Face budget, cast to ``int``.
        symmetry: Index into ``("x", "asymmetry")``; only used when the
            configured geometry model name contains ``"Label"``.
        edge_type: Edge-type label; only used for ``"Label"`` models.

    Returns:
        Path of the exported ``.glb`` file inside ``args.cache_dir``.

    Raises:
        gr.Error: if the image path is empty or does not exist.
    """
    if not (input_image_path and os.path.exists(input_image_path)):
        raise gr.Error("Primero debes procesar un boceto o proporcionar una imagen de entrada válida.")

    image_name = os.path.basename(input_image_path)
    logging.info("Iniciando generación de geometría desde: %s", image_name)

    # Label-conditioned models take two extra arguments.
    call_kwargs = {}
    if "Label" in args.geometry_model:
        symmetry_label = ("x", "asymmetry")[int(symmetry)]
        call_kwargs["label"] = {"symmetry": symmetry_label, "edge_type": edge_type}
        call_kwargs["octree_resolution"] = 384
    call_kwargs.update(
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(inference_steps),
        max_facenum=int(max_facenum),
    )
    result = geometry_model(input_image_path, **call_kwargs)

    # Reuse the image's base name so every file from one run shares a prefix.
    run_id = image_name.replace("_processed.png", "")
    glb_path = f"{args.cache_dir}/{run_id}_geometry.glb"
    result.mesh[0].export(glb_path)

    torch.cuda.empty_cache()
    logging.info("Geometría guardada en: %s", glb_path)
    return glb_path
def generate_texture(input_image_path, geometry_path):
    """Step 2: apply a texture to the generated geometry.

    Args:
        input_image_path: Path of the processed image that drives texturing;
            must exist on disk.
        geometry_path: Path of the geometry ``.glb`` produced in step 1;
            must exist on disk.

    Returns:
        Path of the textured ``.glb`` file inside ``args.cache_dir``.

    Raises:
        gr.Error: if either path is empty or does not exist.
    """
    if not geometry_path or not os.path.exists(geometry_path):
        raise gr.Error("Por favor, primero genera la geometría antes de texturizar.")
    if not input_image_path or not os.path.exists(input_image_path):
        raise gr.Error("Se necesita la imagen procesada para el texturizado.")

    logging.info(f"Iniciando texturizado para la malla: {os.path.basename(geometry_path)}")

    # A .glb file loads as a trimesh Scene by default; force="mesh" collapses
    # it into a single Trimesh so the face-level helpers below receive a mesh.
    geometry_mesh = trimesh.load(geometry_path, force="mesh")

    # Post-processing before texturing.
    geometry_mesh = remove_degenerate_face(geometry_mesh)
    geometry_mesh = reduce_face(geometry_mesh)
    textured_mesh = texture_model(input_image_path, geometry_mesh)

    # Keep the run prefix shared with the geometry file.
    save_name = os.path.basename(geometry_path).replace("_geometry.glb", "")
    textured_save_path = f"{args.cache_dir}/{save_name}_textured.glb"
    textured_mesh.export(textured_save_path)

    torch.cuda.empty_cache()
    logging.info(f"Malla texturizada guardada en: {textured_save_path}")
    return textured_save_path
# ==============================================================================
# 4. GRADIO INTERFACE
# ==============================================================================
# Three-step UI: process the sketch, generate the geometry, generate the
# texture. Each step enables the button of the next one.
with gr.Blocks(title="Step1X-3D", css="footer {display: none !important;} a {text-decoration: none !important;}") as demo:
    gr.Markdown("# Step1X-3D: De Boceto a Malla 3D Texturizada")
    gr.Markdown("Flujo de trabajo en 3 pasos: **0. Procesar Boceto → 1. Generar Geometría → 2. Generar Textura**")

    # State holders that carry file paths from one step to the next.
    processed_image_path_state = gr.State()
    geometry_path_state = gr.State()

    with gr.Row():
        with gr.Column(scale=2):
            # --- Input panel ---
            input_image = gr.Image(label="Paso 0: Carga tu boceto o imagen", type="pil", image_mode="RGB")
            prompt = gr.Textbox(label="Describe tu objeto", value="a comfortable armchair")

            with gr.Accordion(label="Opciones Avanzadas", open=False):
                gr.Markdown("### Opciones de Procesado de Boceto (Paso 0)")
                neg_prompt_sketch = gr.Textbox(label="Negative Prompt (Boceto)", value="text, signature, watermark")
                guidance_sketch = gr.Slider(0.1, 10.0, label="Guidance Scale (Boceto)", value=5.0, step=0.1)
                steps_sketch = gr.Slider(1, 50, label="Steps (Boceto)", value=25, step=1)
                controlnet_scale = gr.Slider(0.1, 2.0, label="ControlNet Scale", value=0.85, step=0.05)
                gr.Markdown("---")
                gr.Markdown("### Opciones de Generación 3D (Paso 1)")
                # Numeric defaults (previously strings) match what gr.Number holds.
                guidance_3d = gr.Number(label="Guidance Scale (3D)", value=7.5)
                steps_3d = gr.Slider(label="Inference Steps (3D)", minimum=1, maximum=100, value=50)
                max_facenum = gr.Number(label="Max Face Num", value=200000)
                # type="index": the callback receives 0 or 1, not the label.
                symmetry = gr.Radio(choices=["symmetry", "asymmetry"], label="Symmetry", value="symmetry", type="index")
                edge_type = gr.Radio(choices=["sharp", "normal", "smooth"], label="Edge Type", value="sharp", type="value")

            with gr.Row():
                btn_process_sketch = gr.Button("0. Procesar Boceto", variant="secondary")
            with gr.Row():
                # Both stay disabled until the preceding step has succeeded.
                btn_geo = gr.Button("1. Generar Geometría", interactive=False)
                btn_tex = gr.Button("2. Generar Textura", interactive=False)

        with gr.Column(scale=3):
            # --- Output panel ---
            processed_image_preview = gr.Image(label="Resultado del Boceto Procesado", type="filepath", interactive=False, height=400)
            geometry_preview = gr.Model3D(label="Vista Previa de la Geometría", height=400, clear_color=[0.0, 0.0, 0.0, 0.0])
            textured_preview = gr.Model3D(label="Vista Previa del Modelo Texturizado", height=400, clear_color=[0.0, 0.0, 0.0, 0.0])

        with gr.Column(scale=1):
            gr.Examples(
                examples=[
                    ["examples/images/000.png", "a futuristic spaceship"],
                    ["examples/images/001.png", "a cartoon style monster"],
                    ["examples/images/004.png", "a red sports car"],
                    ["examples/images/008.png", "a medieval sword"],
                    ["examples/images/028.png", "a vintage camera"],
                    ["examples/images/032.png", "a cute robot"],
                    ["examples/images/061.png", "a delicious hamburger"],
                    ["examples/images/107.png", "a golden trophy"],
                ],
                inputs=[input_image, prompt],
                cache_examples=False,
            )

    # --- Interface logic ---
    def on_sketch_processed(path):
        """Store the processed image path, enable step 1 and reset later steps."""
        return {
            processed_image_path_state: path,
            btn_geo: gr.update(interactive=True, variant="primary"),
            btn_tex: gr.update(interactive=False),
            geometry_preview: gr.update(value=None),
            textured_preview: gr.update(value=None),
        }

    def on_geometry_generated(path):
        """Store the geometry path and enable step 2."""
        return {
            geometry_path_state: path,
            btn_tex: gr.update(interactive=True, variant="primary"),
        }

    # `.success()` instead of `.then()`: the follow-up handlers must only run
    # when the generation step finished without raising. With `.then()` they
    # ran after a failure too and enabled the next button with no valid path.
    btn_process_sketch.click(
        fn=process_sketch,
        inputs=[input_image, prompt, neg_prompt_sketch, guidance_sketch, steps_sketch, controlnet_scale],
        outputs=[processed_image_preview],
    ).success(
        fn=on_sketch_processed,
        inputs=[processed_image_preview],
        outputs=[processed_image_path_state, btn_geo, btn_tex, geometry_preview, textured_preview],
    )

    btn_geo.click(
        fn=generate_geometry,
        inputs=[processed_image_path_state, guidance_3d, steps_3d, max_facenum, symmetry, edge_type],
        outputs=[geometry_preview],
    ).success(
        fn=on_geometry_generated,
        inputs=[geometry_preview],
        outputs=[geometry_path_state, btn_tex],
    )

    btn_tex.click(
        fn=generate_texture,
        inputs=[processed_image_path_state, geometry_path_state],
        outputs=[textured_preview],
    )

demo.launch(ssr_mode=False)