Spaces:

Stable-X
/

ReconViaGen

Running on Zero

App Files Files Community

Stable-X committed on Oct 1

Commit

0fa4298

verified ·

1 Parent(s): 1dd8354

update model

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +10 -6
trellis/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/models/__init__.py +2 -1
trellis/models/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/models/__pycache__/sparse_structure_flow.cpython-310.pyc +0 -0
trellis/models/__pycache__/sparse_structure_vae.cpython-310.pyc +0 -0
trellis/models/__pycache__/structured_latent_flow.cpython-310.pyc +0 -0
trellis/models/structured_latent_flow.py +65 -9
trellis/models/structured_latent_vae/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/models/structured_latent_vae/__pycache__/base.cpython-310.pyc +0 -0
trellis/models/structured_latent_vae/__pycache__/decoder_gs.cpython-310.pyc +0 -0
trellis/models/structured_latent_vae/__pycache__/decoder_mesh.cpython-310.pyc +0 -0
trellis/models/structured_latent_vae/__pycache__/encoder.cpython-310.pyc +0 -0
trellis/modules/__pycache__/norm.cpython-310.pyc +0 -0
trellis/modules/__pycache__/spatial.cpython-310.pyc +0 -0
trellis/modules/__pycache__/utils.cpython-310.pyc +0 -0
trellis/modules/attention/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/attention/__pycache__/full_attn.cpython-310.pyc +0 -0
trellis/modules/attention/__pycache__/modules.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/basic.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/linear.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/nonlinearity.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/norm.cpython-310.pyc +0 -0
trellis/modules/sparse/__pycache__/spatial.cpython-310.pyc +0 -0
trellis/modules/sparse/attention/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/sparse/attention/__pycache__/full_attn.cpython-310.pyc +0 -0
trellis/modules/sparse/attention/__pycache__/modules.cpython-310.pyc +0 -0
trellis/modules/sparse/attention/__pycache__/serialized_attn.cpython-310.pyc +0 -0
trellis/modules/sparse/attention/__pycache__/windowed_attn.cpython-310.pyc +0 -0
trellis/modules/sparse/conv/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/sparse/conv/__pycache__/conv_spconv.cpython-310.pyc +0 -0
trellis/modules/sparse/transformer/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/sparse/transformer/__pycache__/blocks.cpython-310.pyc +0 -0
trellis/modules/sparse/transformer/__pycache__/modulated.cpython-310.pyc +0 -0
trellis/modules/sparse/transformer/modulated.py +8 -2
trellis/modules/transformer/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/modules/transformer/__pycache__/blocks.cpython-310.pyc +0 -0
trellis/modules/transformer/__pycache__/modulated.cpython-310.pyc +0 -0
trellis/modules/transformer/modulated.py +8 -3
trellis/pipelines/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/pipelines/__pycache__/base.cpython-310.pyc +0 -0
trellis/pipelines/__pycache__/trellis_image_to_3d.cpython-310.pyc +0 -0
trellis/pipelines/base.py +3 -0
trellis/pipelines/samplers/__pycache__/__init__.cpython-310.pyc +0 -0
trellis/pipelines/samplers/__pycache__/base.cpython-310.pyc +0 -0
trellis/pipelines/samplers/__pycache__/classifier_free_guidance_mixin.cpython-310.pyc +0 -0
trellis/pipelines/samplers/__pycache__/flow_euler.cpython-310.pyc +0 -0
trellis/pipelines/samplers/__pycache__/flow_euler_old.cpython-310.pyc +0 -0
trellis/pipelines/samplers/__pycache__/guidance_interval_mixin.cpython-310.pyc +0 -0

app.py CHANGED Viewed

@@ -193,7 +193,7 @@ def generate_and_extract_glb(
     image_files = [image[0] for image in multiimages]
     # Generate 3D model
-    outputs = pipeline.run(
         image=image_files,
         seed=seed,
         formats=["gaussian", "mesh"],
@@ -210,6 +210,12 @@ def generate_and_extract_glb(
     )
     # Render video
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
@@ -331,10 +337,8 @@ with demo:
                     image_prompt = gr.Image(label="Image Prompt", format="png", visible=False, image_mode="RGBA", type="pil", height=300)
                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
                     gr.Markdown("""
-                        Input different views of the object in separate images.
-                        *NOTE: this is an experimental algorithm without training a specialized model. It may not produce the best results for all images, especially those having different poses or inconsistent details.*
-                    """)
             with gr.Accordion(label="Generation Settings", open=False):
                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
@@ -434,7 +438,7 @@ with demo:
 # Launch the Gradio app
 if __name__ == "__main__":
-    pipeline = TrellisVGGTTo3DPipeline.from_pretrained("Stable-X/trellis-vggt-v0-1")
     pipeline.cuda()
     pipeline.VGGT_model.cuda()
     pipeline.birefnet_model.cuda()

     image_files = [image[0] for image in multiimages]
     # Generate 3D model
+    outputs, _, _ = pipeline.run(
         image=image_files,
         seed=seed,
         formats=["gaussian", "mesh"],
     )
     # Render video
+    # import uuid
+    # output_id = str(uuid.uuid4())
+    # os.makedirs(f"{TMP_DIR}/{output_id}", exist_ok=True)
+    # video_path = f"{TMP_DIR}/{output_id}/preview.mp4"
+    # glb_path = f"{TMP_DIR}/{output_id}/mesh.glb"
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
                     image_prompt = gr.Image(label="Image Prompt", format="png", visible=False, image_mode="RGBA", type="pil", height=300)
                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
                     gr.Markdown("""
+                        Input different views of the object in separate images.
+                                """)
             with gr.Accordion(label="Generation Settings", open=False):
                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
 # Launch the Gradio app
 if __name__ == "__main__":
+    pipeline = TrellisVGGTTo3DPipeline.from_pretrained("Stable-X/trellis-vggt-v0-2")
     pipeline.cuda()
     pipeline.VGGT_model.cuda()
     pipeline.birefnet_model.cuda()

trellis/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/__pycache__/__init__.cpython-310.pyc and b/trellis/__pycache__/__init__.cpython-310.pyc differ

trellis/models/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ __attributes = {
     'SLatMeshDecoder': 'structured_latent_vae',
     'SLatFlowModel': 'structured_latent_flow',
     'ModulatedMultiViewCond': 'sparse_structure_flow',
 }
 __submodules = []
@@ -85,4 +86,4 @@ if __name__ == '__main__':
     from .sparse_structure_vae import SparseStructureEncoder, SparseStructureDecoder
     from .sparse_structure_flow import SparseStructureFlowModel, ModulatedMultiViewCond
     from .structured_latent_vae import SLatEncoder, SLatGaussianDecoder, SLatMeshDecoder
-    from .structured_latent_flow import SLatFlowModel

     'SLatMeshDecoder': 'structured_latent_vae',
     'SLatFlowModel': 'structured_latent_flow',
     'ModulatedMultiViewCond': 'sparse_structure_flow',
+    'ModulatedSLATMultiViewCond': 'structured_latent_flow',
 }
 __submodules = []
     from .sparse_structure_vae import SparseStructureEncoder, SparseStructureDecoder
     from .sparse_structure_flow import SparseStructureFlowModel, ModulatedMultiViewCond
     from .structured_latent_vae import SLatEncoder, SLatGaussianDecoder, SLatMeshDecoder
+    from .structured_latent_flow import SLatFlowModel, ModulatedSLATMultiViewCond

trellis/models/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/__pycache__/__init__.cpython-310.pyc and b/trellis/models/__pycache__/__init__.cpython-310.pyc differ

trellis/models/__pycache__/sparse_structure_flow.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/__pycache__/sparse_structure_flow.cpython-310.pyc and b/trellis/models/__pycache__/sparse_structure_flow.cpython-310.pyc differ

trellis/models/__pycache__/sparse_structure_vae.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/__pycache__/sparse_structure_vae.cpython-310.pyc and b/trellis/models/__pycache__/sparse_structure_vae.cpython-310.pyc differ

trellis/models/__pycache__/structured_latent_flow.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/__pycache__/structured_latent_flow.cpython-310.pyc and b/trellis/models/__pycache__/structured_latent_flow.cpython-310.pyc differ

trellis/models/structured_latent_flow.py CHANGED Viewed

@@ -311,6 +311,11 @@ class SLatFlowModel(nn.Module):
             t_emb = self.adaLN_modulation(t_emb)
         t_emb = t_emb.type(self.dtype)
         skips = []
         # pack with input blocks
         for block in self.input_blocks:
@@ -320,15 +325,8 @@ class SLatFlowModel(nn.Module):
         if self.pe_mode == "ape":
             h = h + self.pos_embedder(h.coords[:, 1:]).type(self.dtype)
-        if isinstance(cond, list):
-            for i in range(len(cond)):
-                cond_tmp = cond[i].type(self.dtype)
-                for block in self.blocks:
-                    h = block(h, t_emb, cond_tmp)
-        else:
-            cond = cond.type(self.dtype)
-            for block in self.blocks:
-                h = block(h, t_emb, cond)
         # unpack with output blocks
         for block, skip in zip(self.out_blocks, reversed(skips)):
@@ -340,3 +338,61 @@ class SLatFlowModel(nn.Module):
         h = h.replace(F.layer_norm(h.feats, h.feats.shape[-1:]))
         h = self.out_layer(h.type(x.dtype))
         return h

             t_emb = self.adaLN_modulation(t_emb)
         t_emb = t_emb.type(self.dtype)
+        if isinstance(cond, list):
+            cond = [c.type(self.dtype) for c in cond]
+        else:
+            cond = cond.type(self.dtype)
         skips = []
         # pack with input blocks
         for block in self.input_blocks:
         if self.pe_mode == "ape":
             h = h + self.pos_embedder(h.coords[:, 1:]).type(self.dtype)
+        for block in self.blocks:
+            h = block(h, t_emb, cond)
         # unpack with output blocks
         for block, skip in zip(self.out_blocks, reversed(skips)):
         h = h.replace(F.layer_norm(h.feats, h.feats.shape[-1:]))
         h = self.out_layer(h.type(x.dtype))
         return h
+class ModulatedSLATMultiViewCond(nn.Module):
+    """
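+    Multi-view conditioning module: concatenates selected layers of the aggregated tokens with the
+    image features, passes them through a chain of Linear + ReLU blocks, and adds the image
+    features back after each block.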
+    """
+    def __init__(
+        self,
+        channels: int,
+        ctx_channels: int,
+        dtype: Optional[torch.dtype] = torch.float32,
+        use_fp16: bool = True,
+    ):
+        super().__init__()
+        self.linear_blocks = nn.ModuleList([
+            nn.Sequential(
+                nn.Linear(ctx_channels, channels, bias=True),
+                nn.ReLU(),
+            )
+            for _ in range(4)
+        ])
+        self.fuse_blocks = nn.ModuleList([
+            nn.Sequential(
+                nn.Linear(ctx_channels, channels, bias=True),
+                nn.ReLU(),
+            )
+            for _ in range(4)
+        ])
+        self.use_fp16 = use_fp16
+        if use_fp16:
+            self.dtype = torch.float16
+        else:
+            self.dtype = dtype
+        self.intermediate_layer_idx = [4, 11, 17, 23]
+        if use_fp16:
+            self.convert_to_fp16()
+    def convert_to_fp16(self) -> None:
+        """
+        Convert the linear blocks of the module to float16.
+        """
+        self.use_fp16 = True
+        self.dtype = torch.float16
+        self.linear_blocks.apply(convert_module_to_f16)
+    def forward(self, aggregated_tokens_list: List, image_cond: torch.Tensor):
+        b, n, _, _ = aggregated_tokens_list[0].shape
+        idx = 0
+        cond = image_cond.reshape(b*n, -1, 1024).to(self.dtype)
+        for layer_idx in self.intermediate_layer_idx:
+            x = aggregated_tokens_list[layer_idx]
+            # x = x.reshape(b, -1, 2048) + torch.cat([image_cond.reshape(b, -1, 1024), image_cond.reshape(b, -1, 1024)],dim=-1)
+            x = torch.cat([x.reshape(b*n, -1, 2048), cond.reshape(b*n, -1, 1024)],dim=-1).to(self.dtype)
+            x = self.linear_blocks[idx](x)
+            cond = x + image_cond.reshape(b*n, -1, 1024).to(self.dtype)
+            idx = idx + 1
+        return cond

trellis/models/structured_latent_vae/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/structured_latent_vae/__pycache__/__init__.cpython-310.pyc and b/trellis/models/structured_latent_vae/__pycache__/__init__.cpython-310.pyc differ

trellis/models/structured_latent_vae/__pycache__/base.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/structured_latent_vae/__pycache__/base.cpython-310.pyc and b/trellis/models/structured_latent_vae/__pycache__/base.cpython-310.pyc differ

trellis/models/structured_latent_vae/__pycache__/decoder_gs.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/structured_latent_vae/__pycache__/decoder_gs.cpython-310.pyc and b/trellis/models/structured_latent_vae/__pycache__/decoder_gs.cpython-310.pyc differ

trellis/models/structured_latent_vae/__pycache__/decoder_mesh.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/structured_latent_vae/__pycache__/decoder_mesh.cpython-310.pyc and b/trellis/models/structured_latent_vae/__pycache__/decoder_mesh.cpython-310.pyc differ

trellis/models/structured_latent_vae/__pycache__/encoder.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/models/structured_latent_vae/__pycache__/encoder.cpython-310.pyc and b/trellis/models/structured_latent_vae/__pycache__/encoder.cpython-310.pyc differ

trellis/modules/__pycache__/norm.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/__pycache__/norm.cpython-310.pyc and b/trellis/modules/__pycache__/norm.cpython-310.pyc differ

trellis/modules/__pycache__/spatial.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/__pycache__/spatial.cpython-310.pyc and b/trellis/modules/__pycache__/spatial.cpython-310.pyc differ

trellis/modules/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/__pycache__/utils.cpython-310.pyc and b/trellis/modules/__pycache__/utils.cpython-310.pyc differ

trellis/modules/attention/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/attention/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/attention/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/attention/__pycache__/full_attn.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/attention/__pycache__/full_attn.cpython-310.pyc and b/trellis/modules/attention/__pycache__/full_attn.cpython-310.pyc differ

trellis/modules/attention/__pycache__/modules.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/attention/__pycache__/modules.cpython-310.pyc and b/trellis/modules/attention/__pycache__/modules.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/basic.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/basic.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/basic.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/linear.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/linear.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/linear.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/nonlinearity.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/nonlinearity.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/nonlinearity.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/norm.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/norm.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/norm.cpython-310.pyc differ

trellis/modules/sparse/__pycache__/spatial.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/__pycache__/spatial.cpython-310.pyc and b/trellis/modules/sparse/__pycache__/spatial.cpython-310.pyc differ

trellis/modules/sparse/attention/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/attention/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/sparse/attention/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/sparse/attention/__pycache__/full_attn.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/attention/__pycache__/full_attn.cpython-310.pyc and b/trellis/modules/sparse/attention/__pycache__/full_attn.cpython-310.pyc differ

trellis/modules/sparse/attention/__pycache__/modules.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/attention/__pycache__/modules.cpython-310.pyc and b/trellis/modules/sparse/attention/__pycache__/modules.cpython-310.pyc differ

trellis/modules/sparse/attention/__pycache__/serialized_attn.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/attention/__pycache__/serialized_attn.cpython-310.pyc and b/trellis/modules/sparse/attention/__pycache__/serialized_attn.cpython-310.pyc differ

trellis/modules/sparse/attention/__pycache__/windowed_attn.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/attention/__pycache__/windowed_attn.cpython-310.pyc and b/trellis/modules/sparse/attention/__pycache__/windowed_attn.cpython-310.pyc differ

trellis/modules/sparse/conv/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/conv/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/sparse/conv/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/sparse/conv/__pycache__/conv_spconv.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/conv/__pycache__/conv_spconv.cpython-310.pyc and b/trellis/modules/sparse/conv/__pycache__/conv_spconv.cpython-310.pyc differ

trellis/modules/sparse/transformer/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/transformer/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/sparse/transformer/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/sparse/transformer/__pycache__/blocks.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/transformer/__pycache__/blocks.cpython-310.pyc and b/trellis/modules/sparse/transformer/__pycache__/blocks.cpython-310.pyc differ

trellis/modules/sparse/transformer/__pycache__/modulated.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/sparse/transformer/__pycache__/modulated.cpython-310.pyc and b/trellis/modules/sparse/transformer/__pycache__/modulated.cpython-310.pyc differ

trellis/modules/sparse/transformer/modulated.py CHANGED Viewed

@@ -150,8 +150,14 @@ class ModulatedSparseTransformerCrossBlock(nn.Module):
         h = h * gate_msa
         x = x + h
         h = x.replace(self.norm2(x.feats))
-        h = self.cross_attn(h, context)
-        x = x + h
         h = x.replace(self.norm3(x.feats))
         h = h * (1 + scale_mlp) + shift_mlp
         h = self.mlp(h)

         h = h * gate_msa
         x = x + h
         h = x.replace(self.norm2(x.feats))
+        # h = self.cross_attn(h, context)
+        # x = x + h
+        if isinstance(context, list):
+            for ctx in context:
+                x = x + self.cross_attn(h, ctx) / len(context)
+        else:
+            h = self.cross_attn(h, context)
+            x = x + h
         h = x.replace(self.norm3(x.feats))
         h = h * (1 + scale_mlp) + shift_mlp
         h = self.mlp(h)

trellis/modules/transformer/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/transformer/__pycache__/__init__.cpython-310.pyc and b/trellis/modules/transformer/__pycache__/__init__.cpython-310.pyc differ

trellis/modules/transformer/__pycache__/blocks.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/transformer/__pycache__/blocks.cpython-310.pyc and b/trellis/modules/transformer/__pycache__/blocks.cpython-310.pyc differ

trellis/modules/transformer/__pycache__/modulated.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/modules/transformer/__pycache__/modulated.cpython-310.pyc and b/trellis/modules/transformer/__pycache__/modulated.cpython-310.pyc differ

trellis/modules/transformer/modulated.py CHANGED Viewed

@@ -138,13 +138,18 @@ class ModulatedTransformerCrossBlock(nn.Module):
             shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(mod).chunk(6, dim=1)
         h = self.norm1(x)
         h = h * (1 + scale_msa.unsqueeze(1)) + shift_msa.unsqueeze(1)
-        # h = torch.utils.checkpoint.checkpoint(self.self_attn, h)
         h = self.self_attn(h)
         h = h * gate_msa.unsqueeze(1)
         x = x + h
         h = self.norm2(x)
-        h = self.cross_attn(h, context)
-        x = x + h
         h = self.norm3(x)
         h = h * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1)
         h = self.mlp(h)

             shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(mod).chunk(6, dim=1)
         h = self.norm1(x)
         h = h * (1 + scale_msa.unsqueeze(1)) + shift_msa.unsqueeze(1)
         h = self.self_attn(h)
         h = h * gate_msa.unsqueeze(1)
         x = x + h
         h = self.norm2(x)
+        # h = self.cross_attn(h, context)
+        # x = x + h
+        if isinstance(context, list):
+            for ctx in context:
+                x = x + self.cross_attn(h, ctx) / len(context)
+        else:
+            h = self.cross_attn(h, context)
+            x = x + h
         h = self.norm3(x)
         h = h * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1)
         h = self.mlp(h)

trellis/pipelines/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/__pycache__/__init__.cpython-310.pyc and b/trellis/pipelines/__pycache__/__init__.cpython-310.pyc differ

trellis/pipelines/__pycache__/base.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/__pycache__/base.cpython-310.pyc and b/trellis/pipelines/__pycache__/base.cpython-310.pyc differ

trellis/pipelines/__pycache__/trellis_image_to_3d.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/__pycache__/trellis_image_to_3d.cpython-310.pyc and b/trellis/pipelines/__pycache__/trellis_image_to_3d.cpython-310.pyc differ

trellis/pipelines/base.py CHANGED Viewed

@@ -24,6 +24,9 @@ class Pipeline:
             self.sparse_structure_flow_model = self.models['sparse_structure_flow_model']
         if 'sparse_structure_vggt_cond' in self.models:
             self.sparse_structure_vggt_cond = self.models['sparse_structure_vggt_cond']
     @staticmethod
     def from_pretrained(path: str) -> "Pipeline":
         """

             self.sparse_structure_flow_model = self.models['sparse_structure_flow_model']
         if 'sparse_structure_vggt_cond' in self.models:
             self.sparse_structure_vggt_cond = self.models['sparse_structure_vggt_cond']
+        if 'slat_vggt_cond' in self.models:
+            self.slat_vggt_cond = self.models['slat_vggt_cond']
     @staticmethod
     def from_pretrained(path: str) -> "Pipeline":
         """

trellis/pipelines/samplers/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/__init__.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/__init__.cpython-310.pyc differ

trellis/pipelines/samplers/__pycache__/base.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/base.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/base.cpython-310.pyc differ

trellis/pipelines/samplers/__pycache__/classifier_free_guidance_mixin.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/classifier_free_guidance_mixin.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/classifier_free_guidance_mixin.cpython-310.pyc differ

trellis/pipelines/samplers/__pycache__/flow_euler.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/flow_euler.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/flow_euler.cpython-310.pyc differ

trellis/pipelines/samplers/__pycache__/flow_euler_old.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/flow_euler_old.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/flow_euler_old.cpython-310.pyc differ

trellis/pipelines/samplers/__pycache__/guidance_interval_mixin.cpython-310.pyc CHANGED Viewed

Binary files a/trellis/pipelines/samplers/__pycache__/guidance_interval_mixin.cpython-310.pyc and b/trellis/pipelines/samplers/__pycache__/guidance_interval_mixin.cpython-310.pyc differ