fffan committed
Commit 130118b · verified · 1 Parent(s): 72d3318

Upload 461 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +220 -0
  2. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/ckpts/epoch=0-step=1000.ckpt +3 -0
  3. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/ckpts/last.ckpt +3 -0
  4. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/cmd.txt +2 -0
  5. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/.gitignore +7 -0
  6. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/.gitmodules +15 -0
  7. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/README.md +83 -0
  8. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/bear.gif +3 -0
  9. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/cherry.gif +3 -0
  10. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/teddy.png +3 -0
  11. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/check_output.py +42 -0
  12. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/.gitignore +196 -0
  13. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/.pre-commit-config.yaml +34 -0
  14. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/README.md +129 -0
  15. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/__init__.py +25 -0
  16. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/background/gaussian_mvdream_background.py +72 -0
  17. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting.yaml +96 -0
  18. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_background.yaml +111 -0
  19. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_mvdream.yaml +131 -0
  20. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_shading.yaml +115 -0
  21. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_zero123.yaml +144 -0
  22. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/scene_lang.yaml +138 -0
  23. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/exporter.py +44 -0
  24. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_base.py +1469 -0
  25. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_base.py.bak +1492 -0
  26. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_dynamic.py +77 -0
  27. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_io.py +327 -0
  28. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/mesh_utils.py +150 -0
  29. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/material/gaussian_material.py +116 -0
  30. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer.py +151 -0
  31. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_advanced.py +152 -0
  32. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_background.py +145 -0
  33. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_shading.py +226 -0
  34. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/gaussian_batch_renderer.py +92 -0
  35. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_mvdream.py +249 -0
  36. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_splatting.py +223 -0
  37. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_zero123.py +339 -0
  38. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/scene_lang.py +528 -0
  39. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/__init__.py +0 -0
  40. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/ae.py +63 -0
  41. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/sam_clip.py +366 -0
  42. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_background.png +3 -0
  43. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_composite.png +3 -0
  44. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_layers.png +0 -0
  45. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_background.png +3 -0
  46. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_composite.png +3 -0
  47. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_layers.png +0 -0
  48. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_background.png +3 -0
  49. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_composite.png +3 -0
  50. 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_layers.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,223 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/bear.gif filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/cherry.gif filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/teddy.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_background.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_composite.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_background.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_composite.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_background.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_composite.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/stairs_background.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/stairs_composite.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/tinycudann-1.7.post70240121-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/outpaint_0.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/outpaint_1.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1000-val.mp4 filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test.mp4 filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/0.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/1.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/10.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/100.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/101.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/102.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/103.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/104.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/105.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/106.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/107.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/108.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/109.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/11.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/110.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/111.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/112.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/113.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/114.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/115.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/116.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/117.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/118.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/119.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/12.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/13.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/14.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/15.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/16.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/17.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/18.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/19.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/2.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/20.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/21.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/22.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/23.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/24.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/25.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/26.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/27.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/28.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/29.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/3.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/30.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/31.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/32.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/33.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/34.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/35.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/36.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/37.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/38.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/39.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/4.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/40.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/41.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/42.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/43.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/44.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/45.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/46.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/47.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/48.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/49.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/5.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/50.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/51.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/52.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/53.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/54.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/55.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/56.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/57.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/58.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/59.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/6.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/60.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/61.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/62.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/63.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/64.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/65.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/66.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/67.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/68.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/69.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/7.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/70.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/71.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/72.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/73.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/74.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/75.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/76.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/77.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/78.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/79.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/8.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-test/80.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it1500-val.mp4 filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it500-val.mp4 filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat.mp4 filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/0.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/1.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/10.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/100.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/101.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/102.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/103.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/104.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/105.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/106.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/107.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/108.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/109.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/11.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/110.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/111.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/112.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/113.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/114.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/115.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/116.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/117.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/118.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/119.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/12.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/13.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/14.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/15.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/16.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/17.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/18.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/19.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/2.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/20.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/21.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/22.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/23.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/24.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/25.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/26.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/27.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/28.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/29.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/3.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/30.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/31.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/32.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/33.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/34.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/35.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/36.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/37.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/38.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/39.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/4.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/40.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/41.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/42.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/43.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/44.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/45.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/46.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/47.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/48.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/49.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/5.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/50.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/51.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/52.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/53.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/54.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/55.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/56.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/57.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/58.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/59.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/6.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/60.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/61.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/62.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/63.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/64.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/65.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/66.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/67.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/68.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/69.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/7.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/70.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/71.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/72.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/73.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/74.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/75.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/76.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/77.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/78.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/79.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/8.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/it800-feat/80.png filter=lfs diff=lfs merge=lfs -text
+ 000000000017.1/gs-sds-generation/3DitScene@20250207-015119/save/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
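Every path added above is routed through Git LFS, so the repository itself stores only small text pointer stubs. As a minimal sketch, the following hypothetical helper (not part of this commit) detects such stubs by the spec line visible in the checkpoint diffs that follow:
```
def is_lfs_pointer(path: str) -> bool:
    # LFS pointer files are tiny text stubs: a spec line, then
    # "oid sha256:..." and "size ..." lines.
    try:
        with open(path, "rb") as f:
            head = f.read(200)
    except OSError:
        return False
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")
```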
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/ckpts/epoch=0-step=1000.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05b53135d3920aa7616777d0b9040ed4a12f5060d43f700c361aae7805f9d248
+ size 28888900
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/ckpts/last.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2762c6bc7f087ebcf67488e4e6b106a8d7975d5143b26d3294b8a8f75b65a777
+ size 28888900
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/cmd.txt ADDED
@@ -0,0 +1,2 @@
+ python launch.py --config custom/threestudio-3dgs/configs/scene_lang.yaml --train --gpu 2 exp_root_dir=outputs/mira_video_clips/000000000/000000000017.1 tag=3DitScene system.geometry.geometry_convert_from=depth:/mnt/hdd1/wufan/datasets/MiraData/data/video_frames/000000000/000000000017.1/0.jpg system.geometry.ooi_bbox=[599,250,692,452] system.prompt_processor.prompt=It is night time in a city with tall buildings and neon lights illuminating the streets. system.empty_prompt= The background is a city at night with tall buildings, out of focus system.side_prompt= The background is a city at night with tall buildings, out of focus
+ Namespace(config='custom/threestudio-3dgs/configs/scene_lang.yaml', gpu='2', train=True, validate=False, test=False, export=False, save_dir=None, gradio=False, verbose=False, typecheck=False)
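The first line of cmd.txt is the exact launch command and the second is the parsed argparse Namespace. A minimal sketch of how a launcher could persist such a record (save_cmd is a hypothetical helper introduced here; this commit does not show the repository's actual mechanism):
```
import os
import sys

def save_cmd(out_dir, args):
    # Record the invocation and parsed arguments for reproducibility.
    # Note: sys.argv starts at the script path, so the interpreter name
    # is prepended by hand here (an assumption, matching the file above).
    with open(os.path.join(out_dir, "cmd.txt"), "w") as f:
        f.write("python " + " ".join(sys.argv) + "\n")
        f.write(repr(args) + "\n")
```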
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/.gitignore ADDED
@@ -0,0 +1,7 @@
+ ckpts/
+ outputs/
+ .threestudio_cache/
+
+ *.pyc
+ *.DS_Store
+
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/.gitmodules ADDED
@@ -0,0 +1,15 @@
+ [submodule "submodules/MobileSAM-lang"]
+ path = submodules/MobileSAM-lang
+ url = https://github.com/zqh0253/MobileSAM-lang.git
+ [submodule "submodules/segment-anything-langsplat"]
+ path = submodules/segment-anything-langsplat
+ url = https://github.com/zqh0253/segment-anything-langsplat.git
+ [submodule "submodules/simple-knn"]
+ path = submodules/simple-knn
+ url = https://github.com/DSaurus/simple-knn.git
+ [submodule "submodules/diff-gaussian-rasterization"]
+ path = submodules/diff-gaussian-rasterization
+ url = https://github.com/zqh0253/diff-gaussian-rasterization-lang
+ [submodule "submodules/langsplat-rasterization"]
+ path = submodules/langsplat-rasterization
+ url = https://github.com/minghanqin/langsplat-rasterization.git
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/README.md ADDED
@@ -0,0 +1,83 @@
+ # 3DitScene: Editing Any Scene via Language-guided Disentangled Gaussian Splatting
+
+ [![Project Page](https://img.shields.io/badge/Project-Website-green)](https://zqh0253.github.io/3DitScene/)
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/qihang/3Dit-Scene/)
+ [![arXiv](https://img.shields.io/badge/arXiv-2405.18424-b31b1b.svg)](https://arxiv.org/abs/2405.18424)
+
+
+ <table class="center">
+ <tr style="line-height: 0">
+ <td width=35% style="border: none; text-align: center">Move the bear, and rotate the camera</td>
+ <td width=30% style="border: none; text-align: center">Move / remove the girl, and rotate the camera</td>
+ </tr>
+ <tr style="line-height: 0">
+ <td width=35% style="border: none"><img src="assets/bear.gif"></td>
+ <td width=30% style="border: none"><img src="assets/cherry.gif"></td>
+ </tr>
+ </table>
+
+ ## Installation
+
+ + Install `Python >= 3.8`.
+ + Install `torch >= 1.12`. We have tested on `torch==2.0.1+cu118`, but other versions should also work fine.
+ + Clone our repo:
+ ```
+ git clone https://github.com/zqh0253/3DitScene.git --recursive
+ ```
+ + Install dependencies:
+ ```
+ pip install -r requirements.txt
+ ```
+ + Install submodules:
+ ```
+ pip install ./submodules/segment-anything-langsplat
+ pip install ./submodules/MobileSAM-lang
+ pip install ./submodules/langsplat-rasterization
+ pip install ./submodules/simple-knn
+ ```
+ + Prepare weights for `SAM`:
+ ```
+ mkdir ckpts
+ wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -O ./ckpts/sam_vit_h_4b8939.pth
+ cp submodules/MobileSAM-lang/weights/mobile_sam.pt ./ckpts/
+ ```
+
+ ## Usage
+
+ Run the following command to launch the optimization procedure:
+ ```
+ python -u launch.py --config custom/threestudio-3dgs/configs/scene_lang.yaml --train --gpu 0 tag=3DitScene
+ system.geometry.geometry_convert_from=depth:${IMGPATH} system.geometry.ooi_bbox=${BBOX}
+ system.prompt_processor.prompt="${PROMPT}" system.empty_prompt="${EMPTY_PROMPT}" system.side_prompt="${SIDE_PROMPT}"
+ ```
+ You should specify the image path `IMGPATH`, the bounding box of the object of interest `BBOX`, and the prompts `PROMPT`, `EMPTY_PROMPT`, and `SIDE_PROMPT`. These prompts describe the image itself, the background area behind the image, and the content of the novel view region, respectively.
+
+ Here we provide an image (`./assets/teddy.png`) as an example:
+ ```
+ python -u launch.py --config custom/threestudio-3dgs/configs/scene_lang.yaml --train --gpu 0 tag=3DitScene
+ system.geometry.geometry_convert_from=depth:assets/teddy.png system.geometry.ooi_bbox=[122,119,387,495]
+ system.prompt_processor.prompt="a teddy bear in Times Square" system.empty_prompt="Times Square, out of focus" system.side_prompt="Times Square, out of focus"
+ ```
+
+ ## Huggingface demo
+
+ We provide a Hugging Face demo. You have two options to explore it:
+ (1) Visit our [online Hugging Face space](https://huggingface.co/spaces/qihang/3Dit-Scene).
+ (2) Deploy it locally by following these steps:
+ + Install the necessary packages and download required files as specified in our [Dockerfile](https://huggingface.co/spaces/qihang/3Dit-Scene/blob/main/Dockerfile).
+ + Run the following command to launch the service at `localhost:10091`:
+ ```
+ python gradio_app_single_process.py --listen --hf-space --port 10091
+ ```
+
+ ## Citation
+
+ If you find our work useful, please consider citing:
+ ```
+ @inproceedings{zhang20243DitScene,
+   author = {Qihang Zhang and Yinghao Xu and Chaoyang Wang and Hsin-Ying Lee and Gordon Wetzstein and Bolei Zhou and Ceyuan Yang},
+   title = {{3DitScene}: Editing Any Scene via Language-guided Disentangled Gaussian Splatting},
+   booktitle = {arXiv},
+   year = {2024}
+ }
+ ```
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/bear.gif ADDED

Git LFS Details

  • SHA256: ef075fb5f74ea8fc690b0b68d3abd88d151cdc246ac2df27767fc4cbb24227f9
  • Pointer size: 132 Bytes
  • Size of remote file: 6.43 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/cherry.gif ADDED

Git LFS Details

  • SHA256: a0069d4bdd8da45627cbba25f3c49a102220aff23cc8738417a13101d42a3b25
  • Pointer size: 132 Bytes
  • Size of remote file: 7.39 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/assets/teddy.png ADDED

Git LFS Details

  • SHA256: 6d73779e3f37a6e8e6171d30019a12851cdd1a69c5bdf2ff1c1b0b8ade8e1db6
  • Pointer size: 131 Bytes
  • Size of remote file: 390 kB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/check_output.py ADDED
@@ -0,0 +1,42 @@
+ import os
+
+ # Avoid MKL/GNU OpenMP threading-layer conflicts; must be set before pandas loads MKL.
+ os.environ["MKL_THREADING_LAYER"] = "GNU"
+
+ import pandas as pd
+
+ # Load the clip list; rows whose outputs are missing are written to a second CSV.
+ csv_path = "/mnt/hdd1/wufan/datasets/MiraData/data/data_list/miradata_v1_9k_subset_shard_0.csv"
+ missing_csv_path = "/mnt/hdd1/wufan/datasets/MiraData/data/data_list/miradata_v1_9k_output_missing.csv"
+ df = pd.read_csv(csv_path)
+
+ output_path = "/mnt/hdd1/wufan/projects/3DitScene/outputs/mira_video_clips"
+
+ # Collect every row whose generation output (save/it500-val.mp4) does not exist yet.
+ missing_rows = []
+ for index, row in df.iterrows():
+     # Map e.g. "video_clips/000005007/000005007658.0.mp4" to the trial root
+     # ".../outputs/mira_video_clips/000005007/000005007658.0/gs-sds-generation".
+     rel_path = row["file_path"].replace("video_clips/", "").replace(".mp4", "/gs-sds-generation")
+     file_dir = os.path.join(output_path, rel_path)
+
+     if os.path.isdir(file_dir):
+         # Each trial directory under file_dir should contain save/it500-val.mp4;
+         # the clip counts as done as soon as one trial has produced the video.
+         done = any(
+             os.path.exists(os.path.join(file_dir, item, "save/it500-val.mp4"))
+             for item in os.listdir(file_dir)
+         )
+         if not done:
+             missing_rows.append(row)
+     else:
+         # No output directory at all: the clip has not been processed.
+         missing_rows.append(row)
+
+ # Save the rows that still need processing.
+ results_df = pd.DataFrame(missing_rows)
+ results_df.to_csv(missing_csv_path, index=False)
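The same filter can also be written without iterrows, as a boolean mask over the DataFrame. A minimal equivalent sketch under the same path layout (has_output is a name introduced here, not from the script; output_path, df, and missing_csv_path are as defined above):
```
def has_output(fp: str) -> bool:
    # True if any trial directory for this clip already contains save/it500-val.mp4.
    rel = fp.replace("video_clips/", "").replace(".mp4", "/gs-sds-generation")
    base = os.path.join(output_path, rel)
    if not os.path.isdir(base):
        return False
    return any(
        os.path.exists(os.path.join(base, trial, "save/it500-val.mp4"))
        for trial in os.listdir(base)
    )

missing = df[~df["file_path"].map(has_output)]
missing.to_csv(missing_csv_path, index=False)
```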
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/.gitignore ADDED
@@ -0,0 +1,196 @@
+ # Created by https://www.toptal.com/developers/gitignore/api/python
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+ ### Python ###
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ ### Python Patch ###
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+ poetry.toml
+
+ # ruff
+ .ruff_cache/
+
+ # LSP config files
+ pyrightconfig.json
+
+ # End of https://www.toptal.com/developers/gitignore/api/python
+
+ .vscode/
+ .threestudio_cache/
+ outputs/
+ outputs-gradio/
+
+ # pretrained model weights
+ *.ckpt
+ *.pt
+ *.pth
+
+ # wandb
+ wandb/
+
+ custom/*
+
+ load/tets/256_tets.npz
+
+ diff-gaussian-rasterization/
+ simple-knn/
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/.pre-commit-config.yaml ADDED
@@ -0,0 +1,34 @@
+ default_language_version:
+   python: python3
+
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.4.0
+     hooks:
+       - id: trailing-whitespace
+       - id: check-ast
+       - id: check-merge-conflict
+       - id: check-yaml
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+         args: [--markdown-linebreak-ext=md]
+
+   - repo: https://github.com/psf/black
+     rev: 23.3.0
+     hooks:
+       - id: black
+         language_version: python3
+
+   - repo: https://github.com/pycqa/isort
+     rev: 5.12.0
+     hooks:
+       - id: isort
+         exclude: README.md
+         args: ["--profile", "black"]
+
+   # temporarily disable static type checking
+   # - repo: https://github.com/pre-commit/mirrors-mypy
+   #   rev: v1.2.0
+   #   hooks:
+   #     - id: mypy
+   #       args: ["--ignore-missing-imports", "--scripts-are-modules", "--pretty"]
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/README.md ADDED
@@ -0,0 +1,129 @@
+ # threestudio-3dgs
+ <img src="https://github.com/DSaurus/threestudio-3dgs/assets/24589363/55874a57-cff1-4e83-a006-76585bcd3b76" width="" height="128">
+
+ <img src="https://github.com/DSaurus/threestudio-3dgs/assets/24589363/505f35e5-b160-4c12-92dc-03482404ef5e" width="" height="128">
+
+ <img src="https://github.com/DSaurus/threestudio-3dgs/assets/24589363/a1041f0d-a56f-4f7f-adc3-1e99c2d81098" width="" height="128">
+
+ <img src="https://github.com/DSaurus/threestudio-3dgs/assets/24589363/f524524e-33aa-4701-9f0d-31cba23eaead" width="" height="128">
+
+ The Gaussian Splatting extension for threestudio. This extension is written by [Ruizhi Shao](https://github.com/DSaurus) and [Youtian Lin](https://github.com/Linyou). To use it, please install [threestudio](https://github.com/threestudio-project/threestudio) first and then install this extension in the threestudio `custom` directory.
+
+ ## Advanced Gaussian Splatting Installation (Recommended)
+ ```
+ cd custom
+ git clone https://github.com/DSaurus/threestudio-3dgs.git
+ cd threestudio-3dgs
+ git clone --recursive https://github.com/ashawkey/diff-gaussian-rasterization
+ git clone https://github.com/DSaurus/simple-knn.git
+ pip install ./diff-gaussian-rasterization
+ pip install ./simple-knn
+ ```
+
+ ## Native Gaussian Splatting Installation
+ ```
+ cd custom
+ git clone https://github.com/DSaurus/threestudio-3dgs.git
+ cd threestudio-3dgs
+ git clone [email protected]:graphdeco-inria/gaussian-splatting.git --recursive
+ cd gaussian-splatting/submodules
+ python -m pip install diff-gaussian-rasterization/.
+ python -m pip install simple-knn/
+
+ # If you want to export mesh, please install pymeshlab
+ pip install pymeshlab
+ ```
+
+
+ ## Quick Start
+ ```
+ # Native Gaussian Splatting + SDS Loss
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger"
+
+ # Advanced Gaussian Splatting with background + SDS Loss
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting_background.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger"
+
+ # Advanced Gaussian Splatting with background and shading + SDS Loss
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting_shading.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger"
+ ```
+
+ ## Gaussian Splatting + MVDream
+ Please first install the [MVDream extension](https://github.com/DSaurus/threestudio-mvdream), then you can run the following script:
+ ```
+ # Advanced Gaussian Splatting with background and shading + MVDream
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting_mvdream.yaml --train --gpu 0 system.prompt_processor.prompt="an astronaut riding a horse"
+ ```
+
+ ## Gaussian Splatting + Zero-123
+ ```
+ # Advanced Gaussian Splatting + Zero-123
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting_zero123.yaml --train --gpu 0 data.image_path=./load/images/anya_front_rgba.png
+ ```
+
+ ## Resume from checkpoints
+ ```
+ # Resume training from the last checkpoint; you may replace last.ckpt with any other checkpoint
+ python launch.py --config path/to/trial/dir/configs/parsed.yaml --train --gpu 0 resume=path/to/trial/dir/ckpts/last.ckpt
+ ```
+
+ ## Load from PLY
+ ```
+ # load from a Gaussian Splatting ply file
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger" system.geometry.geometry_convert_from=path/to/point_cloud.ply
+
+ # only load point positions and colors from the ply file
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger" system.geometry.geometry_convert_from=path/to/point_cloud.ply system.geometry.load_ply_only_vertex=true
+ ```
+
+ If you want to use Shap-E initialization, please install the [threestudio-shap-e extension](https://github.com/DSaurus/threestudio-shap-e) first.
+ ```
+ # load from shap-e initialization
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger" system.geometry.geometry_convert_from="shap-e:a delicious hamburger"
+ ```
+
+ If you want to use LRM initialization, please install the [threestudio-lrm extension](https://github.com/Adamdad/threestudio-lrm) first.
+ ```
+ # load from lrm initialization
+ python launch.py --config custom/threestudio-3dgs/configs/gaussian_splatting.yaml --train --gpu 0 system.prompt_processor.prompt="a delicious hamburger" system.geometry.geometry_convert_from="lrm:a delicious hamburger"
+ ```
+
+ ## Export
+ You can use the following script to export the Gaussian Splatting .ply file and a mesh .obj:
+ ```
+ python launch.py --config path/to/config --export --gpu 0 system.prompt_processor.prompt="a delicious hamburger" resume=path/to/last.ckpt
+ ```
+
+ ## Citation
+ ```
+ @Article{kerbl3Dgaussians,
+   author = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
+   title = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
+   journal = {ACM Transactions on Graphics},
+   number = {4},
+   volume = {42},
+   month = {July},
+   year = {2023},
+   url = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
+ }
+ ```
+
+ ## Acknowledgement
+ Please also consider citing these works on 3D Gaussian Splatting generation; their open-source code inspired this project.
+
+ ```
+ @article{tang2023dreamgaussian,
+   title={DreamGaussian: Generative Gaussian Splatting for Efficient 3D Content Creation},
+   author={Tang, Jiaxiang and Ren, Jiawei and Zhou, Hang and Liu, Ziwei and Zeng, Gang},
+   journal={arXiv preprint arXiv:2309.16653},
+   year={2023}
+ }
+ ```
+
+ ```
+ @article{GaussianDreamer,
+   title={GaussianDreamer: Fast Generation from Text to 3D Gaussian Splatting with Point Cloud Priors},
+   author={Taoran Yi and Jiemin Fang and Guanjun Wu and Lingxi Xie and Xiaopeng Zhang and Wenyu Liu and Qi Tian and Xinggang Wang},
+   journal={arxiv:2310.08529},
+   year={2023}
+ }
+ ```
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/__init__.py ADDED
@@ -0,0 +1,25 @@
+ import threestudio
+ from packaging.version import Version
+
+ if hasattr(threestudio, "__version__") and Version(threestudio.__version__) >= Version(
+     "0.2.1"
+ ):
+     pass
+ else:
+     if hasattr(threestudio, "__version__"):
+         print(f"[INFO] threestudio version: {threestudio.__version__}")
+     raise ValueError(
+         "threestudio version must be >= 0.2.1, please update threestudio by pulling the latest version from github"
+     )
+
+
+ from .background import gaussian_mvdream_background
+ from .geometry import exporter, gaussian_base, gaussian_io
+ from .material import gaussian_material
+ from .renderer import (
+     diff_gaussian_rasterizer,
+     diff_gaussian_rasterizer_advanced,
+     diff_gaussian_rasterizer_background,
+     diff_gaussian_rasterizer_shading,
+ )
+ from .system import gaussian_mvdream, gaussian_splatting, gaussian_zero123, scene_lang
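The version gate above relies on `packaging.version.Version` implementing PEP 440 ordering rather than string comparison, which is why the guard does not compare raw version strings; a small illustration:
```
from packaging.version import Version

assert Version("0.10.0") > Version("0.2.1")  # numeric comparison per component
assert "0.10.0" < "0.2.1"                    # plain string comparison gets this wrong
```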
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/background/gaussian_mvdream_background.py ADDED
@@ -0,0 +1,72 @@
+ import random
+ from dataclasses import dataclass, field
+
+ import threestudio
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from threestudio.models.background.base import BaseBackground
+ from threestudio.models.networks import get_encoding, get_mlp
+ from threestudio.utils.ops import get_activation
+ from threestudio.utils.typing import *
+
+
+ @threestudio.register("gaussian-mvdream-neural-environment-map-background")
+ class NeuralEnvironmentMapBackground(BaseBackground):
+     @dataclass
+     class Config(BaseBackground.Config):
+         n_output_dims: int = 3
+         color_activation: str = "sigmoid"
+         dir_encoding_config: dict = field(
+             default_factory=lambda: {"otype": "SphericalHarmonics", "degree": 3}
+         )
+         mlp_network_config: dict = field(
+             default_factory=lambda: {
+                 "otype": "VanillaMLP",
+                 "activation": "ReLU",
+                 "n_neurons": 16,
+                 "n_hidden_layers": 2,
+             }
+         )
+         random_aug: bool = False
+         random_aug_prob: float = 0.5
+         eval_color: Optional[Tuple[float, float, float]] = None
+
+         # multi-view diffusion
+         share_aug_bg: bool = False
+
+     cfg: Config
+
+     def configure(self) -> None:
+         self.encoding = get_encoding(3, self.cfg.dir_encoding_config)
+         self.network = get_mlp(
+             self.encoding.n_output_dims,
+             self.cfg.n_output_dims,
+             self.cfg.mlp_network_config,
+         )
+
+     def forward(self, dirs: Float[Tensor, "B H W 3"]) -> Float[Tensor, "B H W Nc"]:
+         if not self.training and self.cfg.eval_color is not None:
+             return torch.ones(*dirs.shape[:-1], self.cfg.n_output_dims).to(
+                 dirs
+             ) * torch.as_tensor(self.cfg.eval_color).to(dirs)
+         # viewdirs must be normalized before passing to this function
+         dirs = (dirs + 1.0) / 2.0  # (-1, 1) => (0, 1)
+         dirs_embd = self.encoding(dirs.view(-1, 3))
+         color = self.network(dirs_embd).view(*dirs.shape[:-1], self.cfg.n_output_dims)
+         color = get_activation(self.cfg.color_activation)(color)
+         if (
+             self.training
+             and self.cfg.random_aug
+             and random.random() < self.cfg.random_aug_prob
+         ):
+             # use random background color with probability random_aug_prob
+             n_color = 1 if self.cfg.share_aug_bg else dirs.shape[0]
+             value = random.random() < 0.5
+             color = color * 0 + (  # prevent checking for unused parameters in DDP
+                 torch.ones(n_color, 1, 1, self.cfg.n_output_dims)
+                 .to(dirs)
+                 .expand(*dirs.shape[:-1], -1)
+                 * value
+             )
+         return color
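The `color * 0 + (...)` expression above is a small but deliberate trick: on augmented steps the network output is discarded, but multiplying it by zero keeps the background MLP inside the autograd graph so DDP does not complain about unused parameters. A self-contained sketch of the same pattern (the tiny `net` below is only illustrative, not the repo's MLP):
```
import random
import torch
import torch.nn as nn

net = nn.Linear(3, 3)                     # stand-in for the background network
dirs = torch.rand(4, 8, 8, 3)             # B x H x W x 3 view directions
color = torch.sigmoid(net(dirs))

if random.random() < 0.5:                 # random background augmentation
    value = float(random.random() < 0.5)  # shared all-black or all-white background
    # color * 0 keeps `net` in the graph; the constant supplies the actual color
    color = color * 0 + torch.full_like(color, value)

color.sum().backward()                    # net.weight.grad is populated either way
```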
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting.yaml ADDED
@@ -0,0 +1,96 @@
+ name: "gs-sds-generation"
+ tag: "${rmspace:${system.prompt_processor.prompt},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+
+ data_type: "random-camera-datamodule"
+ data:
+   batch_size: 4
+   width: 512
+   height: 512
+   camera_distance_range: [2.5, 2.5]
+   fovy_range: [60, 70]
+   elevation_range: [-20, 90]
+   light_sample_strategy: "dreamfusion"
+   eval_camera_distance: 2.5
+   eval_fovy_deg: 70
+   rays_d_normalize: false
+
+ system_type: "gaussian-splatting-system"
+ system:
+
+   geometry_type: "gaussian-splatting"
+   geometry:
+     position_lr: [0, 0.001, 0.00002, 1000]
+     scale_lr: 0.005
+     feature_lr: 0.01
+     opacity_lr: 0.05
+     rotation_lr: 0.005
+     densification_interval: 300
+     prune_interval: 300
+     opacity_reset_interval: 50000000
+     densify_from_iter: 500
+     densify_until_iter: ${trainer.max_steps}
+     prune_from_iter: 500
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.005
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     init_num_pts: 4096
+     pc_init_radius: 0.8
+     opacity_init: 0.2
+
+   renderer_type: "diff-gaussian-rasterizer"
+   renderer:
+     debug: false
+     invert_bg_prob: 0.5
+
+   material_type: "no-material" # unused
+   material:
+     n_output_dims: 0
+
+   background_type: "solid-color-background" # unused
+
+   prompt_processor_type: "stable-diffusion-prompt-processor"
+   prompt_processor:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     prompt: ???
+
+   guidance_type: "stable-diffusion-guidance"
+   guidance:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     guidance_scale: 100.0
+     weighting_strategy: sds
+     min_step_percent: 0.02
+     max_step_percent: [1000, 0.98, 0.5, 1001]
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   loggers:
+     wandb:
+       enable: false
+       project: 'threestudio'
+       name: None
+
+   loss:
+     lambda_sds: 0.1
+     lambda_position: 1.0
+     lambda_opacity: 0.0001
+     lambda_scales: 0.0001
+     lambda_tv_loss: 1.0
+     lambda_depth_tv_loss: 1.0
+
+ trainer:
+   max_steps: 5000
+   log_every_n_steps: 1
+   num_sanity_val_steps: 0
+   val_check_interval: 100
+   enable_progress_bar: true
+   precision: 32-true
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: ${trainer.max_steps}
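Several values in this config (`position_lr`, `max_step_percent`, and others) are schedules of the form `[start_step, start_value, end_value, end_step]` rather than scalars; threestudio resolves them per training step via its `C` utility (imported as `from threestudio.utils.misc import C` in `geometry/gaussian_base.py`). For example, `max_step_percent: [1000, 0.98, 0.5, 1001]` effectively switches from 0.98 to 0.5 at step 1000. A minimal sketch of the linear-interpolation behavior, written here as an independent illustration rather than the library's exact code:
```
def c_schedule(value, step):
    """Resolve a scalar or a [start_step, start_value, end_value, end_step] schedule."""
    if isinstance(value, (int, float)):
        return value
    start_step, start_value, end_value, end_step = value
    if step <= start_step:
        return start_value
    if step >= end_step:
        return end_value
    t = (step - start_step) / (end_step - start_step)
    return start_value + t * (end_value - start_value)

# position_lr: [0, 0.001, 0.00002, 1000] decays from 1e-3 to 2e-5 over 1000 steps
assert c_schedule([0, 0.001, 0.00002, 1000], 0) == 0.001
assert c_schedule([0, 0.001, 0.00002, 1000], 2000) == 0.00002
```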
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_background.yaml ADDED
@@ -0,0 +1,111 @@
+ name: "gs-sds-generation-background"
+ tag: "${rmspace:${system.prompt_processor.prompt},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+
+ data_type: "random-camera-datamodule"
+ data:
+   batch_size: 4
+   width: 512
+   height: 512
+   camera_distance_range: [2.5, 2.5]
+   fovy_range: [60, 70]
+   elevation_range: [-20, 90]
+   light_sample_strategy: "dreamfusion"
+   eval_camera_distance: 2.5
+   eval_fovy_deg: 70
+   rays_d_normalize: false
+
+ system_type: "gaussian-splatting-system"
+ system:
+
+   geometry_type: "gaussian-splatting"
+   geometry:
+     position_lr: [0, 0.001, 0.00002, 1000]
+     scale_lr: 0.005
+     feature_lr: 0.01
+     opacity_lr: 0.05
+     rotation_lr: 0.005
+     densification_interval: 300
+     prune_interval: 300
+     opacity_reset_interval: 50000000
+     densify_from_iter: 500
+     densify_until_iter: 10000
+     prune_from_iter: 500
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.005
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     init_num_pts: 4096
+     pc_init_radius: 0.8
+     opacity_init: 0.2
+
+   renderer_type: "diff-gaussian-rasterizer-background"
+   renderer:
+     debug: false
+
+   material_type: "no-material" # unused
+   material:
+     n_output_dims: 0
+
+   background_type: "gaussian-mvdream-neural-environment-map-background"
+   background:
+     color_activation: sigmoid
+     random_aug: true
+     random_aug_prob: 0.8
+
+   prompt_processor_type: "stable-diffusion-prompt-processor"
+   prompt_processor:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     prompt: ???
+
+   guidance_type: "stable-diffusion-guidance"
+   guidance:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     guidance_scale: 100.0
+     weighting_strategy: sds
+     min_step_percent: 0.02
+     max_step_percent: [1000, 0.98, 0.5, 1001]
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   loggers:
+     wandb:
+       enable: false
+       project: 'threestudio'
+       name: None
+
+   loss:
+     lambda_sds: 0.1
+     lambda_position: 1.0
+     lambda_opacity: 0.0001
+     lambda_scales: 0.0001
+     lambda_tv_loss: 1.0
+     lambda_depth_tv_loss: 1.0
+
+   optimizer:
+     name: Adam
+     args:
+       lr: 0.01
+       betas: [0.9, 0.99]
+       eps: 1.e-15
+     params:
+       background:
+         lr: 0.001
+
+
+
+ trainer:
+   max_steps: 5000
+   log_every_n_steps: 1
+   num_sanity_val_steps: 0
+   val_check_interval: 100
+   enable_progress_bar: true
+   precision: 32-true
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: ${trainer.max_steps}
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_mvdream.yaml ADDED
@@ -0,0 +1,131 @@
+ name: "gs-sds-mvdream"
+ tag: "${rmspace:${system.prompt_processor.prompt},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+
+ data_type: "mvdream-random-multiview-camera-datamodule"
+ data:
+   batch_size: [4,4]
+   n_view: 4
+   # rendering resolution schedule (see resolution_milestones; fixed at 256x256 here)
+   width: [256, 256]
+   height: [256, 256]
+   resolution_milestones: [1000]
+   camera_distance_range: [0.8, 1.0] # relative
+   fovy_range: [15, 60]
+   elevation_range: [0, 30]
+   camera_perturb: 0.
+   center_perturb: 0.
+   up_perturb: 0.
+   n_val_views: 4
+   eval_camera_distance: 3.0
+   eval_fovy_deg: 40.
+   rays_d_normalize: false
+
+ system_type: "gaussian-splatting-mvdream-system"
+ system:
+   geometry_type: "gaussian-splatting"
+   geometry:
+     position_lr: [0, 0.0001, 0.00001, 1500]
+     scale_lr: [0, 0.01, 0.001, 1500]
+     feature_lr: [0, 0.005, 0.001, 6000]
+     opacity_lr: 0.05
+     rotation_lr: 0.001
+     pred_normal: false
+     normal_lr: 0.005
+     densification_interval: 300
+     prune_interval: 300
+     opacity_reset_interval: 100000
+     densify_from_iter: 1500
+     densify_until_iter: ${trainer.max_steps}
+     prune_from_iter: 1500
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.01
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     sphere: False
+     color_clip: [0, 0.01, 0.02, 1500, 0.5, 4000, 1.0, 7000]
+
+     init_num_pts: 4096
+     pc_init_radius: 0.5
+     opacity_init: 0.05
+     max_num: 100000
+
+   renderer_type: "diff-gaussian-rasterizer-shading"
+   renderer:
+     debug: false
+
+   material_type: "gaussian-diffuse-with-point-light-material"
+   material:
+     ambient_only_steps: 3000
+     textureless_prob: 0.0
+     ambient_light_color: [0.9, 0.9, 0.9]
+     diffuse_light_color: [0.1, 0.1, 0.1]
+     soft_shading: true
+
+   background_type: "gaussian-mvdream-neural-environment-map-background"
+   background:
+     color_activation: sigmoid
+     random_aug: true
+     share_aug_bg: true
+     random_aug_prob: 0.95
+
+   prompt_processor_type: "stable-diffusion-prompt-processor"
+   prompt_processor:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     prompt: ???
+     negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions"
+     front_threshold: 30.
+     back_threshold: 30.
+
+   guidance_type: "mvdream-multiview-diffusion-guidance"
+   guidance:
+     model_name: "sd-v2.1-base-4view"
+     ckpt_path: null # path to a pre-downloaded checkpoint file (null for loading from URL)
+     guidance_scale: 50.0
+     min_step_percent: [0, 0.98, 0.02, 7000] # (start_iter, start_val, end_val, end_iter)
+     max_step_percent: [0, 0.98, 0.50, 7000]
+     recon_loss: true
+     recon_std_rescale: 0.5
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   loggers:
+     wandb:
+       enable: false
+       project: 'threestudio'
+       name: None
+
+   loss:
+     lambda_sds: 0.1
+     lambda_position: 1.0
+     lambda_opacity: 0.0001
+     lambda_scales: 0.0001
+     lambda_sparsity: 1.0
+     lambda_tv_loss: 0.0
+     lambda_depth_tv_loss: 1.0
+
+   optimizer:
+     name: Adam
+     args:
+       lr: 0.01
+       betas: [0.9, 0.99]
+       eps: 1.e-6
+     params:
+       background:
+         lr: 0.0001
+
+ trainer:
+   max_steps: 10000
+   log_every_n_steps: 1
+   num_sanity_val_steps: 0
+   val_check_interval: 100
+   enable_progress_bar: true
+   precision: 32-true
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: ${trainer.max_steps}
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_shading.yaml ADDED
@@ -0,0 +1,115 @@
+ name: "gs-sds-generation-shading"
+ tag: "${rmspace:${system.prompt_processor.prompt},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+
+ data_type: "random-camera-datamodule"
+ data:
+   batch_size: 4
+   width: 512
+   height: 512
+   camera_distance_range: [2.5, 2.5]
+   fovy_range: [60, 70]
+   elevation_range: [-20, 90]
+   light_sample_strategy: "dreamfusion"
+   eval_camera_distance: 2.5
+   eval_fovy_deg: 70
+   rays_d_normalize: false
+
+ system_type: "gaussian-splatting-system"
+ system:
+
+   geometry_type: "gaussian-splatting"
+   geometry:
+     position_lr: [0, 0.001, 0.00002, 1000]
+     scale_lr: 0.005
+     feature_lr: 0.01
+     opacity_lr: 0.05
+     rotation_lr: 0.005
+     densification_interval: 300
+     prune_interval: 300
+     opacity_reset_interval: 50000000
+     densify_from_iter: 500
+     densify_until_iter: ${trainer.max_steps}
+     prune_from_iter: 500
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.005
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     init_num_pts: 4096
+     pc_init_radius: 0.8
+     opacity_init: 0.2
+
+   renderer_type: "diff-gaussian-rasterizer-shading"
+   renderer:
+     debug: false
+
+   material_type: "gaussian-diffuse-with-point-light-material"
+   material:
+     ambient_only_steps: 2000
+     textureless_prob: 0.0
+     ambient_light_color: [1.0, 1.0, 1.0]
+     diffuse_light_color: [0.0, 0.0, 0.0]
+     soft_shading: true
+
+   background_type: "gaussian-mvdream-neural-environment-map-background"
+   background:
+     color_activation: sigmoid
+     random_aug: true
+     random_aug_prob: 0.8
+
+   prompt_processor_type: "stable-diffusion-prompt-processor"
+   prompt_processor:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     prompt: ???
+
+   guidance_type: "stable-diffusion-guidance"
+   guidance:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     guidance_scale: 100.0
+     weighting_strategy: sds
+     min_step_percent: 0.02
+     max_step_percent: [2000, 0.98, 0.5, 2001]
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   loggers:
+     wandb:
+       enable: false
+       project: 'threestudio'
+       name: None
+
+   loss:
+     lambda_sds: 0.1
+     lambda_position: 1.0
+     lambda_opacity: 0.0001
+     lambda_scales: 0.0001
+     lambda_tv_loss: 1.0
+     lambda_depth_tv_loss: 1.0
+
+   optimizer:
+     name: Adam
+     args:
+       lr: 0.01
+       betas: [0.9, 0.99]
+       eps: 1.e-15
+     params:
+       background:
+         lr: 0.001
+
+
+
+ trainer:
+   max_steps: 5000
+   log_every_n_steps: 1
+   num_sanity_val_steps: 0
+   val_check_interval: 100
+   enable_progress_bar: true
+   precision: 32-true
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: ${trainer.max_steps}
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/gaussian_splatting_zero123.yaml ADDED
@@ -0,0 +1,144 @@
+ name: "gs-zero123-sai"
+ tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+
+ data_type: "single-image-datamodule"
+ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
+   image_path: ./load/images/hamburger_rgba.png
+   height: [128, 256, 512]
+   width: [128, 256, 512]
+   resolution_milestones: [200, 300]
+   default_elevation_deg: 5.0
+   default_azimuth_deg: 0.0
+   default_camera_distance: 3.8
+   default_fovy_deg: 20.0
+   requires_depth: ${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}
+   requires_normal: ${cmaxgt0:${system.loss.lambda_normal}}
+   random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
+     height: 256
+     width: 256
+     batch_size: 4
+     eval_height: 512
+     eval_width: 512
+     eval_batch_size: 1
+     elevation_range: [-10, 80]
+     azimuth_range: [-180, 180]
+     camera_distance_range: [3.8, 3.8]
+     fovy_range: [20.0, 20.0] # Zero123 has fixed fovy
+     progressive_until: 0
+     camera_perturb: 0.0
+     center_perturb: 0.0
+     up_perturb: 0.0
+     light_position_perturb: 1.0
+     light_distance_range: [7.5, 10.0]
+     eval_elevation_deg: ${data.default_elevation_deg}
+     eval_camera_distance: ${data.default_camera_distance}
+     eval_fovy_deg: ${data.default_fovy_deg}
+     light_sample_strategy: "dreamfusion"
+     batch_uniform_azimuth: False
+     n_val_views: 30
+     n_test_views: 120
+
+ system_type: "gaussian-splatting-zero123-system"
+ system:
+   geometry_type: "gaussian-splatting"
+   geometry:
+     position_lr: [0, 0.001, 0.00002, 1000]
+     scale_lr: [0, 0.01, 0.001, 1000]
+     feature_lr: 0.01
+     opacity_lr: 0.05
+     rotation_lr: 0.001
+     densification_interval: 100
+     prune_interval: 100
+     opacity_reset_interval: 100000
+     densify_from_iter: 0
+     densify_until_iter: ${trainer.max_steps}
+     prune_from_iter: 0
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.005
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     sphere: False
+
+     init_num_pts: 4096
+     pc_init_radius: 0.5
+     opacity_init: 0.05
+     max_num: 500000
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   renderer_type: "diff-gaussian-rasterizer-advanced"
+   renderer:
+     debug: false
+     invert_bg_prob: 1.0
+
+   material_type: "no-material" # unused
+   material:
+     n_output_dims: 0
+
+   background_type: "solid-color-background" # unused
+
+   prompt_processor_type: "dummy-prompt-processor" # Zero123 doesn't use prompts
+   prompt_processor:
+     pretrained_model_name_or_path: ""
+     prompt: ""
+
+   guidance_type: "stable-zero123-guidance"
+   guidance:
+     pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
+     pretrained_model_name_or_path: "./load/zero123/stable_zero123.ckpt"
+     vram_O: ${not:${gt0:${system.freq.guidance_eval}}}
+     cond_image_path: ${data.image_path}
+     cond_elevation_deg: ${data.default_elevation_deg}
+     cond_azimuth_deg: ${data.default_azimuth_deg}
+     cond_camera_distance: ${data.default_camera_distance}
+     guidance_scale: 3.0
+     min_step_percent: [50, 0.7, 0.3, 200] # (start_iter, start_val, end_val, end_iter)
+     max_step_percent: [50, 0.98, 0.8, 200]
+
+   freq:
+     ref_only_steps: 0
+     guidance_eval: 0
+
+   loggers:
+     wandb:
+       enable: false
+       project: "threestudio"
+       name: None
+
+   loss:
+     lambda_sds: 0.1
+     lambda_rgb: [100, 500., 1000., 400]
+     lambda_mask: 50.
+     lambda_depth: 0. # 0.05
+     lambda_depth_rel: 0. # [0, 0, 0.05, 100]
+     lambda_normal: 0. # [0, 0, 0.05, 100]
+     lambda_normal_smooth: 0.
+     lambda_3d_normal_smooth: 0.
+
+   optimizer:
+     name: Adam
+     args:
+       lr: 0.01
+       betas: [0.9, 0.99]
+       eps: 1.e-8
+     params:
+       background:
+         lr: 0.001
+
+
+ trainer:
+   max_steps: 5000
+   log_every_n_steps: 1
+   num_sanity_val_steps: 0
+   val_check_interval: 100
+   enable_progress_bar: true
+   precision: 32
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: 100 # ${trainer.max_steps}
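Expressions such as `${gt0:${system.freq.guidance_eval}}`, `${not:...}`, and the `${rmspace:...}` used in the `tag:` fields are custom OmegaConf resolvers that threestudio registers at startup. The library's exact implementations are not reproduced here; the sketch below only illustrates how resolvers with these names could be registered (the lambda bodies are assumptions):
```
from omegaconf import OmegaConf

OmegaConf.register_new_resolver("gt0", lambda x: x > 0)
OmegaConf.register_new_resolver("not", lambda x: not x)
OmegaConf.register_new_resolver("rmspace", lambda s, sub: s.replace(" ", sub))

cfg = OmegaConf.create(
    {"guidance_eval": 0, "vram_O": "${not:${gt0:${guidance_eval}}}"}
)
print(cfg.vram_O)  # True: guidance_eval == 0, so the VRAM-saving path is enabled
```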
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/configs/scene_lang.yaml ADDED
@@ -0,0 +1,138 @@
+ name: "gs-sds-generation"
+ tag: "${rmspace:${system.prompt_processor.prompt},_}"
+ exp_root_dir: "outputs"
+ seed: 0
+ # resume: "/mnt/hdd1/wufan/projects/3DitScene/outputs/gs-sds-generation/3DitScene@20250204-120500/ckpts/last.ckpt"
+
+ data_type: "random-camera-datamodule"
+ data:
+   rotate_traj: false # WU
+   random_traj: false # WU
+   batch_size: 1
+   width: 512
+   height: 512
+   camera_distance_range: [2.5, 2.5]
+   fovy_range: [60, 60]
+   elevation_range: [0, 0] # The vertical angle of the camera relative to the object, in degrees.
+   light_sample_strategy: "dreamfusion"
+   eval_camera_distance: 2.5
+   eval_fovy_deg: 60 # The field of view (FOV) in the vertical direction, in degrees.
+   eval_elevation_deg: 0
+   rays_d_normalize: false
+   center_perturb: 0
+   up_perturb: 0
+   camera_perturb: 0
+   azimuth_range: [-15, 15] # The range of horizontal rotation angles during training
+   val_azimuth_range: [-15, 15] # The range of horizontal rotation angles during validation
+   insert_zero: true
+
+ system_type: "scene-lang-system"
+ system:
+   encoder_hidden_dims: [256, 128, 32, 3]
+   decoder_hidden_dims: [32, 128, 256, 512]
+   xyz_noise_ratio: [1000, 0.0, 0.0, 3000]
+   drop_ooi_ratio: 0.3
+   crop_with_lang: true
+   densify: false
+
+   geometry_type: "gaussian-splatting"
+   geometry:
+     ooi_bbox: [360,370,730,590]
+     geometry_convert_from: depth:assets/anime.png
+     position_lr: [0, 0.001, 0.00002, 1000]
+     scaling_lr: 0.05
+     feature_lr: 0.01
+     opacity_lr: 0.05
+     rotation_lr: 0.005
+     lang_lr: 0.0003
+     densification_interval: 300
+     prune_interval: 300
+     opacity_reset_interval: 50000000
+     densify_from_iter: 500
+     densify_until_iter: ${trainer.max_steps}
+     prune_from_iter: 500
+     prune_until_iter: ${trainer.max_steps}
+     densify_grad_threshold: 0.01
+     min_opac_prune: 0.005
+     split_thresh: 0.02
+     radii2d_thresh: 1000
+
+     init_num_pts: 4096
+     pc_init_radius: 0.8
+     opacity_init: 0.2
+
+     empty_prompt: ${system.empty_prompt}
+     prompt: ${system.prompt_processor.prompt}
+     max_scaling: 0.2
+
+   renderer_type: "diff-gaussian-rasterizer"
+   renderer:
+     debug: false
+     invert_bg_prob: 0.5
+
+   material_type: "no-material" # unused
+   material:
+     n_output_dims: 0
+
+   background_type: "solid-color-background" # unused
+
+   prompt_processor_type: "stable-diffusion-prompt-processor"
+   prompt_processor:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     # pretrained_model_name_or_path: "/mnt/petrelfs/zhangqihang/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base"
+     prompt: ???
+     empty_prompt: "empty"
+
+   guidance_type: "stable-diffusion-guidance"
+   guidance:
+     pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+     # pretrained_model_name_or_path: "/mnt/petrelfs/zhangqihang/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base"
+     guidance_scale: 5.0
+     weighting_strategy: sds
+     min_step_percent: 0.02
+     max_step_percent: [0, 0.5, 0.1, 1000]
+     csd: false
+
+   # guidance_type: "stable-diffusion-vsd-guidance"
+   # guidance:
+   #   pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
+   #   pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1"
+   #   guidance_scale: 7.5
+   #   min_step_percent: 0.02
+
+   exporter_type: "gaussian-mesh-exporter"
+
+   sam_clip:
+     use_mobile_sam: True
+
+   loggers:
+     wandb:
+       enable: false
+       project: '3ditscene'
+       name: "${tag}"
+
+   loss:
+     lambda_sds: 0.01
+     lambda_ref: 1000
+     lambda_depth: 0.0
+     lambda_position: 1.0
+     lambda_opacity: 0.0001
+     lambda_scales: 0.0001
+     lambda_tv_loss: 1.0
+     lambda_depth_tv_loss: 1.0
+     lambda_scaling: 0.0
+
+ trainer:
+   max_steps: 1500
+   log_every_n_steps: 1
+   num_sanity_val_steps: 110
+   val_check_interval: 500
+   enable_progress_bar: true
+   precision: 32-true
+
+ checkpoint:
+   save_last: true # save at each validation time
+   save_top_k: -1
+   every_n_train_steps: 1000
+   save_weights_only: true
+   # every_n_train_steps: ${trainer.max_steps}
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/exporter.py ADDED
@@ -0,0 +1,44 @@
+ from dataclasses import dataclass, field
+
+ import cv2
+ import numpy as np
+ import threestudio
+ import torch
+ from threestudio.models.background.base import BaseBackground
+ from threestudio.models.exporters.base import Exporter, ExporterOutput
+ from threestudio.models.geometry.base import BaseGeometry
+ from threestudio.models.materials.base import BaseMaterial
+ from threestudio.models.mesh import Mesh
+ from threestudio.utils.rasterize import NVDiffRasterizerContext
+ from threestudio.utils.typing import *
+
+
+ @threestudio.register("gaussian-mesh-exporter")
+ class MeshExporter(Exporter):
+     @dataclass
+     class Config(Exporter.Config):
+         fmt: str = "obj"
+         save_name: str = "model"
+         save_video: bool = True
+
+     cfg: Config
+
+     def configure(
+         self,
+         geometry: BaseGeometry,
+         material: BaseMaterial,
+         background: BaseBackground,
+     ) -> None:
+         super().configure(geometry, material, background)
+
+     def __call__(self) -> List[ExporterOutput]:
+         mesh: Mesh = self.geometry.extract_mesh()
+         return self.export_obj(mesh)
+
+     def export_obj(self, mesh: Mesh) -> List[ExporterOutput]:
+         params = {"mesh": mesh}
+         return [
+             ExporterOutput(
+                 save_name=f"{self.cfg.save_name}.obj", save_type="obj", params=params
+             )
+         ]
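The decorator registers this exporter under the name the configs select with `exporter_type: "gaussian-mesh-exporter"`. A rough sketch of how a registered component is looked up, assuming the construction pattern threestudio's base system uses (a config dict plus `geometry`/`material`/`background` keyword arguments, which is an assumption here; `geometry`, `material`, and `background` below are placeholders for objects taken from a trained system):
```
import threestudio

exporter_cls = threestudio.find("gaussian-mesh-exporter")   # registry lookup
exporter = exporter_cls(
    {"save_name": "model"},                                 # Config fields
    geometry=geometry, material=material, background=background,
)
outputs = exporter()  # [ExporterOutput(save_name="model.obj", save_type="obj", ...)]
```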
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_base.py ADDED
@@ -0,0 +1,1469 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+ import math
12
+ import os
13
+ import random
14
+ import sys
15
+ import argparse
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from typing import NamedTuple
19
+
20
+ import numpy as np
21
+ import cv2
22
+ from PIL import Image
23
+ import threestudio
24
+ import torch
25
+ import torch.nn as nn
26
+ import torch.nn.functional as F
27
+ import torchvision
28
+ from transformers import pipeline
29
+ from plyfile import PlyData, PlyElement
30
+ from simple_knn._C import distCUDA2
31
+ import diffusers
32
+ from diffusers import StableDiffusionInpaintPipeline, AutoPipelineForInpainting
33
+ from threestudio.models.geometry.base import BaseGeometry
34
+ from threestudio.utils.misc import C
35
+ from threestudio.utils.typing import *
36
+ from segment_anything import sam_model_registry, SamPredictor
37
+ import matplotlib.pyplot as plt
38
+
39
+ from .gaussian_io import GaussianIO
40
+ import imageio
41
+
42
+ from scipy.spatial.transform import Rotation as R
43
+
44
+ REORDER_MTX = torch.tensor([
45
+ [0,0,0,1],
46
+ [1,0,0,0],
47
+ [0,1,0,0],
48
+ [0,0,1,0]
49
+ ]).cuda().float()
50
+
51
+ def build_rotation(r):
52
+ norm = torch.sqrt(
53
+ r[:, 0] * r[:, 0] + r[:, 1] * r[:, 1] + r[:, 2] * r[:, 2] + r[:, 3] * r[:, 3]
54
+ )
55
+
56
+ q = r / norm[:, None]
57
+
58
+ R = torch.zeros((q.size(0), 3, 3), device="cuda")
59
+ r = q[:, 0]
60
+ x = q[:, 1]
61
+ y = q[:, 2]
62
+ z = q[:, 3]
63
+
64
+ R[:, 0, 0] = 1 - 2 * (y * y + z * z)
65
+ R[:, 0, 1] = 2 * (x * y - r * z)
66
+ R[:, 0, 2] = 2 * (x * z + r * y)
67
+ R[:, 1, 0] = 2 * (x * y + r * z)
68
+ R[:, 1, 1] = 1 - 2 * (x * x + z * z)
69
+ R[:, 1, 2] = 2 * (y * z - r * x)
70
+ R[:, 2, 0] = 2 * (x * z - r * y)
71
+ R[:, 2, 1] = 2 * (y * z + r * x)
72
+ R[:, 2, 2] = 1 - 2 * (x * x + y * y)
73
+ return R
74
+
75
+ def rotation_matrix(angle_x, angle_y, angle_z):
76
+ # Convert angles to radians
77
+ rad_x = torch.deg2rad(torch.tensor(angle_x))
78
+ rad_y = torch.deg2rad(torch.tensor(angle_y))
79
+ rad_z = torch.deg2rad(torch.tensor(angle_z))
80
+
81
+ # Compute sine and cosine of the angles
82
+ cos_x = torch.cos(rad_x)
83
+ sin_x = torch.sin(rad_x)
84
+ cos_y = torch.cos(rad_y)
85
+ sin_y = torch.sin(rad_y)
86
+ cos_z = torch.cos(rad_z)
87
+ sin_z = torch.sin(rad_z)
88
+
89
+ # Construct the rotation matrix
90
+ Rx = torch.tensor([[1, 0, 0],
91
+ [0, cos_x, -sin_x],
92
+ [0, sin_x, cos_x]])
93
+
94
+ Ry = torch.tensor([[cos_y, 0, sin_y],
95
+ [0, 1, 0],
96
+ [-sin_y, 0, cos_y]])
97
+
98
+ Rz = torch.tensor([[cos_z, -sin_z, 0],
99
+ [sin_z, cos_z, 0],
100
+ [0, 0, 1]])
101
+
102
+ # Combine the rotation matrices
103
+ rotation_matrix = Rz @ Ry @ Rx
104
+
105
+ return rotation_matrix
106
+
107
+ # from scipy.spatial import KDTree
108
+ #
109
+ # def distCUDA2(points):
110
+ # points_np = points.detach().cpu().float().numpy()
111
+ # dists, inds = KDTree(points_np).query(points_np, k=4)
112
+ # meanDists = (dists[:, 1:] ** 2).mean(1)
113
+ #
114
+ # return torch.tensor(meanDists, dtype=points.dtype, device=points.device)
115
+
116
+ sys.path.append('./utils/GeoWizard/geowizard')
117
+ from models.geowizard_pipeline import DepthNormalEstimationPipeline
118
+
119
+ C0 = 0.28209479177387814
120
+
121
+ def propagate(canvas):
122
+ H, W = canvas.shape
123
+ dx = [0, 1, 0, -1]
124
+ dy = [1, 0, -1, 0]
125
+ count = np.zeros_like(canvas)
126
+
127
+ while 1:
128
+ curr_mask = canvas > 0
129
+ if sum(sum(curr_mask)) == H * W:
130
+ break
131
+ expand_mask = (cv2.blur(curr_mask.astype(np.float32), (3, 3)) > 0)
132
+ x, y = np.where(np.logical_and(expand_mask, ~curr_mask))
133
+ old_canvas = canvas.copy()
134
+
135
+ for xx, yy in zip(x, y):
136
+ for i in range(4):
137
+ ref_x = xx + dx[i]
138
+ ref_y = yy + dy[i]
139
+ if 0<=ref_x<H and 0<=ref_y<W and old_canvas[ref_x, ref_y] != 0:
140
+ canvas[xx, yy] = old_canvas[ref_x, ref_y]
141
+ count[xx, yy] = count[ref_x, ref_y] + 1
142
+
143
+ weight = (count.max() - count) / count.max()
144
+ return canvas * weight
145
+
146
+ def save_pc(save_file, pts, color):
147
+ '''
148
+ pts: N, 3
149
+ color: N, 3
150
+ '''
151
+ if color.dtype == np.dtype('float64'):
152
+ color = (color * 255).astype(np.uint8)
153
+ with open(save_file, 'w') as f:
154
+ f.writelines((
155
+ "ply\n",
156
+ "format ascii 1.0\n",
157
+ "element vertex {}\n".format(pts.shape[0]),
158
+ "property float x\n",
159
+ "property float y\n",
160
+ "property float z\n",
161
+ "property uchar red\n",
162
+ "property uchar green\n",
163
+ "property uchar blue\n",
164
+ "end_header\n"))
165
+ for i in range(pts.shape[0]):
166
+ point = "%f %f %f %d %d %d\n" % (pts[i, 0], pts[i, 1], pts[i, 2], color[i, 0], color[i, 1], color[i, 2])
167
+ f.writelines(point)
168
+ threestudio.info(f"Saved point cloud to {save_file}.")
169
+
170
+
171
+ def RGB2SH(rgb):
172
+ return (rgb - 0.5) / C0
173
+
174
+
175
+ def SH2RGB(sh):
176
+ return sh * C0 + 0.5
177
+
178
+
179
+ def inverse_sigmoid(x):
180
+ return torch.log(x / (1 - x))
181
+
182
+
183
+ def strip_lowerdiag(L):
184
+ uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda")
185
+
186
+ uncertainty[:, 0] = L[:, 0, 0]
187
+ uncertainty[:, 1] = L[:, 0, 1]
188
+ uncertainty[:, 2] = L[:, 0, 2]
189
+ uncertainty[:, 3] = L[:, 1, 1]
190
+ uncertainty[:, 4] = L[:, 1, 2]
191
+ uncertainty[:, 5] = L[:, 2, 2]
192
+ return uncertainty
193
+
194
+
195
+ def strip_symmetric(sym):
196
+ return strip_lowerdiag(sym)
197
+
198
+
199
+ def gaussian_3d_coeff(xyzs, covs):
200
+ # xyzs: [N, 3]
201
+ # covs: [N, 6]
202
+ x, y, z = xyzs[:, 0], xyzs[:, 1], xyzs[:, 2]
203
+ a, b, c, d, e, f = (
204
+ covs[:, 0],
205
+ covs[:, 1],
206
+ covs[:, 2],
207
+ covs[:, 3],
208
+ covs[:, 4],
209
+ covs[:, 5],
210
+ )
211
+
212
+ # eps must be small enough !!!
213
+ inv_det = 1 / (
214
+ a * d * f + 2 * e * c * b - e**2 * a - c**2 * d - b**2 * f + 1e-24
215
+ )
216
+ inv_a = (d * f - e**2) * inv_det
217
+ inv_b = (e * c - b * f) * inv_det
218
+ inv_c = (e * b - c * d) * inv_det
219
+ inv_d = (a * f - c**2) * inv_det
220
+ inv_e = (b * c - e * a) * inv_det
221
+ inv_f = (a * d - b**2) * inv_det
222
+
223
+ power = (
224
+ -0.5 * (x**2 * inv_a + y**2 * inv_d + z**2 * inv_f)
225
+ - x * y * inv_b
226
+ - x * z * inv_c
227
+ - y * z * inv_e
228
+ )
229
+
230
+ power[power > 0] = -1e10 # abnormal values... make weights 0
231
+
232
+ return torch.exp(power)
233
+
234
+
235
+ def build_rotation(r):
236
+ norm = torch.sqrt(
237
+ r[:, 0] * r[:, 0] + r[:, 1] * r[:, 1] + r[:, 2] * r[:, 2] + r[:, 3] * r[:, 3]
238
+ )
239
+
240
+ q = r / norm[:, None]
241
+
242
+ R = torch.zeros((q.size(0), 3, 3), device="cuda")
243
+
244
+ r = q[:, 0]
245
+ x = q[:, 1]
246
+ y = q[:, 2]
247
+ z = q[:, 3]
248
+
249
+ R[:, 0, 0] = 1 - 2 * (y * y + z * z)
250
+ R[:, 0, 1] = 2 * (x * y - r * z)
251
+ R[:, 0, 2] = 2 * (x * z + r * y)
252
+ R[:, 1, 0] = 2 * (x * y + r * z)
253
+ R[:, 1, 1] = 1 - 2 * (x * x + z * z)
254
+ R[:, 1, 2] = 2 * (y * z - r * x)
255
+ R[:, 2, 0] = 2 * (x * z - r * y)
256
+ R[:, 2, 1] = 2 * (y * z + r * x)
257
+ R[:, 2, 2] = 1 - 2 * (x * x + y * y)
258
+ return R
259
+
260
+
261
+ def build_scaling_rotation(s, r):
262
+ L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda")
263
+ R = build_rotation(r)
264
+
265
+ L[:, 0, 0] = s[:, 0]
266
+ L[:, 1, 1] = s[:, 1]
267
+ L[:, 2, 2] = s[:, 2]
268
+
269
+ L = R @ L
270
+ return L
271
+
272
+
273
+ def safe_state(silent):
274
+ old_f = sys.stdout
275
+
276
+ class F:
277
+ def __init__(self, silent):
278
+ self.silent = silent
279
+
280
+ def write(self, x):
281
+ if not self.silent:
282
+ if x.endswith("\n"):
283
+ old_f.write(
284
+ x.replace(
285
+ "\n",
286
+ " [{}]\n".format(
287
+ str(datetime.now().strftime("%d/%m %H:%M:%S"))
288
+ ),
289
+ )
290
+ )
291
+ else:
292
+ old_f.write(x)
293
+
294
+ def flush(self):
295
+ old_f.flush()
296
+
297
+ sys.stdout = F(silent)
298
+
299
+ random.seed(0)
300
+ np.random.seed(0)
301
+ torch.manual_seed(0)
302
+ torch.cuda.set_device(torch.device("cuda:0"))
303
+
304
+
305
+ class BasicPointCloud(NamedTuple):
306
+ points: np.array
307
+ colors: np.array
308
+ normals: np.array
309
+
310
+
311
+ class Camera(NamedTuple):
312
+ FoVx: torch.Tensor
313
+ FoVy: torch.Tensor
314
+ camera_center: torch.Tensor
315
+ image_width: int
316
+ image_height: int
317
+ world_view_transform: torch.Tensor
318
+ full_proj_transform: torch.Tensor
319
+
320
+ def fill_mask(mask):
321
+ mask = np.array(mask)
322
+ canvas = np.zeros_like(mask)
323
+ H, W = mask.shape
324
+ for i in range(H):
325
+ for p in range(0, W):
326
+ if mask[i, p]:
327
+ canvas[i, p] = 1
328
+ else:
329
+ break
330
+ for p in range(W-1, 0, -1):
331
+ if mask[i, p]:
332
+ canvas[i, p] = 1
333
+ else:
334
+ break
335
+
336
+ for i in range(W):
337
+ for p in range(0, H):
338
+ if mask[p, i]:
339
+ canvas[p, i] = 1
340
+ else:
341
+ break
342
+ for p in range(H-1, 0, -1):
343
+ if mask[p, i]:
344
+ canvas[p, i] = 1
345
+ else:
346
+ break
347
+ mask = np.logical_and(mask, canvas)
348
+ return Image.fromarray(mask)
349
+
350
+ def parse_wh(wh):
351
+ try:
352
+ W, H = wh
353
+ except:
354
+ W = H = wh
355
+ return W, H
356
+
357
+ @threestudio.register("gaussian-splatting")
358
+ class GaussianBaseModel(BaseGeometry, GaussianIO):
359
+ @dataclass
360
+ class Config(BaseGeometry.Config):
361
+ max_num: int = 500000
362
+ sh_degree: int = 0
363
+ position_lr: Any = 0.001
364
+ # scale_lr: Any = 0.003
365
+ feature_lr: Any = 0.01
366
+ opacity_lr: Any = 0.05
367
+ scaling_lr: Any = 0.005
368
+ rotation_lr: Any = 0.005
369
+ pred_normal: bool = False
370
+ normal_lr: Any = 0.001
371
+ lang_lr: float = 0.005
372
+
373
+ densification_interval: int = 50
374
+ prune_interval: int = 50
375
+ opacity_reset_interval: int = 100000
376
+ densify_from_iter: int = 100
377
+ prune_from_iter: int = 100
378
+ densify_until_iter: int = 2000
379
+ prune_until_iter: int = 2000
380
+ densify_grad_threshold: Any = 0.01
381
+ min_opac_prune: Any = 0.005
382
+ split_thresh: Any = 0.02
383
+ radii2d_thresh: Any = 1000
384
+
385
+ sphere: bool = False
386
+ prune_big_points: bool = False
387
+ color_clip: Any = 2.0
388
+
389
+ geometry_convert_from: str = ""
390
+ load_ply_only_vertex: bool = False
391
+ init_num_pts: int = 100
392
+ pc_init_radius: float = 0.8
393
+ opacity_init: float = 0.1
394
+
395
+ img_resolution: Any = 512
396
+
397
+ shap_e_guidance_config: dict = field(default_factory=dict)
398
+
399
+ max_scaling: float = 100
400
+ sam_ckpt_path: str = "ckpts/sam_vit_h_4b8939.pth"
401
+ ooi_bbox: Any = None
402
+
403
+ prompt: Any = None
404
+ empty_prompt: Any = None
405
+ lang_beta_1: float = 0.9
406
+ lang_beta_2: float = 0.999
407
+
408
+ inference_only: bool = False
409
+ pc_max_resolution: int = 512
410
+
411
+ use_sdxl_for_inpaint: bool = False
412
+
413
+ cfg: Config
414
+
415
+ def setup_functions(self):
416
+ def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation):
417
+ L = build_scaling_rotation(scaling_modifier * scaling, rotation)
418
+ actual_covariance = L @ L.transpose(1, 2)
419
+ symm = strip_symmetric(actual_covariance)
420
+ return symm
421
+
422
+ self.scaling_activation = torch.exp
423
+ self.scaling_inverse_activation = torch.log
424
+
425
+ self.covariance_activation = build_covariance_from_scaling_rotation
426
+
427
+ self.opacity_activation = torch.sigmoid
428
+ self.inverse_opacity_activation = inverse_sigmoid
429
+
430
+ self.rotation_activation = torch.nn.functional.normalize
431
+ self.color_clip = C(self.cfg.color_clip, 0, 0)
432
+
433
+ self.fixed_xyz = None
434
+ self.fixed_rot = None
435
+
436
+ if not self.cfg.inference_only:
437
+ sam = sam_model_registry["vit_h"](checkpoint=self.cfg.sam_ckpt_path).to('cuda')
438
+ self.predictor = SamPredictor(sam)
439
+
440
+ def project_pc(self, c2w, H=256, W=None):
441
+ if W is None:
442
+ W = H
443
+ B = c2w.shape[0]
444
+
445
+ mask = torch.zeros([B, H, W], device='cuda')
446
+ depth_canvas = torch.zeros([B, H, W], device='cuda')
447
+
448
+ # for pc in [self.bg_point_cloud, self.point_cloud]:
449
+ pc_cam = torch.einsum('bxy,ny->bnx', torch.linalg.inv(c2w), self.point_cloud)
450
+ depth = -1 * pc_cam[..., 2].view(pc_cam.shape[0], -1)
451
+ pc_cam = (pc_cam / pc_cam[..., 2:3])[..., :3]
452
+ pc_2d = torch.einsum('xy,bny->bnx', self.proj_mtx, pc_cam).clamp(0, 1)
453
+ pc_2d[..., 0] = pc_2d[..., 0] * (W-1)
454
+ pc_2d[..., 1] = pc_2d[..., 1] * (H-1)
455
+ pc_2d = pc_2d.long()
456
+ for i in range(pc_2d.shape[0]):
457
+ x = (W - pc_2d[i, :, 0]).clamp(0, W-1)
458
+ y = (pc_2d[i, :, 1]).clamp(0, H-1)
459
+ unique_id = x * H + y
460
+ map_2d = np.zeros((W+1)*(H+1)) + 1e8
461
+ np.minimum.at(map_2d, unique_id.cpu(), depth[i].cpu())
462
+ map_2d[map_2d==1e8] = 0
463
+ positive_unique_id = np.where(map_2d>0)[0]
464
+ x, y = positive_unique_id // H, positive_unique_id % H
465
+ mask[i, y, x] = 1.0
466
+ depth_canvas[i, y, x] = torch.tensor(map_2d[positive_unique_id], device='cuda', dtype=torch.float)
467
+ # depth_canvas[i, y, x] = depth[i]
468
+
469
+ # pc_cam = torch.einsum('bxy,hwy->bhwx', torch.linalg.inv(c2w), self.point_cloud)
470
+ # depth = -1 * pc_cam[..., 2].view(pc_cam.shape[0], -1)
471
+ # pc_cam = (pc_cam / pc_cam[..., 2:3])[..., :3]
472
+ # pc_2d = torch.einsum('xy,bhwy->bhwx', self.proj_mtx, pc_cam).clamp(0, 1)
473
+ # pc_2d[..., 0] = pc_2d[..., 0] * (W-1)
474
+ # pc_2d[..., 1] = pc_2d[..., 1] * (H-1)
475
+ # pc_2d = (pc_2d.long()).view(pc_2d.shape[0], -1, pc_2d.shape[-1])
476
+
477
+
478
+ # mask = self.blur_kernel(mask) > 0
479
+ mask = torchvision.transforms.functional.gaussian_blur(mask, 3) > 0
480
+ # mask = mask > 0
481
+ return mask, depth_canvas
482
+
483
+ def img2pc_inpaint(self, img, c2w=None, gt_depth=None, mask=None, proj_func=None):
484
+ W, H = parse_wh(self.cfg.img_resolution)
485
+ if max(W, H) > self.cfg.pc_max_resolution:
486
+ W, H = int(W / max(W, H) * self.cfg.pc_max_resolution), int(H / max(W, H) * self.cfg.pc_max_resolution)
487
+
488
+ with torch.no_grad():
489
+ self.geowizard_pipe.to('cuda')
490
+ depth = self.geowizard_pipe(
491
+ img,
492
+ denoising_steps = 25,
493
+ ensemble_size = 3,
494
+ processing_res = 768,
495
+ match_input_res = False,
496
+ domain = 'outdoor',
497
+ color_map = 'Spectral',
498
+ gt_depth = gt_depth, mask = mask,
499
+ show_progress_bar = True)['depth_np']
500
+ self.geowizard_pipe.to('cpu')
501
+ ret_depth = depth.copy()
502
+ depth = torch.from_numpy(depth)[None]
503
+ depth = torch.nn.functional.interpolate(depth[None], size=(H, W), mode='bilinear', align_corners=True).squeeze()
504
+
505
+ depth = depth.cpu().numpy()
506
+ if proj_func is None:
507
+ depth = depth * 20 + 5
508
+ else:
509
+ depth = proj_func(depth)
510
+
511
+ depth = depth * -1
512
+ x, y = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32), indexing='xy')
513
+ x = x / float(W-1)
514
+ y = y / float(H-1)
515
+ xyz = np.stack((x, y, np.ones_like(x)), 0).transpose(1, 2, 0)
516
+ xyz[..., 0] = 1 - xyz[..., 0]
517
+
518
+ fov = 60 / 180 * np.pi
519
+ proj_mtx = np.array([
520
+ [1 / (2 * np.tan(fov/2)), 0, 1/2],
521
+ [0, 1 / (2 * np.tan(fov/2)), 1/2],
522
+ [0, 0, 1],
523
+ ])
524
+ self.proj_mtx = torch.from_numpy(proj_mtx).cuda().float()
525
+ if c2w is None:
526
+ c2w = np.array([0.0000, 0.0000, 1.0000, 2.5000, 1.0000, 0.0000, -0.0000, 0.0000, -0.0000, 1.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000]).reshape(4, 4)
527
+ else:
528
+ c2w = c2w[0].cpu().numpy()
529
+ xyz = np.einsum('ab,hwb->hwa', np.linalg.inv(proj_mtx), xyz)
530
+ xyz = xyz * depth[..., None]
531
+ xyz = np.concatenate([xyz, np.ones_like(x)[..., None]], 2)
532
+ xyz = np.einsum('ab,hwb->hwa', c2w, xyz)
533
+ return xyz, ret_depth
534
+
535
+ def inpaint(self, img, mask, prompt):
536
+ # inpaint using base pipe
537
+ N = 512
538
+ img = img.convert("RGB").resize((N, N))
539
+ mask = mask.convert("RGB").resize((N, N))
540
+ self.base_inpainting_pipe.to("cuda")
541
+ img = self.base_inpainting_pipe(prompt=prompt, image=img, mask_image=mask, guidance_scale=7.5).images[0]
542
+ self.base_inpainting_pipe.to("cpu")
543
+ torch.cuda.empty_cache()
544
+
545
+ if self.cfg.use_sdxl_for_inpaint:
546
+ # inpaint using sdxl pipe
547
+ N = 1024
548
+ img = img.convert("RGB").resize((N, N))
549
+ mask = mask.convert("RGB").resize((N, N))
550
+ self.sdxl_inpainting_pipe.to("cuda")
551
+ img = self.sdxl_inpainting_pipe(prompt=prompt, image=img, mask_image=mask, guidance_scale=7.5, num_inference_steps=20, strength=0.99).images[0]
552
+ self.sdxl_inpainting_pipe.to("cpu")
553
+
554
+ return img
555
+
556
+ def configure(self) -> None:
557
+ super().configure()
558
+ self.active_sh_degree = 0
559
+ self.max_sh_degree = self.cfg.sh_degree
560
+ self._xyz = torch.empty(0)
561
+ self._features_dc = torch.empty(0)
562
+ self._features_rest = torch.empty(0)
563
+ self._scaling = torch.empty(0)
564
+ self._rotation = torch.empty(0)
565
+ self._opacity = torch.empty(0)
566
+ self._opacity_mask = None
567
+ self.max_radii2D = torch.empty(0)
568
+ self.xyz_gradient_accum = torch.empty(0)
569
+ self.denom = torch.empty(0)
570
+ self.noise_ratio = 0.0
571
+ if self.cfg.pred_normal:
572
+ self._normal = torch.empty(0)
573
+ self.optimizer = None
574
+ self.setup_functions()
575
+ self.save_path = None
576
+ self.fixed_xyz = None
577
+ self.fixed_rot = None
578
+
579
+ if self.cfg.inference_only:
580
+ return
581
+ # setup GeoWizard
582
+ geowizard_checkpoint_path = 'lemonaddie/geowizard'
583
+ self.geowizard_pipe = DepthNormalEstimationPipeline.from_pretrained(
584
+ geowizard_checkpoint_path, torch_dtype=torch.float32)
585
+
586
+ self.base_inpainting_pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16)
587
+ # self.base_inpainting_pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, safety_checker=None)
588
+ if self.cfg.use_sdxl_for_inpaint:
589
+ self.sdxl_inpainting_pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16")
590
+ self.sdxl_inpainting_pipe.scheduler = diffusers.EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
591
+
592
+ if self.cfg.geometry_convert_from.startswith("depth:"):
593
+ # estimate depth
594
+ W, H = parse_wh(self.cfg.img_resolution)
595
+ if max(W, H) > self.cfg.pc_max_resolution:
596
+ W, H = int(W / max(W, H) * self.cfg.pc_max_resolution), int(H / max(W, H) * self.cfg.pc_max_resolution)
597
+ img = self.cfg.geometry_convert_from[len("depth:"):]
598
+ raw_img = img = Image.open(img).convert("RGB")
599
+ img = img.resize((W, H))
600
+
601
+ bg_xyz, bg_color = [], []
602
+
603
+ with torch.no_grad():
604
+ self.predictor.set_image(np.array(raw_img))
605
+ self.ooi_masks = []
606
+ total_inp_ooi_masks = None
607
+ total_ooi_masks = []
608
+ for i in range(len(self.cfg.ooi_bbox) // 4):
609
+ bbox = np.array(self.cfg.ooi_bbox[4*i:4*i+4])
610
+ masks, _, _ = self.predictor.predict(
611
+ point_coords=None,
612
+ point_labels=None,
613
+ box=bbox[None, :],
614
+ multimask_output=False,
615
+ )
616
+ # plt.imshow(masks[0])
617
+ # plt.savefig(os.path.join(self.save_path, f'mask_{i}.png'))
618
+ ooi_masks = np.array(Image.fromarray(masks[0]).resize((W, H), Image.NEAREST))
619
+ ooi_masks = (cv2.blur(ooi_masks.astype(np.float32), (5, 5)) > 0)
620
+ inp_ooi_masks = (cv2.blur(ooi_masks.astype(np.float32), (7, 7)) > 0)
621
+ if i == 0:
622
+ total_inp_ooi_masks = inp_ooi_masks
623
+ else:
624
+ total_inp_ooi_masks += inp_ooi_masks
625
+ total_ooi_masks.append(ooi_masks)
626
+
627
+ total_inp_ooi_masks = total_inp_ooi_masks > 0
628
+ original_wh = parse_wh(self.cfg.img_resolution)
629
+ bg_image = self.inpaint(img=img, mask=Image.fromarray(total_inp_ooi_masks), prompt=self.cfg.empty_prompt).resize((original_wh))
630
+ self.bg_image = np.array(bg_image)
631
+ self.bg_image_mask = np.array(Image.fromarray(total_inp_ooi_masks).resize((original_wh)))
632
+
633
+ xyz, depth = self.img2pc_inpaint(img)
634
+ self.point_cloud = torch.from_numpy(xyz).cuda().float().reshape(-1, 4)
635
+
636
+ for ooi_masks in total_ooi_masks:
637
+ transit_masks = np.logical_and(cv2.blur(ooi_masks.astype(np.float32), (3, 3)) > 0, ~ooi_masks)
638
+ depth_tensor = torch.from_numpy(depth)[None, None].cuda() * 2 - 1
639
+ self.ooi_masks.append(torch.tensor(ooi_masks.reshape(-1).astype(np.uint8), device='cuda').float().bool())
640
+ ooi_masks = cv2.blur(ooi_masks.astype(np.float32), (9, 9)) > 0
641
+ mask = torch.from_numpy(ooi_masks.astype(np.float32))[None, None].cuda()
642
+ bg_xyz_pc, _ = self.img2pc_inpaint(bg_image, gt_depth=depth_tensor, mask=1-mask)
643
+
644
+ bg_xyz.append(bg_xyz_pc[ooi_masks])
645
+ bg_color.append(np.array(bg_image.resize((W, H)))[ooi_masks] / 255)
646
+
647
+ # xyz = xyz[..., :3].reshape(-1, 3)
648
+ xyz = xyz.reshape(-1, 4)
649
+ color = np.array(img).reshape(-1, 3) / 255
650
+ bg_xyz = np.concatenate(bg_xyz, 0)
651
+ additional_pts_num = bg_xyz.shape[0]
652
+ xyz = np.concatenate([xyz, bg_xyz], 0)
653
+ self.point_cloud = torch.from_numpy(xyz).cuda().float()
654
+
655
+ color = np.concatenate([color, np.concatenate(bg_color, 0)], 0)
656
+ for i in range(len(self.ooi_masks)):
657
+ self.register_buffer(f"ooi_masks_{i}", torch.cat([self.ooi_masks[i], torch.zeros([additional_pts_num], device='cuda').bool()]) )
658
+ self.ooi_masks[i] = getattr(self, f"ooi_masks_{i}")
659
+ self.register_buffer(f"_delete_mask", torch.ones_like(self.ooi_masks[0].float()))
660
+
661
+ # project to 3D space
662
+ xyz = xyz[:, :3]
663
+ color = color
664
+ pcd = BasicPointCloud(
665
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
666
+ )
667
+ self.create_from_pcd(pcd, 10)
668
+ self.training_setup()
669
+
670
+ elif self.cfg.geometry_convert_from.startswith("shap-e:"):
671
+ shap_e_guidance = threestudio.find("shap-e-guidance")(
672
+ self.cfg.shap_e_guidance_config
673
+ )
674
+ prompt = self.cfg.geometry_convert_from[len("shap-e:") :]
675
+ xyz, color = shap_e_guidance(prompt)
676
+
677
+ pcd = BasicPointCloud(
678
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
679
+ )
680
+ self.create_from_pcd(pcd, 10)
681
+ self.training_setup()
682
+
683
+ # Support Initialization from OpenLRM, Please see https://github.com/Adamdad/threestudio-lrm
684
+ elif self.cfg.geometry_convert_from.startswith("lrm:"):
685
+ lrm_guidance = threestudio.find("lrm-guidance")(
686
+ self.cfg.shap_e_guidance_config
687
+ )
688
+ prompt = self.cfg.geometry_convert_from[len("lrm:") :]
689
+ xyz, color = lrm_guidance(prompt)
690
+
691
+ pcd = BasicPointCloud(
692
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
693
+ )
694
+ self.create_from_pcd(pcd, 10)
695
+ self.training_setup()
696
+
697
+ elif os.path.exists(self.cfg.geometry_convert_from):
698
+ threestudio.info(
699
+ "Loading point cloud from %s" % self.cfg.geometry_convert_from
700
+ )
701
+ if self.cfg.geometry_convert_from.endswith(".ckpt"):
702
+ ckpt_dict = torch.load(self.cfg.geometry_convert_from)
703
+ num_pts = ckpt_dict["state_dict"]["geometry._xyz"].shape[0]
704
+ pcd = BasicPointCloud(
705
+ points=np.zeros((num_pts, 3)),
706
+ colors=np.zeros((num_pts, 3)),
707
+ normals=np.zeros((num_pts, 3)),
708
+ )
709
+ self.create_from_pcd(pcd, 10)
710
+ self.training_setup()
711
+ new_ckpt_dict = {}
712
+ for key in self.state_dict():
713
+ if ckpt_dict["state_dict"].__contains__("geometry." + key):
714
+ new_ckpt_dict[key] = ckpt_dict["state_dict"]["geometry." + key]
715
+ else:
716
+ new_ckpt_dict[key] = self.state_dict()[key]
717
+ self.load_state_dict(new_ckpt_dict)
718
+ elif self.cfg.geometry_convert_from.endswith(".ply"):
719
+ if self.cfg.load_ply_only_vertex:
720
+ plydata = PlyData.read(self.cfg.geometry_convert_from)
721
+ vertices = plydata["vertex"]
722
+ positions = np.vstack(
723
+ [vertices["x"], vertices["y"], vertices["z"]]
724
+ ).T
725
+ if vertices.__contains__("red"):
726
+ colors = (
727
+ np.vstack(
728
+ [vertices["red"], vertices["green"], vertices["blue"]]
729
+ ).T
730
+ / 255.0
731
+ )
732
+ else:
733
+ shs = np.random.random((positions.shape[0], 3)) / 255.0
734
+ C0 = 0.28209479177387814
735
+ colors = shs * C0 + 0.5
736
+ normals = np.zeros_like(positions)
737
+ pcd = BasicPointCloud(
738
+ points=positions, colors=colors, normals=normals
739
+ )
740
+ self.create_from_pcd(pcd, 10)
741
+ else:
742
+ self.load_ply(self.cfg.geometry_convert_from)
743
+ self.training_setup()
744
+ else:
745
+ threestudio.info("Geometry not found, initilization with random points")
746
+ num_pts = self.cfg.init_num_pts
747
+ phis = np.random.random((num_pts,)) * 2 * np.pi
748
+ costheta = np.random.random((num_pts,)) * 2 - 1
749
+ thetas = np.arccos(costheta)
750
+ mu = np.random.random((num_pts,))
751
+ radius = self.cfg.pc_init_radius * np.cbrt(mu)
752
+ x = radius * np.sin(thetas) * np.cos(phis)
753
+ y = radius * np.sin(thetas) * np.sin(phis)
754
+ z = radius * np.cos(thetas)
755
+ xyz = np.stack((x, y, z), axis=1)
756
+
757
+ shs = np.random.random((num_pts, 3)) / 255.0
758
+ C0 = 0.28209479177387814
759
+ color = shs * C0 + 0.5
760
+ pcd = BasicPointCloud(
761
+ points=xyz, colors=color, normals=np.zeros((num_pts, 3))
762
+ )
763
+
764
+ self.create_from_pcd(pcd, 10)
765
+ self.training_setup()
766
+
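+    # Lift an inpainted novel view into 3D: fill the disocclusion mask,
+    # inpaint the RGB there, estimate depth and align it to the rendered
+    # depth, then unproject the masked pixels and merge them into the
+    # current Gaussian set.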
+    def add_pc_from_novel_view(self, rgb, mask, depth, c2w, save_path=None):
+        W, H = parse_wh(self.cfg.img_resolution)
+        if max(W, H) > self.cfg.pc_max_resolution:
+            W, H = int(W / max(W, H) * self.cfg.pc_max_resolution), int(H / max(W, H) * self.cfg.pc_max_resolution)
+        # depth estimation -> add points.
+        mask = fill_mask(mask)
+        blur_mask = Image.fromarray(cv2.blur(np.array(mask).astype(np.float32), (7, 7)) > 0)
+        res = self.inpaint(img=rgb, mask=blur_mask, prompt=self.side_prompt)
+
+        self.geowizard_pipe.to('cuda')
+        depth_unaligned = self.geowizard_pipe(
+            res,
+            denoising_steps=25,
+            ensemble_size=3,
+            processing_res=768,
+            match_input_res=False,
+            domain='outdoor',
+            color_map='Spectral',
+            gt_depth=None, mask=None,
+            show_progress_bar=True)['depth_np']
+        self.geowizard_pipe.to('cpu')
+        prev_depth = depth_unaligned[~np.array(mask.resize((768, 768)))]
+        # inpaint the depth map
+        inpaint_mask = np.logical_and(~np.array(mask), depth[0].cpu().numpy().astype(np.uint8) == 0).astype(np.uint8)
+        l, r = depth[depth > 0].min().item(), depth.max().item()
+        depth = (depth - l) / (r - l) * 255
+        depth = cv2.inpaint(depth[0].cpu().numpy().astype(np.uint8), inpaint_mask, 3, cv2.INPAINT_TELEA)
+        depth = torch.tensor(depth)[None].cuda().float() / 255
+        reproj_func = lambda x: (x - prev_depth.min().item()) / (prev_depth.max().item() - prev_depth.min().item()) * (r - l) + l
+        depth = depth * (prev_depth.max() - prev_depth.min()) + prev_depth.min()
+        depth_tensor = torch.nn.functional.interpolate(depth[None].cuda(), 768, mode='nearest') * 2 - 1
+
+        _masks = cv2.blur(np.array(mask.resize((768, 768))).astype(float), (20, 20)) > 0
+        mask_tensor = torch.from_numpy(_masks.astype(np.float32))[None, None].cuda()
+        bg_xyz_pc, _ = self.img2pc_inpaint(res, gt_depth=depth_tensor, mask=1 - mask_tensor, proj_func=reproj_func, c2w=c2w)
+
+        mask = np.array(Image.fromarray(_masks).resize((W, H)))
+        new_xyz = bg_xyz_pc[mask][:, :3]
+        res = res.resize((W, H))
+        new_color = np.array(res)[mask] / 255
+        pcd = BasicPointCloud(points=new_xyz, colors=new_color, normals=np.zeros((new_xyz.shape[0], 3)))
+        self.merge_from_pcd(pcd, 10)
+
+        original_wh = parse_wh(self.cfg.img_resolution)
+        return res.resize(original_wh), Image.fromarray(_masks).resize(original_wh)
+
+    @property
+    def get_scaling(self):
+        if self.cfg.sphere:
+            return self.scaling_activation(
+                torch.mean(self._scaling, dim=-1).unsqueeze(-1).repeat(1, 3)
+            ).clip(0, self.cfg.max_scaling)
+        return self.scaling_activation(self._scaling).clip(0, self.cfg.max_scaling)
+
+    @property
+    def get_rotation(self):
+        return self.rotation_activation(self._rotation)
+
+    @property
+    def get_language_feature(self):
+        return self._language_feature
+
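+    # Positions with optional per-object jitter: when noise_ratio > 0, each
+    # object mask receives a shared random offset (used to visually perturb
+    # individual objects).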
+    @property
+    def get_xyz(self):
+        ret = self._xyz
+        if self.noise_ratio > 0.0:
+            offset = torch.zeros_like(ret)
+            for idx in range(len(self.ooi_masks)):
+                ooi_masks = getattr(self, f"ooi_masks_{idx}")
+                offset[ooi_masks] = torch.rand(3, device='cuda') * self.noise_ratio
+            ret = ret + offset
+        return ret
+
+    @property
+    def get_features(self):
+        features_dc = self._features_dc
+        features_dc = features_dc.clip(-self.color_clip, self.color_clip)
+        features_rest = self._features_rest
+        return torch.cat((features_dc, features_rest), dim=1)
+
+    @property
+    def get_opacity(self):
+        if self._opacity_mask is None:
+            ret = self.opacity_activation(self._opacity)
+        else:
+            ret = self.opacity_activation(self._opacity) * self._opacity_mask.unsqueeze(-1)
+
+        if self._delete_mask is None:
+            return ret
+        else:
+            return ret * self._delete_mask.unsqueeze(-1)
+
+    @property
+    def get_normal(self):
+        if self.cfg.pred_normal:
+            return self._normal
+        else:
+            raise ValueError("Normal is not predicted")
+
+    def recover_xyzrot(self):
+        self._xyz = torch.nn.Parameter(self.fixed_xyz)
+        self._rotation = torch.nn.Parameter(self.fixed_rot)
+
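+    # Rotates the first object-of-interest around its own centroid by a random
+    # angle; scipy quaternions come back as (x, y, z, w), so REORDER_MTX
+    # permutes them to the (w, x, y, z) convention used by build_rotation.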
+    def random_rotate(self, rotate_aug_scale, apply_rotate):
+        if self.fixed_xyz is None:
+            self.fixed_xyz = self.get_xyz.data
+            self.fixed_rot = self.get_rotation.data
+
+        if apply_rotate:
+            ooi_mask = self.ooi_masks_0.view(-1).byte().to(device='cuda').float()
+
+            rotate = random.randint(-rotate_aug_scale, rotate_aug_scale)
+            rot_matrix = rotation_matrix(0, 0, rotate).cuda()
+            prev_xyz = self.fixed_xyz.clone()
+            ooi_xyz = prev_xyz[ooi_mask.bool()]
+            mean = ooi_xyz.mean(0)
+            ooi_xyz = ooi_xyz - mean
+            after_xyz = torch.einsum('ab,nb->na', rot_matrix, ooi_xyz) + mean
+            prev_xyz[ooi_mask.bool()] = after_xyz
+            self._xyz = torch.nn.Parameter(prev_xyz)
+
+            prev_rotation = self.fixed_rot.clone()
+            prev_rotation_mtx = build_rotation(prev_rotation)
+            after_rotation_mtx = torch.einsum('ab,nbc->nac', rot_matrix, prev_rotation_mtx)
+            after_rotation = torch.from_numpy(R.from_matrix(after_rotation_mtx.detach().cpu()).as_quat()).cuda().float()
+            after_rotation = torch.einsum('ab,nb->na', REORDER_MTX, after_rotation)
+            prev_rotation[ooi_mask.bool()] = after_rotation[ooi_mask.bool()]
+            self._rotation = torch.nn.Parameter(prev_rotation)
+        else:
+            self.recover_xyzrot()
+
+    def get_covariance(self, scaling_modifier=1):
+        return self.covariance_activation(
+            self.get_scaling, scaling_modifier, self._rotation
+        )
+
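+    # One isotropic Gaussian per input point: distCUDA2 returns the mean
+    # squared distance to the three nearest neighbours, so its square root
+    # sets a log-scale that roughly covers the cloud without holes.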
+    def create_from_pcd(self, pcd: BasicPointCloud, spatial_lr_scale: float):
+        self.spatial_lr_scale = spatial_lr_scale
+        fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
+        fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
+        features = (
+            torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2))
+            .float()
+            .cuda()
+        )
+        features[:, :3, 0] = fused_color
+        features[:, 3:, 1:] = 0.0
+
+        threestudio.info(
+            f"Number of points at initialisation: {fused_point_cloud.shape[0]}"
+        )
+
+        dist2 = torch.clamp_min(
+            distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()),
+            0.0000001,
+        )
+        scales = torch.log(torch.sqrt(dist2))[..., None].repeat(1, 3)
+        rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
+        rots[:, 0] = 1
+
+        opacities = inverse_sigmoid(
+            self.cfg.opacity_init
+            * torch.ones(
+                (fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"
+            )
+        )
+
+        self._xyz = nn.Parameter(fused_point_cloud.requires_grad_(True))
+        self._features_dc = nn.Parameter(
+            features[:, :, 0:1].transpose(1, 2).contiguous().requires_grad_(True)
+        )
+        self._features_rest = nn.Parameter(
+            features[:, :, 1:].transpose(1, 2).contiguous().requires_grad_(True)
+        )
+        self._scaling = nn.Parameter(scales.requires_grad_(True))
+        self._rotation = nn.Parameter(rots.requires_grad_(True))
+        self._opacity = nn.Parameter(opacities.requires_grad_(True))
+        if self.cfg.pred_normal:
+            normals = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
+            self._normal = nn.Parameter(normals.requires_grad_(True))
+        self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
+
+        self.fused_point_cloud = fused_point_cloud.cpu().clone().detach()
+        self.features = features.cpu().clone().detach()
+        self.scales = scales.cpu().clone().detach()
+        self.rots = rots.cpu().clone().detach()
+        self.opacities = opacities.cpu().clone().detach()
+
+        language_feature = torch.zeros((self._xyz.shape[0], 3), device="cuda")
+        self._language_feature = torch.nn.Parameter(language_feature.requires_grad_(True))
+
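+    # Same initialization as create_from_pcd, but the new Gaussians are
+    # appended through densification_postfix so optimizer state stays
+    # consistent; existing object masks are padded with False for new points.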
+    def merge_from_pcd(self, pcd: BasicPointCloud, spatial_lr_scale: float):
+        self.spatial_lr_scale = spatial_lr_scale
+        fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
+        fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
+        features = (
+            torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2))
+            .float()
+            .cuda()
+        )
+        features[:, :3, 0] = fused_color
+        features[:, 3:, 1:] = 0.0
+
+        threestudio.info(
+            f"Number of points at merging: {fused_point_cloud.shape[0]}"
+        )
+
+        dist2 = torch.clamp_min(
+            distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()),
+            0.0000001,
+        )
+        scales = torch.log(torch.sqrt(dist2))[..., None].repeat(1, 3)
+        rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
+        rots[:, 0] = 1
+
+        opacities = inverse_sigmoid(
+            self.cfg.opacity_init
+            * torch.ones(
+                (fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"
+            )
+        )
+        self.densification_postfix(
+            fused_point_cloud,
+            features[:, :, 0:1].transpose(1, 2).contiguous(),
+            features[:, :, 1:].transpose(1, 2).contiguous(),
+            opacities,
+            scales,
+            rots,
+            None,
+            torch.zeros((fused_point_cloud.shape[0], 3), device="cuda"),
+        )
+
+        for idx in range(len(self.ooi_masks)):
+            # self.ooi_masks[idx] = torch.cat([self.ooi_masks[idx], torch.ones([fused_point_cloud.shape[0]], device='cuda') > 0])
+            self.register_buffer(f"ooi_masks_{idx}", torch.cat([getattr(self, f"ooi_masks_{idx}"), torch.zeros([fused_point_cloud.shape[0]], device='cuda').bool()]))
+            self.ooi_masks[idx] = getattr(self, f"ooi_masks_{idx}")
+        self.register_buffer(f"_delete_mask", torch.ones_like(self.ooi_masks[0].float()))
+
+        # self._xyz = torch.nn.Parameter(torch.cat([self._xyz, fused_point_cloud], 0), requires_grad=True)
+        # self._features_dc = torch.nn.Parameter(torch.cat([self._features_dc, features[:, :, 0:1].transpose(1, 2).contiguous()], 0), requires_grad=True)
+        # self._features_rest = torch.nn.Parameter(torch.cat([self._features_rest, features[:, :, 1:].transpose(1, 2).contiguous()], 0), requires_grad=True)
+        # self._scaling = torch.nn.Parameter(torch.cat([self._scaling, scales], 0), requires_grad=True)
+        # self._rotation = torch.nn.Parameter(torch.cat([self._rotation, rots], 0), requires_grad=True)
+        # self._opacity = torch.nn.Parameter(torch.cat([self._opacity, opacities], 0), requires_grad=True)
+
+        # if self.cfg.pred_normal:
+        #     normals = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
+        #     self._normal = nn.Parameter(normals.requires_grad_(True))
+        # self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
+
+        # self.fused_point_cloud = fused_point_cloud.cpu().clone().detach()
+        # self.features = features.cpu().clone().detach()
+        # self.scales = scales.cpu().clone().detach()
+        # self.rots = rots.cpu().clone().detach()
+        # self.opacities = opacities.cpu().clone().detach()
+
+        # language_feature = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
+        # self._language_feature = torch.nn.Parameter(torch.cat([self._language_feature, language_feature], 0), requires_grad=True)
+        # self.training_setup()
+
+
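+    # Language-feature stage: only the per-Gaussian language features are
+    # optimized; all geometry and appearance tensors are frozen until
+    # after_lang() re-enables them.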
+    def lang_training_setup(self):
+        training_args = self.cfg
+        l = [
+            {'params': [self._language_feature], 'lr': C(training_args.lang_lr, 0, 0)},
+        ]
+        self._xyz.requires_grad_(False)
+        self._features_dc.requires_grad_(False)
+        self._features_rest.requires_grad_(False)
+        self._scaling.requires_grad_(False)
+        self._rotation.requires_grad_(False)
+        self._opacity.requires_grad_(False)
+        self._language_feature.requires_grad_(True)
+        # self.lang_optimizer = torch.optim.SGD(l, lr=0.0)
+        self.lang_optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15, betas=(self.cfg.lang_beta_1, self.cfg.lang_beta_2))
+        self.optimize_params = ["lang"]
+        self.optimize_list = l
+
+    def after_lang(self):
+        self._xyz.requires_grad_(True)
+        self._features_dc.requires_grad_(True)
+        self._features_rest.requires_grad_(True)
+        self._scaling.requires_grad_(True)
+        self._rotation.requires_grad_(True)
+        self._opacity.requires_grad_(True)
+        self._language_feature.requires_grad_(False)
+
+    def training_setup(self):
+        self._xyz.requires_grad_(True)
+        self._features_dc.requires_grad_(True)
+        self._features_rest.requires_grad_(True)
+        self._scaling.requires_grad_(True)
+        self._rotation.requires_grad_(True)
+        self._opacity.requires_grad_(True)
+        self._language_feature.requires_grad_(False)
+        training_args = self.cfg
+        self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
+        self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
+
+        l = [
+            {
+                "params": [self._xyz],
+                "lr": C(training_args.position_lr, 0, 0),
+                "name": "xyz",
+            },
+            {
+                "params": [self._features_dc],
+                "lr": C(training_args.feature_lr, 0, 0),
+                "name": "f_dc",
+            },
+            {
+                "params": [self._features_rest],
+                "lr": C(training_args.feature_lr, 0, 0) / 20.0,
+                "name": "f_rest",
+            },
+            {
+                "params": [self._opacity],
+                "lr": C(training_args.opacity_lr, 0, 0),
+                "name": "opacity",
+            },
+            {
+                "params": [self._scaling],
+                "lr": C(training_args.scaling_lr, 0, 0),
+                "name": "scaling",
+            },
+            {
+                "params": [self._rotation],
+                "lr": C(training_args.rotation_lr, 0, 0),
+                "name": "rotation",
+            },
+            {'params': [self._language_feature], 'lr': C(training_args.lang_lr, 0, 0), "name": "language_feature"},
+        ]
+        if self.cfg.pred_normal:
+            l.append(
+                {
+                    "params": [self._normal],
+                    "lr": C(training_args.normal_lr, 0, 0),
+                    "name": "normal",
+                },
+            )
+
+        self.optimize_params = [
+            "xyz",
+            "f_dc",
+            "f_rest",
+            "opacity",
+            "scaling",
+            "rotation",
+            "language_feature",
+        ]
+        self.optimize_list = l
+        self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15)
+        self.lang_optimizer = None
+
+    def merge_optimizer(self, net_optimizer):
+        l = self.optimize_list
+        for param in net_optimizer.param_groups:
+            l.append(
+                {
+                    "params": param["params"],
+                    "lr": param["lr"],
+                }
+            )
+        self.optimizer = torch.optim.Adam(l, lr=0.0)
+        return self.optimizer
+
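+    # Per-step schedules: each group re-reads its learning rate from the
+    # config through threestudio's C(...) helper with exponential
+    # interpolation; color_clip is annealed the same way.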
+    def update_learning_rate(self, iteration):
+        """Learning rate scheduling per step"""
+        for param_group in self.optimizer.param_groups:
+            if not ("name" in param_group):
+                continue
+            if param_group["name"] == "xyz":
+                param_group["lr"] = C(
+                    self.cfg.position_lr, 0, iteration, interpolation="exp"
+                )
+            if param_group["name"] == "scaling":
+                param_group["lr"] = C(
+                    self.cfg.scaling_lr, 0, iteration, interpolation="exp"
+                )
+            if param_group["name"] == "f_dc":
+                param_group["lr"] = C(
+                    self.cfg.feature_lr, 0, iteration, interpolation="exp"
+                )
+            if param_group["name"] == "f_rest":
+                param_group["lr"] = (
+                    C(self.cfg.feature_lr, 0, iteration, interpolation="exp") / 20.0
+                )
+            if param_group["name"] == "opacity":
+                param_group["lr"] = C(
+                    self.cfg.opacity_lr, 0, iteration, interpolation="exp"
+                )
+            if param_group["name"] == "rotation":
+                param_group["lr"] = C(
+                    self.cfg.rotation_lr, 0, iteration, interpolation="exp"
+                )
+            if param_group["name"] == "normal":
+                param_group["lr"] = C(
+                    self.cfg.normal_lr, 0, iteration, interpolation="exp"
+                )
+        if self.lang_optimizer is not None:
+            for param_group in self.lang_optimizer.param_groups:
+                if not ("name" in param_group):
+                    continue
+                if param_group["name"] == "language_feature":
+                    param_group["lr"] = C(
+                        self.cfg.lang_lr, 0, iteration, interpolation="exp"
+                    )
+        self.color_clip = C(self.cfg.color_clip, 0, iteration)
+
+    def reset_opacity(self):
+        # opacities_new = inverse_sigmoid(torch.min(self.get_opacity, torch.ones_like(self.get_opacity) * 0.01))
+        opacities_new = inverse_sigmoid(self.get_opacity * 0.9)
+        optimizable_tensors = self.replace_tensor_to_optimizer(opacities_new, "opacity")
+        self._opacity = optimizable_tensors["opacity"]
+
+    def to(self, device="cpu"):
+        self._xyz = self._xyz.to(device)
+        self._features_dc = self._features_dc.to(device)
+        self._features_rest = self._features_rest.to(device)
+        self._opacity = self._opacity.to(device)
+        self._scaling = self._scaling.to(device)
+        self._rotation = self._rotation.to(device)
+        self._normal = self._normal.to(device)
+        self._language_feature = self._language_feature.to(device)
+
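+    # The three helpers below keep Adam's per-parameter state (exp_avg,
+    # exp_avg_sq) aligned whenever tensors are replaced, pruned, or extended.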
+    def replace_tensor_to_optimizer(self, tensor, name):
+        optimizable_tensors = {}
+        for group in self.optimizer.param_groups:
+            if ("name" in group) and group["name"] == name:
+                stored_state = self.optimizer.state.get(group["params"][0], None)
+                stored_state["exp_avg"] = torch.zeros_like(tensor)
+                stored_state["exp_avg_sq"] = torch.zeros_like(tensor)
+
+                del self.optimizer.state[group["params"][0]]
+                group["params"][0] = nn.Parameter(tensor.requires_grad_(True))
+                self.optimizer.state[group["params"][0]] = stored_state
+
+                optimizable_tensors[group["name"]] = group["params"][0]
+        return optimizable_tensors
+
+    def _prune_optimizer(self, mask):
+        optimizable_tensors = {}
+        for group in self.optimizer.param_groups:
+            if ("name" in group) and (group["name"] in self.optimize_params):
+                stored_state = self.optimizer.state.get(group["params"][0], None)
+                if stored_state is not None:
+                    stored_state["exp_avg"] = stored_state["exp_avg"][mask]
+                    stored_state["exp_avg_sq"] = stored_state["exp_avg_sq"][mask]
+
+                    del self.optimizer.state[group["params"][0]]
+                    group["params"][0] = nn.Parameter(
+                        (group["params"][0][mask].requires_grad_(True))
+                    )
+                    self.optimizer.state[group["params"][0]] = stored_state
+
+                    optimizable_tensors[group["name"]] = group["params"][0]
+                else:
+                    group["params"][0] = nn.Parameter(
+                        group["params"][0][mask].requires_grad_(True)
+                    )
+                    optimizable_tensors[group["name"]] = group["params"][0]
+        return optimizable_tensors
+
+    def prune_points(self, mask):
+        valid_points_mask = ~mask
+        optimizable_tensors = self._prune_optimizer(valid_points_mask)
+
+        self._xyz = optimizable_tensors["xyz"]
+        self._features_dc = optimizable_tensors["f_dc"]
+        self._features_rest = optimizable_tensors["f_rest"]
+        self._opacity = optimizable_tensors["opacity"]
+        self._scaling = optimizable_tensors["scaling"]
+        self._rotation = optimizable_tensors["rotation"]
+        self._language_feature = optimizable_tensors["language_feature"]
+        if self.cfg.pred_normal:
+            self._normal = optimizable_tensors["normal"]
+
+        self.xyz_gradient_accum = self.xyz_gradient_accum[valid_points_mask]
+
+        self.denom = self.denom[valid_points_mask]
+        self.max_radii2D = self.max_radii2D[valid_points_mask]
+
+    def cat_tensors_to_optimizer(self, tensors_dict):
+        optimizable_tensors = {}
+        for group in self.optimizer.param_groups:
+            if ("name" in group) and (group["name"] in self.optimize_params):
+                extension_tensor = tensors_dict[group["name"]]
+                stored_state = self.optimizer.state.get(group["params"][0], None)
+                if stored_state is not None:
+                    stored_state["exp_avg"] = torch.cat(
+                        (stored_state["exp_avg"], torch.zeros_like(extension_tensor)),
+                        dim=0,
+                    )
+                    stored_state["exp_avg_sq"] = torch.cat(
+                        (
+                            stored_state["exp_avg_sq"],
+                            torch.zeros_like(extension_tensor),
+                        ),
+                        dim=0,
+                    )
+
+                    del self.optimizer.state[group["params"][0]]
+                    group["params"][0] = nn.Parameter(
+                        torch.cat(
+                            (group["params"][0], extension_tensor), dim=0
+                        ).requires_grad_(True)
+                    )
+                    self.optimizer.state[group["params"][0]] = stored_state
+
+                    optimizable_tensors[group["name"]] = group["params"][0]
+                else:
+                    group["params"][0] = nn.Parameter(
+                        torch.cat(
+                            (group["params"][0], extension_tensor), dim=0
+                        ).requires_grad_(True)
+                    )
+                    optimizable_tensors[group["name"]] = group["params"][0]
+
+        return optimizable_tensors
+
+    def densification_postfix(
+        self,
+        new_xyz,
+        new_features_dc,
+        new_features_rest,
+        new_opacities,
+        new_scaling,
+        new_rotation,
+        new_normal=None,
+        new_language_feature=None,
+    ):
+        d = {
+            "xyz": new_xyz,
+            "f_dc": new_features_dc,
+            "f_rest": new_features_rest,
+            "opacity": new_opacities,
+            "scaling": new_scaling,
+            "rotation": new_rotation,
+            "language_feature": new_language_feature,
+        }
+        if self.cfg.pred_normal:
+            d.update({"normal": new_normal})
+
+        optimizable_tensors = self.cat_tensors_to_optimizer(d)
+        self._xyz = optimizable_tensors["xyz"]
+        self._features_dc = optimizable_tensors["f_dc"]
+        self._features_rest = optimizable_tensors["f_rest"]
+        self._opacity = optimizable_tensors["opacity"]
+        self._scaling = optimizable_tensors["scaling"]
+        self._rotation = optimizable_tensors["rotation"]
+        self._language_feature = optimizable_tensors["language_feature"]
+        if self.cfg.pred_normal:
+            self._normal = optimizable_tensors["normal"]
+
+        self.xyz_gradient_accum = torch.zeros((self._xyz.shape[0], 1), device="cuda")
+        self.denom = torch.zeros((self._xyz.shape[0], 1), device="cuda")
+        self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
+
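+    # Split: large, high-gradient Gaussians are replaced by N children sampled
+    # from the parent's own distribution, with scales divided by 0.8 * N.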
+    def densify_and_split(self, grads, grad_threshold, N=2):
+        n_init_points = self._xyz.shape[0]
+        # Extract points that satisfy the gradient condition
+        padded_grad = torch.zeros((n_init_points), device="cuda")
+        padded_grad[: grads.shape[0]] = grads.squeeze()
+        selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False)
+        selected_pts_mask = torch.logical_and(
+            selected_pts_mask,
+            torch.norm(self.get_scaling, dim=1) > self.cfg.split_thresh,
+        )
+
+        # sample N children per selected point; dividing the std by N keeps them close
+        stds = self.get_scaling[selected_pts_mask].repeat(N, 1) / N
+        means = torch.zeros((stds.size(0), 3), device="cuda")
+        samples = torch.normal(mean=means, std=stds)
+        rots = build_rotation(self._rotation[selected_pts_mask]).repeat(N, 1, 1)
+        new_xyz = torch.bmm(rots, samples.unsqueeze(-1)).squeeze(-1) + self._xyz[
+            selected_pts_mask
+        ].repeat(N, 1)
+        new_scaling = self.scaling_inverse_activation(
+            self.get_scaling[selected_pts_mask].repeat(N, 1) / (0.8 * N)
+        )
+        new_rotation = self._rotation[selected_pts_mask].repeat(N, 1)
+        new_features_dc = self._features_dc[selected_pts_mask].repeat(N, 1, 1)
+        new_features_rest = self._features_rest[selected_pts_mask].repeat(N, 1, 1)
+        new_opacity = self._opacity[selected_pts_mask].repeat(N, 1)
+        new_language_feature = self._language_feature[selected_pts_mask].repeat(N, 1)
+        if self.cfg.pred_normal:
+            new_normal = self._normal[selected_pts_mask].repeat(N, 1)
+        else:
+            new_normal = None
+
+        self.densification_postfix(
+            new_xyz,
+            new_features_dc,
+            new_features_rest,
+            new_opacity,
+            new_scaling,
+            new_rotation,
+            new_normal,
+            new_language_feature,
+        )
+
+        prune_filter = torch.cat(
+            (
+                selected_pts_mask,
+                torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool),
+            )
+        )
+        self.prune_points(prune_filter)
+
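+    # Clone: small, high-gradient Gaussians are duplicated in place and left
+    # to drift apart under the position gradient.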
+    def densify_and_clone(self, grads, grad_threshold):
+        # Extract points that satisfy the gradient condition
+        selected_pts_mask = torch.where(
+            torch.norm(grads, dim=-1) >= grad_threshold, True, False
+        )
+        selected_pts_mask = torch.logical_and(
+            selected_pts_mask,
+            torch.norm(self.get_scaling, dim=1) <= self.cfg.split_thresh,
+        )
+
+        new_xyz = self._xyz[selected_pts_mask]
+        new_features_dc = self._features_dc[selected_pts_mask]
+        new_features_rest = self._features_rest[selected_pts_mask]
+        new_opacities = self._opacity[selected_pts_mask]
+        new_scaling = self._scaling[selected_pts_mask]
+        new_rotation = self._rotation[selected_pts_mask]
+        new_language_feature = self._language_feature[selected_pts_mask]
+        if self.cfg.pred_normal:
+            new_normal = self._normal[selected_pts_mask]
+        else:
+            new_normal = None
+
+        self.densification_postfix(
+            new_xyz,
+            new_features_dc,
+            new_features_rest,
+            new_opacities,
+            new_scaling,
+            new_rotation,
+            new_normal,
+            new_language_feature,
+        )
+
+    def densify(self, max_grad):
+        grads = self.xyz_gradient_accum / self.denom
+        grads[grads.isnan()] = 0.0
+
+        self.densify_and_clone(grads, max_grad)
+        self.densify_and_split(grads, max_grad)
+
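+    # Prune nearly transparent Gaussians and, optionally, those whose 2D
+    # radius is far above average (likely floaters).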
+    def prune(self, min_opacity, max_screen_size):
+        prune_mask = (self.get_opacity < min_opacity).squeeze()
+        if self.cfg.prune_big_points:
+            big_points_vs = self.max_radii2D > (torch.mean(self.max_radii2D) * 3)
+            prune_mask = torch.logical_or(prune_mask, big_points_vs)
+        self.prune_points(prune_mask)
+
+        torch.cuda.empty_cache()
+
+    def add_densification_stats(self, viewspace_point_tensor, update_filter):
+        self.xyz_gradient_accum[update_filter] += torch.norm(
+            viewspace_point_tensor.grad[update_filter, :2], dim=-1, keepdim=True
+        )
+        self.denom[update_filter] += 1
+
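+    # Per-step bookkeeping: cap the total count by random pruning, accumulate
+    # view-space gradient stats, then run the prune/densify/opacity-reset
+    # schedule from the config.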
+    @torch.no_grad()
+    def update_states(
+        self,
+        iteration,
+        visibility_filter,
+        radii,
+        viewspace_point_tensor,
+    ):
+        if self._xyz.shape[0] >= self.cfg.max_num + 100:
+            prune_mask = torch.randperm(self._xyz.shape[0]).to(self._xyz.device)
+            prune_mask = prune_mask > self.cfg.max_num
+            self.prune_points(prune_mask)
+            return
+        # Keep track of max radii in image-space for pruning
+        # loop over batch
+        bs = len(viewspace_point_tensor)
+        for i in range(bs):
+            radii_i = radii[i]
+            visibility_filter_i = visibility_filter[i]
+            viewspace_point_tensor_i = viewspace_point_tensor[i]
+            self.max_radii2D = torch.max(self.max_radii2D, radii_i.float())
+
+            self.add_densification_stats(viewspace_point_tensor_i, visibility_filter_i)
+
+        if (
+            iteration > self.cfg.prune_from_iter
+            and iteration < self.cfg.prune_until_iter
+            and iteration % self.cfg.prune_interval == 0
+        ):
+            self.prune(self.cfg.min_opac_prune, self.cfg.radii2d_thresh)
+            if iteration % self.cfg.opacity_reset_interval == 0:
+                self.reset_opacity()
+
+        if (
+            iteration > self.cfg.densify_from_iter
+            and iteration < self.cfg.densify_until_iter
+            and iteration % self.cfg.densification_interval == 0
+        ):
+            self.densify(self.cfg.densify_grad_threshold)
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_base.py.bak ADDED
@@ -0,0 +1,1492 @@
+#
+# Copyright (C) 2023, Inria
+# GRAPHDECO research group, https://team.inria.fr/graphdeco
+# All rights reserved.
+#
+# This software is free for non-commercial, research and evaluation use
+# under the terms of the LICENSE.md file.
+#
+# For inquiries contact [email protected]
+#
+import math
+import os
+import random
+import sys
+import argparse
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import NamedTuple
+
+import numpy as np
+import cv2
+from PIL import Image
+import threestudio
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision
+from transformers import pipeline
+from plyfile import PlyData, PlyElement
+from simple_knn._C import distCUDA2
+import diffusers
+from diffusers import StableDiffusionInpaintPipeline, AutoPipelineForInpainting
+from threestudio.models.geometry.base import BaseGeometry
+from threestudio.utils.misc import C
+from threestudio.utils.typing import *
+from segment_anything import sam_model_registry, SamPredictor
+import matplotlib.pyplot as plt
+
+from .gaussian_io import GaussianIO
+import imageio
+
+from scipy.spatial.transform import Rotation as R
+
+ REORDER_MTX = torch.tensor([
45
+ [0,0,0,1],
46
+ [1,0,0,0],
47
+ [0,1,0,0],
48
+ [0,0,1,0]
49
+ ]).cuda().float()
50
+
51
+ def build_rotation(r):
52
+ norm = torch.sqrt(
53
+ r[:, 0] * r[:, 0] + r[:, 1] * r[:, 1] + r[:, 2] * r[:, 2] + r[:, 3] * r[:, 3]
54
+ )
55
+
56
+ q = r / norm[:, None]
57
+
58
+ R = torch.zeros((q.size(0), 3, 3), device="cuda")
59
+ r = q[:, 0]
60
+ x = q[:, 1]
61
+ y = q[:, 2]
62
+ z = q[:, 3]
63
+
64
+ R[:, 0, 0] = 1 - 2 * (y * y + z * z)
65
+ R[:, 0, 1] = 2 * (x * y - r * z)
66
+ R[:, 0, 2] = 2 * (x * z + r * y)
67
+ R[:, 1, 0] = 2 * (x * y + r * z)
68
+ R[:, 1, 1] = 1 - 2 * (x * x + z * z)
69
+ R[:, 1, 2] = 2 * (y * z - r * x)
70
+ R[:, 2, 0] = 2 * (x * z - r * y)
71
+ R[:, 2, 1] = 2 * (y * z + r * x)
72
+ R[:, 2, 2] = 1 - 2 * (x * x + y * y)
73
+ return R
74
+
75
+ def rotation_matrix(angle_x, angle_y, angle_z):
76
+ # Convert angles to radians
77
+ rad_x = torch.deg2rad(torch.tensor(angle_x))
78
+ rad_y = torch.deg2rad(torch.tensor(angle_y))
79
+ rad_z = torch.deg2rad(torch.tensor(angle_z))
80
+
81
+ # Compute sine and cosine of the angles
82
+ cos_x = torch.cos(rad_x)
83
+ sin_x = torch.sin(rad_x)
84
+ cos_y = torch.cos(rad_y)
85
+ sin_y = torch.sin(rad_y)
86
+ cos_z = torch.cos(rad_z)
87
+ sin_z = torch.sin(rad_z)
88
+
89
+ # Construct the rotation matrix
90
+ Rx = torch.tensor([[1, 0, 0],
91
+ [0, cos_x, -sin_x],
92
+ [0, sin_x, cos_x]])
93
+
94
+ Ry = torch.tensor([[cos_y, 0, sin_y],
95
+ [0, 1, 0],
96
+ [-sin_y, 0, cos_y]])
97
+
98
+ Rz = torch.tensor([[cos_z, -sin_z, 0],
99
+ [sin_z, cos_z, 0],
100
+ [0, 0, 1]])
101
+
102
+ # Combine the rotation matrices
103
+ rotation_matrix = Rz @ Ry @ Rx
104
+
105
+ return rotation_matrix
106
+
107
+ # from scipy.spatial import KDTree
108
+ #
109
+ # def distCUDA2(points):
110
+ # points_np = points.detach().cpu().float().numpy()
111
+ # dists, inds = KDTree(points_np).query(points_np, k=4)
112
+ # meanDists = (dists[:, 1:] ** 2).mean(1)
113
+ #
114
+ # return torch.tensor(meanDists, dtype=points.dtype, device=points.device)
115
+
116
+ sys.path.append('./GeoWizard/geowizard')
117
+ from models.geowizard_pipeline import DepthNormalEstimationPipeline
118
+
119
+ C0 = 0.28209479177387814
120
+
121
+ def propagate(canvas):
122
+ H, W = canvas.shape
123
+ dx = [0, 1, 0, -1]
124
+ dy = [1, 0, -1, 0]
125
+ count = np.zeros_like(canvas)
126
+
127
+ while 1:
128
+ curr_mask = canvas > 0
129
+ if sum(sum(curr_mask)) == H * W:
130
+ break
131
+ expand_mask = (cv2.blur(curr_mask.astype(np.float32), (3, 3)) > 0)
132
+ x, y = np.where(np.logical_and(expand_mask, ~curr_mask))
133
+ old_canvas = canvas.copy()
134
+
135
+ for xx, yy in zip(x, y):
136
+ for i in range(4):
137
+ ref_x = xx + dx[i]
138
+ ref_y = yy + dy[i]
139
+ if 0<=ref_x<H and 0<=ref_y<W and old_canvas[ref_x, ref_y] != 0:
140
+ canvas[xx, yy] = old_canvas[ref_x, ref_y]
141
+ count[xx, yy] = count[ref_x, ref_y] + 1
142
+
143
+ weight = (count.max() - count) / count.max()
144
+ return canvas * weight
145
+
146
+ def save_pc(save_file, pts, color):
147
+ '''
148
+ pts: N, 3
149
+ color: N, 3
150
+ '''
151
+ if color.dtype == np.dtype('float64'):
152
+ color = (color * 255).astype(np.uint8)
153
+ with open(save_file, 'w') as f:
154
+ f.writelines((
155
+ "ply\n",
156
+ "format ascii 1.0\n",
157
+ "element vertex {}\n".format(pts.shape[0]),
158
+ "property float x\n",
159
+ "property float y\n",
160
+ "property float z\n",
161
+ "property uchar red\n",
162
+ "property uchar green\n",
163
+ "property uchar blue\n",
164
+ "end_header\n"))
165
+ for i in range(pts.shape[0]):
166
+ point = "%f %f %f %d %d %d\n" % (pts[i, 0], pts[i, 1], pts[i, 2], color[i, 0], color[i, 1], color[i, 2])
167
+ f.writelines(point)
168
+ threestudio.info(f"Saved point cloud to {save_file}.")
169
+
170
+
171
+ def RGB2SH(rgb):
172
+ return (rgb - 0.5) / C0
173
+
174
+
175
+ def SH2RGB(sh):
176
+ return sh * C0 + 0.5
177
+
178
+
179
+ def inverse_sigmoid(x):
180
+ return torch.log(x / (1 - x))
181
+
182
+
183
+ def strip_lowerdiag(L):
184
+ uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda")
185
+
186
+ uncertainty[:, 0] = L[:, 0, 0]
187
+ uncertainty[:, 1] = L[:, 0, 1]
188
+ uncertainty[:, 2] = L[:, 0, 2]
189
+ uncertainty[:, 3] = L[:, 1, 1]
190
+ uncertainty[:, 4] = L[:, 1, 2]
191
+ uncertainty[:, 5] = L[:, 2, 2]
192
+ return uncertainty
193
+
194
+
195
+ def strip_symmetric(sym):
196
+ return strip_lowerdiag(sym)
197
+
198
+
199
+ def gaussian_3d_coeff(xyzs, covs):
200
+ # xyzs: [N, 3]
201
+ # covs: [N, 6]
202
+ x, y, z = xyzs[:, 0], xyzs[:, 1], xyzs[:, 2]
203
+ a, b, c, d, e, f = (
204
+ covs[:, 0],
205
+ covs[:, 1],
206
+ covs[:, 2],
207
+ covs[:, 3],
208
+ covs[:, 4],
209
+ covs[:, 5],
210
+ )
211
+
212
+ # eps must be small enough !!!
213
+ inv_det = 1 / (
214
+ a * d * f + 2 * e * c * b - e**2 * a - c**2 * d - b**2 * f + 1e-24
215
+ )
216
+ inv_a = (d * f - e**2) * inv_det
217
+ inv_b = (e * c - b * f) * inv_det
218
+ inv_c = (e * b - c * d) * inv_det
219
+ inv_d = (a * f - c**2) * inv_det
220
+ inv_e = (b * c - e * a) * inv_det
221
+ inv_f = (a * d - b**2) * inv_det
222
+
223
+ power = (
224
+ -0.5 * (x**2 * inv_a + y**2 * inv_d + z**2 * inv_f)
225
+ - x * y * inv_b
226
+ - x * z * inv_c
227
+ - y * z * inv_e
228
+ )
229
+
230
+ power[power > 0] = -1e10 # abnormal values... make weights 0
231
+
232
+ return torch.exp(power)
233
+
234
+
235
+ def build_rotation(r):
236
+ norm = torch.sqrt(
237
+ r[:, 0] * r[:, 0] + r[:, 1] * r[:, 1] + r[:, 2] * r[:, 2] + r[:, 3] * r[:, 3]
238
+ )
239
+
240
+ q = r / norm[:, None]
241
+
242
+ R = torch.zeros((q.size(0), 3, 3), device="cuda")
243
+
244
+ r = q[:, 0]
245
+ x = q[:, 1]
246
+ y = q[:, 2]
247
+ z = q[:, 3]
248
+
249
+ R[:, 0, 0] = 1 - 2 * (y * y + z * z)
250
+ R[:, 0, 1] = 2 * (x * y - r * z)
251
+ R[:, 0, 2] = 2 * (x * z + r * y)
252
+ R[:, 1, 0] = 2 * (x * y + r * z)
253
+ R[:, 1, 1] = 1 - 2 * (x * x + z * z)
254
+ R[:, 1, 2] = 2 * (y * z - r * x)
255
+ R[:, 2, 0] = 2 * (x * z - r * y)
256
+ R[:, 2, 1] = 2 * (y * z + r * x)
257
+ R[:, 2, 2] = 1 - 2 * (x * x + y * y)
258
+ return R
259
+
260
+
261
+ def build_scaling_rotation(s, r):
262
+ L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda")
263
+ R = build_rotation(r)
264
+
265
+ L[:, 0, 0] = s[:, 0]
266
+ L[:, 1, 1] = s[:, 1]
267
+ L[:, 2, 2] = s[:, 2]
268
+
269
+ L = R @ L
270
+ return L
271
+
272
+
273
+ def safe_state(silent):
274
+ old_f = sys.stdout
275
+
276
+ class F:
277
+ def __init__(self, silent):
278
+ self.silent = silent
279
+
280
+ def write(self, x):
281
+ if not self.silent:
282
+ if x.endswith("\n"):
283
+ old_f.write(
284
+ x.replace(
285
+ "\n",
286
+ " [{}]\n".format(
287
+ str(datetime.now().strftime("%d/%m %H:%M:%S"))
288
+ ),
289
+ )
290
+ )
291
+ else:
292
+ old_f.write(x)
293
+
294
+ def flush(self):
295
+ old_f.flush()
296
+
297
+ sys.stdout = F(silent)
298
+
299
+ random.seed(0)
300
+ np.random.seed(0)
301
+ torch.manual_seed(0)
302
+ torch.cuda.set_device(torch.device("cuda:0"))
303
+
304
+
305
+ class BasicPointCloud(NamedTuple):
306
+ points: np.array
307
+ colors: np.array
308
+ normals: np.array
309
+
310
+
311
+ class Camera(NamedTuple):
312
+ FoVx: torch.Tensor
313
+ FoVy: torch.Tensor
314
+ camera_center: torch.Tensor
315
+ image_width: int
316
+ image_height: int
317
+ world_view_transform: torch.Tensor
318
+ full_proj_transform: torch.Tensor
319
+
320
+ def fill_mask(mask):
321
+ mask = np.array(mask)
322
+ canvas = np.zeros_like(mask)
323
+ H, W = mask.shape
324
+ for i in range(H):
325
+ for p in range(0, W):
326
+ if mask[i, p]:
327
+ canvas[i, p] = 1
328
+ else:
329
+ break
330
+ for p in range(W-1, 0, -1):
331
+ if mask[i, p]:
332
+ canvas[i, p] = 1
333
+ else:
334
+ break
335
+
336
+ for i in range(W):
337
+ for p in range(0, H):
338
+ if mask[p, i]:
339
+ canvas[p, i] = 1
340
+ else:
341
+ break
342
+ for p in range(H-1, 0, -1):
343
+ if mask[p, i]:
344
+ canvas[p, i] = 1
345
+ else:
346
+ break
347
+ mask = np.logical_and(mask, canvas)
348
+ return Image.fromarray(mask)
349
+
350
+ def parse_wh(wh):
351
+ try:
352
+ W, H = wh
353
+ except:
354
+ H = W = wh
355
+ return W, H
356
+
357
+ @threestudio.register("gaussian-splatting")
358
+ class GaussianBaseModel(BaseGeometry, GaussianIO):
359
+ @dataclass
360
+ class Config(BaseGeometry.Config):
361
+ max_num: int = 500000
362
+ sh_degree: int = 0
363
+ position_lr: Any = 0.001
364
+ # scale_lr: Any = 0.003
365
+ feature_lr: Any = 0.01
366
+ opacity_lr: Any = 0.05
367
+ scaling_lr: Any = 0.005
368
+ rotation_lr: Any = 0.005
369
+ pred_normal: bool = False
370
+ normal_lr: Any = 0.001
371
+ lang_lr: float = 0.005
372
+
373
+ densification_interval: int = 50
374
+ prune_interval: int = 50
375
+ opacity_reset_interval: int = 100000
376
+ densify_from_iter: int = 100
377
+ prune_from_iter: int = 100
378
+ densify_until_iter: int = 2000
379
+ prune_until_iter: int = 2000
380
+ densify_grad_threshold: Any = 0.01
381
+ min_opac_prune: Any = 0.005
382
+ split_thresh: Any = 0.02
383
+ radii2d_thresh: Any = 1000
384
+
385
+ sphere: bool = False
386
+ prune_big_points: bool = False
387
+ color_clip: Any = 2.0
388
+
389
+ geometry_convert_from: str = ""
390
+ load_ply_only_vertex: bool = False
391
+ init_num_pts: int = 100
392
+ pc_init_radius: float = 0.8
393
+ opacity_init: float = 0.1
394
+
395
+ img_resolution: Any = 512
396
+
397
+ shap_e_guidance_config: dict = field(default_factory=dict)
398
+
399
+ max_scaling: float = 100
400
+ sam_ckpt_path: str = "ckpts/sam_vit_h_4b8939.pth"
401
+ ooi_bbox: Any = None
402
+
403
+ prompt: Any = None
404
+ empty_prompt: Any = None
405
+ novel_view_gradual: bool = False
406
+ lang_beta_1: float = 0.9
407
+ lang_beta_2: float = 0.999
408
+
409
+ inference_only: bool = False
410
+
411
+ cfg: Config
412
+
413
+ def setup_functions(self):
414
+ def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation):
415
+ L = build_scaling_rotation(scaling_modifier * scaling, rotation)
416
+ actual_covariance = L @ L.transpose(1, 2)
417
+ symm = strip_symmetric(actual_covariance)
418
+ return symm
419
+
420
+ self.scaling_activation = torch.exp
421
+ self.scaling_inverse_activation = torch.log
422
+
423
+ self.covariance_activation = build_covariance_from_scaling_rotation
424
+
425
+ self.opacity_activation = torch.sigmoid
426
+ self.inverse_opacity_activation = inverse_sigmoid
427
+
428
+ self.rotation_activation = torch.nn.functional.normalize
429
+ self.color_clip = C(self.cfg.color_clip, 0, 0)
430
+
431
+ self.fixed_xyz = None
432
+ self.fixed_rot = None
433
+
434
+ if not self.cfg.inference_only:
435
+ sam = sam_model_registry["vit_h"](checkpoint=self.cfg.sam_ckpt_path).to('cuda')
436
+ self.predictor = SamPredictor(sam)
437
+
438
+ def project_pc(self, c2w, H=None, W=None):
439
+ W, H = parse_wh(self.cfg.img_resolution)
440
+ # if W is None:
441
+ # W = H
442
+ assert self.point_cloud is not None
443
+ pc_cam = torch.einsum('bxy,hwy->bhwx', torch.linalg.inv(c2w), self.point_cloud)
444
+ depth = -1 * pc_cam[..., 2].view(pc_cam.shape[0], -1)
445
+ pc_cam = (pc_cam / pc_cam[..., 2:3])[..., :3]
446
+ pc_2d = torch.einsum('xy,bhwy->bhwx', self.proj_mtx, pc_cam).clamp(0, 1)
447
+ pc_2d[..., 0] = pc_2d[..., 0] * (W-1)
448
+ pc_2d[..., 1] = pc_2d[..., 1] * (H-1)
449
+ pc_2d = (pc_2d.long()).view(pc_2d.shape[0], -1, pc_2d.shape[-1])
450
+
451
+ mask = torch.zeros([pc_2d.shape[0], H, W], device='cuda')
452
+ depth_canvas = torch.zeros([pc_2d.shape[0], H, W], device='cuda')
453
+ for i in range(pc_2d.shape[0]):
454
+ x = (W - pc_2d[i, :, 0]).clamp(0, W-1)
455
+ y = (pc_2d[i, :, 1]).clamp(0, H-1)
456
+ mask[i, y, x] = 1.0
457
+ depth_canvas[i, y, x] = depth[i]
458
+
459
+ mask = torchvision.transforms.functional.gaussian_blur(mask, 3) > 0
460
+ return mask, depth_canvas
461
+
462
+ def img2pc_inpaint(self, img, c2w=None, gt_depth=None, mask=None, proj_func=None):
463
+ W, H = parse_wh(self.cfg.img_resolution)
464
+ with torch.no_grad():
465
+ depth = self.geowizard_pipe(
466
+ img,
467
+ denoising_steps = 25,
468
+ ensemble_size = 3,
469
+ processing_res = 768,
470
+ match_input_res = False,
471
+ domain = 'outdoor',
472
+ color_map = 'Spectral',
473
+ gt_depth = gt_depth, mask = mask,
474
+ show_progress_bar = True)['depth_np']
475
+ ret_depth = depth.copy()
476
+ depth = torch.from_numpy(depth)[None]
477
+ depth = torch.nn.functional.interpolate(depth[None], size=(H, W), mode='bilinear', align_corners=True).squeeze()
478
+
479
+ depth = depth.cpu().numpy()
480
+ if proj_func is None:
481
+ depth = depth * 20 + 5
482
+ else:
483
+ depth = proj_func(depth)
484
+
485
+ depth = depth * -1
486
+ x, y = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32), indexing='xy')
487
+ x = x / float(W-1)
488
+ y = y / float(H-1)
489
+ xyz = np.stack((x, y, np.ones_like(x)), 0).transpose(1, 2, 0)
490
+ xyz[..., 0] = 1 - xyz[..., 0]
491
+
492
+ fov = 60 / 180 * np.pi
493
+ proj_mtx = np.array([
494
+ [1 / (2 * np.tan(fov/2)), 0, 1/2],
495
+ [0, 1 / (2 * np.tan(fov/2)), 1/2],
496
+ [0, 0, 1],
497
+ ])
498
+ self.proj_mtx = torch.from_numpy(proj_mtx).cuda().float()
499
+ if c2w is None:
500
+ c2w = np.array([0.0000, -0.3420, 0.9397, 2.3492, 1.0000, 0.0000, -0.0000, 0.0000, -0.0000, 0.9397, 0.3420, 0.8551, 0.0000, 0.0000, 0.0000, 1.0000]).reshape(4, 4)
501
+ c2w = np.array([0.0000, 0.0000, 1.0000, 2.5000, 1.0000, 0.0000, -0.0000, 0.0000, -0.0000, 1.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000]).reshape(4, 4)
502
+ else:
503
+ c2w = c2w[0].cpu().numpy()
504
+ xyz = np.einsum('ab,hwb->hwa', np.linalg.inv(proj_mtx), xyz)
505
+ xyz = xyz * depth[..., None]
506
+ xyz = np.concatenate([xyz, np.ones_like(x)[..., None]], 2)
507
+ xyz = np.einsum('ab,hwb->hwa', c2w, xyz)
508
+ return xyz, ret_depth
509
+
510
+ def img2pc(self, img, transit_mask=None, fg_transit_l=None, fg_transit_r=None, c2w=None, fg_depth=None):
511
+ H, W = parse_hw(self.cfg.img_resolution)
512
+ with torch.no_grad():
513
+ depth = self.geowizard_pipe(
514
+ img,
515
+ denoising_steps = 25,
516
+ ensemble_size = 3,
517
+ processing_res = 768,
518
+ match_input_res = True,
519
+ domain = 'outdoor',
520
+ color_map = 'Spectral',
521
+ show_progress_bar = True)['depth_np']
522
+ depth = torch.from_numpy(depth)[None]
523
+ depth = torch.nn.functional.interpolate(depth[None], size=(W, H), mode='bilinear', align_corners=True).squeeze()
524
+
525
+
526
+ depth = depth.cpu().numpy()
527
+ if fg_depth is None:
528
+ if fg_transit_l is None:
529
+ l, r = np.quantile(depth, 0.05), np.quantile(depth, 0.95)
530
+ depth = (depth - l) / (r - l) * 20 * 0.9 + 2 + 5 / 90
531
+ ret_depth = depth.copy()
532
+ else:
533
+ transit_l, transit_r = depth[transit_mask].min(), depth[transit_mask].max()
534
+ depth = (depth - transit_l) / (transit_r - transit_l) * (fg_transit_r - fg_transit_l) + fg_transit_l
535
+ ret_depth = depth
536
+ else:
537
+ delta = fg_depth[0] - depth
538
+ delta[~transit_mask] = 0
539
+ delta = propagate(delta)
540
+ depth = depth + delta
541
+ ret_depth = depth.copy()
542
+ depth = depth * -1
543
+ x, y = np.meshgrid(np.arange(H, dtype=np.float32), np.arange(W, dtype=np.float32), indexing='xy')
544
+ x = x / float(H-1)
545
+ y = y / float(W-1)
546
+ xyz = np.stack((x, y, np.ones_like(x)), 0).transpose(1, 2, 0)
547
+ xyz[..., 0] = 1 - xyz[..., 0]
548
+
549
+ fov = 60 / 180 * np.pi
550
+ proj_mtx = np.array([
551
+ [1 / (2 * np.tan(fov/2)), 0, 1/2],
552
+ [0, 1 / (2 * np.tan(fov/2)), 1/2],
553
+ [0, 0, 1],
554
+ ])
555
+ self.proj_mtx = torch.from_numpy(proj_mtx).cuda().float()
556
+ if c2w is None:
557
+ c2w = np.array([0.0000, -0.3420, 0.9397, 2.3492, 1.0000, 0.0000, -0.0000, 0.0000, -0.0000, 0.9397, 0.3420, 0.8551, 0.0000, 0.0000, 0.0000, 1.0000]).reshape(4, 4)
558
+ c2w = np.array([0.0000, 0.0000, 1.0000, 2.5000, 1.0000, 0.0000, -0.0000, 0.0000, -0.0000, 1.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000]).reshape(4, 4)
559
+ else:
560
+ c2w = c2w[0].cpu().numpy()
561
+ xyz = np.einsum('ab,hwb->hwa', np.linalg.inv(proj_mtx), xyz)
562
+ xyz = xyz * depth[..., None]
563
+ xyz = np.concatenate([xyz, np.ones_like(x)[..., None]], 2)
564
+ xyz = np.einsum('ab,hwb->hwa', c2w, xyz)
565
+ return xyz, ret_depth
566
+
567
+ def inpaint(self, img, mask, prompt):
568
+ # inpaint using base pipe
569
+ N = 512
570
+ img = img.convert("RGB").resize((N, N))
571
+ mask = mask.convert("RGB").resize((N, N))
572
+ self.base_inpainting_pipe.to("cuda")
573
+ img = self.base_inpainting_pipe(prompt=prompt, image=img, mask_image=mask, guidance_scale=7.5).images[0]
574
+ self.base_inpainting_pipe.to("cpu")
575
+ torch.cuda.empty_cache()
576
+
577
+ # inpaint using sdxl pipe
578
+ N = 1024
579
+ img = img.convert("RGB").resize((N, N))
580
+ mask = mask.convert("RGB").resize((N, N))
581
+ self.sdxl_inpainting_pipe.to("cuda")
582
+ img = self.sdxl_inpainting_pipe(prompt=prompt, image=img, mask_image=mask, guidance_scale=7.5, num_inference_steps=20, strength=0.99).images[0]
583
+ self.sdxl_inpainting_pipe.to("cpu")
584
+
585
+ return img
586
+
587
+ def configure(self) -> None:
588
+ super().configure()
589
+ self.active_sh_degree = 0
590
+ self.max_sh_degree = self.cfg.sh_degree
591
+ self._xyz = torch.empty(0)
592
+ self._features_dc = torch.empty(0)
593
+ self._features_rest = torch.empty(0)
594
+ self._scaling = torch.empty(0)
595
+ self._rotation = torch.empty(0)
596
+ self._opacity = torch.empty(0)
597
+ self._opacity_mask = None
598
+ self.max_radii2D = torch.empty(0)
599
+ self.xyz_gradient_accum = torch.empty(0)
600
+ self.denom = torch.empty(0)
601
+ self.noise_ratio = 0.0
602
+ if self.cfg.pred_normal:
603
+ self._normal = torch.empty(0)
604
+ self.optimizer = None
605
+ self.setup_functions()
606
+ self.save_path = None
607
+ self.fixed_xyz = None
608
+ self.fixed_rot = None
609
+
610
+ if self.cfg.inference_only:
611
+ return
612
+ # setup GeoWizard
613
+ geowizard_checkpoint_path = 'lemonaddie/geowizard'
614
+ self.geowizard_pipe = DepthNormalEstimationPipeline.from_pretrained(
615
+ geowizard_checkpoint_path, torch_dtype=torch.float32).to(torch.device("cuda"))
616
+
617
+ self.base_inpainting_pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16)
618
+ # self.base_inpainting_pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, safety_checker=None)
619
+ self.sdxl_inpainting_pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16")
620
+ self.sdxl_inpainting_pipe.scheduler = diffusers.EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
621
+
622
+ if self.cfg.geometry_convert_from.startswith("depth:"):
623
+ # estimate depth
624
+ W, H = parse_wh(self.cfg.img_resolution)
625
+ mask_H, mask_W = H, W
626
+ if max(H, W) > 1024:
627
+ mask_H, mask_W = int(H / max(H, W) * 1024), int(W / max(H, W) * 1024)
628
+ img = self.cfg.geometry_convert_from[len("depth:"):]
629
+ raw_img = img = Image.open(img).convert("RGB")
630
+ img = img.resize((W, H))
631
+
632
+ bg_xyz, bg_color = [], []
633
+
634
+ with torch.no_grad():
635
+ self.predictor.set_image(np.array(raw_img))
636
+ self.ooi_masks = []
637
+ total_inp_ooi_masks = None
638
+ total_ooi_masks = []
639
+ for i in range(len(self.cfg.ooi_bbox) // 4):
640
+ bbox = np.array(self.cfg.ooi_bbox[4*i:4*i+4])
641
+ masks, _, _ = self.predictor.predict(
642
+ point_coords=None,
643
+ point_labels=None,
644
+ box=bbox[None, :],
645
+ multimask_output=False,
646
+ )
647
+ # plt.imshow(masks[0])
648
+ # plt.savefig(os.path.join(self.save_path, f'mask_{i}.png'))
649
+ ooi_masks = np.array(Image.fromarray(masks[0]).resize((W, H), Image.NEAREST))
650
+ ooi_masks = (cv2.blur(ooi_masks.astype(np.float32), (5, 5)) > 0)
651
+ inp_ooi_masks = (cv2.blur(ooi_masks.astype(np.float32), (7, 7)) > 0)
652
+ if i == 0:
653
+ total_inp_ooi_masks = inp_ooi_masks
654
+ else:
655
+ total_inp_ooi_masks += inp_ooi_masks
656
+ total_ooi_masks.append(ooi_masks)
657
+
658
+ total_inp_ooi_masks = total_inp_ooi_masks > 0
659
+ bg_image = self.inpaint(img=img, mask=Image.fromarray(total_inp_ooi_masks), prompt=self.cfg.empty_prompt).resize((W, H))
660
+ self.bg_image = np.array(bg_image)
661
+ self.bg_image_mask = np.array(Image.fromarray(total_inp_ooi_masks).resize((W, H)))
662
+
663
+ xyz, depth = self.img2pc_inpaint(img)
664
+ self.point_cloud = torch.from_numpy(xyz).cuda().float()
665
+
666
+ for ooi_masks in total_ooi_masks:
667
+ transit_masks = np.logical_and(cv2.blur(ooi_masks.astype(np.float32), (3, 3)) > 0, ~ooi_masks)
668
+ depth_tensor = torch.from_numpy(depth)[None, None].cuda() * 2 - 1
669
+ self.ooi_masks.append(torch.tensor(ooi_masks.reshape(-1).astype(np.uint8), device='cuda').float().bool())
670
+ ooi_masks = cv2.blur(ooi_masks.astype(np.float32), (9, 9)) > 0
671
+ mask = torch.from_numpy(ooi_masks.astype(np.float32))[None, None].cuda()
672
+ bg_xyz_pc, _ = self.img2pc_inpaint(bg_image, gt_depth=depth_tensor, mask=1-mask)
673
+
674
+ bg_xyz.append(bg_xyz_pc[ooi_masks][:, :3])
675
+ bg_color.append(np.array(bg_image)[ooi_masks] / 255)
676
+
677
+ xyz = xyz[..., :3].reshape(-1, 3)
678
+ color = np.array(img).reshape(-1, 3) / 255
679
+ additional_pts_num = sum([len(each) for each in bg_xyz])
680
+ xyz = np.concatenate([xyz, np.concatenate(bg_xyz, 0)], 0)
681
+ color = np.concatenate([color, np.concatenate(bg_color, 0)], 0)
682
+ for i in range(len(self.ooi_masks)):
683
+ self.register_buffer(f"ooi_masks_{i}", torch.cat([self.ooi_masks[i], torch.zeros([additional_pts_num], device='cuda').bool()]) )
684
+ self.ooi_masks[i] = getattr(self, f"ooi_masks_{i}")
685
+ self.register_buffer(f"_delete_mask", torch.ones_like(self.ooi_masks[0].float()))
686
+
687
+ # project to 3D space
688
+ xyz = xyz
689
+ color = color
690
+ pcd = BasicPointCloud(
691
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
692
+ )
693
+ self.create_from_pcd(pcd, 10)
694
+ self.training_setup()
695
+
696
+ elif self.cfg.geometry_convert_from.startswith("shap-e:"):
697
+ shap_e_guidance = threestudio.find("shap-e-guidance")(
698
+ self.cfg.shap_e_guidance_config
699
+ )
700
+ prompt = self.cfg.geometry_convert_from[len("shap-e:") :]
701
+ xyz, color = shap_e_guidance(prompt)
702
+
703
+ pcd = BasicPointCloud(
704
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
705
+ )
706
+ self.create_from_pcd(pcd, 10)
707
+ self.training_setup()
708
+
709
+ # Support Initialization from OpenLRM, Please see https://github.com/Adamdad/threestudio-lrm
710
+ elif self.cfg.geometry_convert_from.startswith("lrm:"):
711
+ lrm_guidance = threestudio.find("lrm-guidance")(
712
+ self.cfg.shap_e_guidance_config
713
+ )
714
+ prompt = self.cfg.geometry_convert_from[len("lrm:") :]
715
+ xyz, color = lrm_guidance(prompt)
716
+
717
+ pcd = BasicPointCloud(
718
+ points=xyz, colors=color, normals=np.zeros((xyz.shape[0], 3))
719
+ )
720
+ self.create_from_pcd(pcd, 10)
721
+ self.training_setup()
722
+
723
+ elif os.path.exists(self.cfg.geometry_convert_from):
724
+ threestudio.info(
725
+ "Loading point cloud from %s" % self.cfg.geometry_convert_from
726
+ )
727
+ if self.cfg.geometry_convert_from.endswith(".ckpt"):
728
+ ckpt_dict = torch.load(self.cfg.geometry_convert_from)
729
+ num_pts = ckpt_dict["state_dict"]["geometry._xyz"].shape[0]
730
+ pcd = BasicPointCloud(
731
+ points=np.zeros((num_pts, 3)),
732
+ colors=np.zeros((num_pts, 3)),
733
+ normals=np.zeros((num_pts, 3)),
734
+ )
735
+ self.create_from_pcd(pcd, 10)
736
+ self.training_setup()
737
+ new_ckpt_dict = {}
738
+ for key in self.state_dict():
739
+ if ckpt_dict["state_dict"].__contains__("geometry." + key):
740
+ new_ckpt_dict[key] = ckpt_dict["state_dict"]["geometry." + key]
741
+ else:
742
+ new_ckpt_dict[key] = self.state_dict()[key]
743
+ self.load_state_dict(new_ckpt_dict)
744
+ elif self.cfg.geometry_convert_from.endswith(".ply"):
745
+ if self.cfg.load_ply_only_vertex:
746
+ plydata = PlyData.read(self.cfg.geometry_convert_from)
747
+ vertices = plydata["vertex"]
748
+ positions = np.vstack(
749
+ [vertices["x"], vertices["y"], vertices["z"]]
750
+ ).T
751
+ if vertices.__contains__("red"):
752
+ colors = (
753
+ np.vstack(
754
+ [vertices["red"], vertices["green"], vertices["blue"]]
755
+ ).T
756
+ / 255.0
757
+ )
758
+ else:
759
+ shs = np.random.random((positions.shape[0], 3)) / 255.0
760
+ C0 = 0.28209479177387814
761
+ colors = shs * C0 + 0.5
762
+ normals = np.zeros_like(positions)
763
+ pcd = BasicPointCloud(
764
+ points=positions, colors=colors, normals=normals
765
+ )
766
+ self.create_from_pcd(pcd, 10)
767
+ else:
768
+ self.load_ply(self.cfg.geometry_convert_from)
769
+ self.training_setup()
770
+ else:
771
+ threestudio.info("Geometry not found, initilization with random points")
772
+ num_pts = self.cfg.init_num_pts
773
+ phis = np.random.random((num_pts,)) * 2 * np.pi
774
+ costheta = np.random.random((num_pts,)) * 2 - 1
775
+ thetas = np.arccos(costheta)
776
+ mu = np.random.random((num_pts,))
777
+ radius = self.cfg.pc_init_radius * np.cbrt(mu)
778
+ x = radius * np.sin(thetas) * np.cos(phis)
779
+ y = radius * np.sin(thetas) * np.sin(phis)
780
+ z = radius * np.cos(thetas)
781
+ xyz = np.stack((x, y, z), axis=1)
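+ # Sampling math above: for points uniform in a ball of radius R, invert the CDFs of
+ # the spherical factors: phi ~ U(0, 2*pi), cos(theta) ~ U(-1, 1) (hence arccos), and
+ # r = R * cbrt(u), since the radial CDF is (r/R)^3.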
782
+
783
+ shs = np.random.random((num_pts, 3)) / 255.0
784
+ C0 = 0.28209479177387814
785
+ color = shs * C0 + 0.5
786
+ pcd = BasicPointCloud(
787
+ points=xyz, colors=color, normals=np.zeros((num_pts, 3))
788
+ )
789
+
790
+ self.create_from_pcd(pcd, 10)
791
+ self.training_setup()
792
+
793
+ def add_pc_from_novel_view(self, rgb, mask, depth, c2w, save_path=None):
794
+ W, H = parse_wh(self.cfg.img_resolution)
795
+ # depth estimation -> add points.
796
+ mask = fill_mask(mask)
797
+ mask_array = np.array(mask)
798
+ blur_mask = Image.fromarray(cv2.blur(np.array(mask).astype(np.float32), (7, 7)) > 0)
799
+ res = self.inpaint(img=rgb, mask=blur_mask, prompt=self.side_prompt)
800
+
801
+ depth_unaligned = self.geowizard_pipe(
+ res,
+ denoising_steps=25,
+ ensemble_size=3,
+ processing_res=768,
+ match_input_res=False,
+ domain='outdoor',
+ color_map='Spectral',
+ gt_depth=None,
+ mask=None,
+ show_progress_bar=True,
+ )['depth_np']
811
+ prev_depth = depth_unaligned[~np.array(mask.resize((768,768)))]
812
+ # inpaint the depth map
813
+ depth_array = depth[0].cpu().numpy().astype(np.uint8)
814
+ inpaint_mask = (~mask_array & (depth_array == 0)).astype(np.uint8)
815
+ # inpaint_mask = np.logical_and(~np.array(mask.resize((512, 512), Image.NEAREST)) , depth[0].cpu().numpy().astype(np.uint8)==0 ).astype(np.uint8)
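+ # The rendered depth has holes where no Gaussians project; the steps below fill them
+ # with OpenCV's Telea inpainting, rescale the result into the range of the GeoWizard
+ # prediction on unmasked pixels, and use reproj_func to map aligned depths back to
+ # the renderer's range [l, r] during re-projection.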
816
+ l, r = depth[depth>0].min().item(), depth.max().item()
817
+ depth = (depth - l) / (r - l) * 255  # (unused: overwritten by the inpainted depth below)
818
+ depth = cv2.inpaint(depth_array, inpaint_mask, 3, cv2.INPAINT_TELEA)
819
+ depth = torch.tensor(depth)[None].cuda().float() / 255
820
+ reproj_func = lambda x: (x - prev_depth.min().item()) / (prev_depth.max().item() - prev_depth.min().item()) * (r-l) + l
821
+ depth = depth * (prev_depth.max() - prev_depth.min()) + prev_depth.min()
822
+ depth_tensor = torch.nn.functional.interpolate(depth[None].cuda(), 768, mode='nearest') * 2 - 1
823
+
824
+ _masks = cv2.blur(np.array(mask.resize((768, 768))).astype(float), (20, 20)) > 0
825
+ mask_tensor = torch.from_numpy(_masks.astype(np.float32))[None, None].cuda()
826
+ bg_xyz_pc, _ = self.img2pc_inpaint(res, gt_depth=depth_tensor, mask=1-mask_tensor, proj_func=reproj_func, c2w=c2w)
827
+
828
+ new_xyz = bg_xyz_pc[mask_array][:, :3]
829
+ res = res.resize((W, H))
830
+ new_color = np.array(res)[mask_array] / 255
831
+ pcd = BasicPointCloud(points=new_xyz, colors=new_color, normals=np.zeros((new_xyz.shape[0], 3)))
832
+ self.merge_from_pcd(pcd, 10)
833
+
834
+ save_pc(save_path, new_xyz, new_color)
835
+ return res, mask
836
+
837
+ @property
838
+ def get_scaling(self):
839
+ if self.cfg.sphere:
840
+ return self.scaling_activation(
841
+ torch.mean(self._scaling, dim=-1).unsqueeze(-1).repeat(1, 3)
842
+ ).clip(0, self.cfg.max_scaling)
843
+ return self.scaling_activation(self._scaling).clip(0, self.cfg.max_scaling)
844
+
845
+ @property
846
+ def get_rotation(self):
847
+ return self.rotation_activation(self._rotation)
848
+
849
+ @property
850
+ def get_language_feature(self):
851
+ return self._language_feature
852
+
853
+ @property
854
+ def get_xyz(self):
855
+ ret = self._xyz
+ if self.noise_ratio > 0.0:
+ offset = torch.zeros_like(ret)
+ for idx in range(len(self.ooi_masks)):
+ ooi_masks = getattr(self, f"ooi_masks_{idx}")
+ offset[ooi_masks] = torch.rand(3, device='cuda') * self.noise_ratio
+ # jitter the object-of-interest Gaussians by the sampled offset
+ ret = ret + offset
+ return ret
862
+
863
+ @property
864
+ def get_features(self):
865
+ features_dc = self._features_dc
866
+ features_dc = features_dc.clip(-self.color_clip, self.color_clip)
867
+ features_rest = self._features_rest
868
+ return torch.cat((features_dc, features_rest), dim=1)
869
+
870
+ @property
871
+ def get_opacity(self):
872
+ if self._opacity_mask is None:
873
+ ret = self.opacity_activation(self._opacity)
874
+ else:
875
+ ret = self.opacity_activation(self._opacity) * self._opacity_mask.unsqueeze(-1)
876
+
877
+ if self._delete_mask is None:
878
+ return ret
879
+ else:
880
+ return ret * self._delete_mask.unsqueeze(-1)
881
+
882
+ @property
883
+ def get_normal(self):
884
+ if self.cfg.pred_normal:
885
+ return self._normal
886
+ else:
887
+ raise ValueError("Normal is not predicted")
888
+
889
+ def recover_xyzrot(self):
890
+ self._xyz = torch.nn.Parameter(self.fixed_xyz)
891
+ self._rotation = torch.nn.Parameter(self.fixed_rot)
892
+
893
+ def random_rotate(self, rotate_aug_scale, apply_rotate):
894
+ if self.fixed_xyz is None:
895
+ self.fixed_xyz = self.get_xyz.data
896
+ self.fixed_rot = self.get_rotation.data
897
+
898
+ if apply_rotate:
899
+ ooi_mask = self.ooi_masks_0.view(-1).byte().to(device='cuda').float()
900
+
901
+ rotate = random.randint(-rotate_aug_scale, rotate_aug_scale)
902
+ rot_matrix = rotation_matrix(0, 0, rotate).cuda()
903
+ prev_xyz = self.fixed_xyz.clone()
904
+ ooi_xyz = prev_xyz[ooi_mask.bool()]
905
+ mean = ooi_xyz.mean(0)
906
+ ooi_xyz = ooi_xyz - mean
907
+ after_xyz = torch.einsum('ab,nb->na', rot_matrix, ooi_xyz) + mean
908
+ prev_xyz[ooi_mask.bool()] = after_xyz
909
+ self._xyz = torch.nn.Parameter(prev_xyz)
910
+
911
+ prev_rotation = self.fixed_rot.clone()
912
+ prev_rotation_mtx = build_rotation(prev_rotation)
913
+ after_rotation_mtx = torch.einsum('ab,nbc->nac', rot_matrix, prev_rotation_mtx)
914
+ after_rotation = torch.from_numpy(R.from_matrix(after_rotation_mtx.detach().cpu()).as_quat()).cuda().float()
915
+ after_rotation = torch.einsum('ab,nb->na', REORDER_MTX, after_rotation)
916
+ prev_rotation[ooi_mask.bool()] = after_rotation[ooi_mask.bool()]
917
+ self._rotation = torch.nn.Parameter(prev_rotation)
918
+ else:
919
+ self.recover_xyzrot()
920
+
921
+ def get_covariance(self, scaling_modifier=1):
922
+ return self.covariance_activation(
923
+ self.get_scaling, scaling_modifier, self._rotation
924
+ )
925
+
926
+ def create_from_pcd(self, pcd: BasicPointCloud, spatial_lr_scale: float):
927
+ self.spatial_lr_scale = spatial_lr_scale
928
+ fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
929
+ fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
930
+ features = (
931
+ torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2))
932
+ .float()
933
+ .cuda()
934
+ )
935
+ features[:, :3, 0] = fused_color
936
+ features[:, 3:, 1:] = 0.0
937
+
938
+ threestudio.info(
939
+ f"Number of points at initialisation:{fused_point_cloud.shape[0]}"
940
+ )
941
+
942
+ dist2 = torch.clamp_min(
943
+ distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()),
944
+ 0.0000001,
945
+ )
946
+ scales = torch.log(torch.sqrt(dist2))[..., None].repeat(1, 3)
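+ # distCUDA2 returns the mean squared distance to each point's nearest neighbors;
+ # taking sqrt then log stores an isotropic per-point scale in the log domain that
+ # the exponential scaling activation expects.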
947
+ rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
948
+ rots[:, 0] = 1
949
+
950
+ opacities = inverse_sigmoid(
951
+ self.cfg.opacity_init
952
+ * torch.ones(
953
+ (fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"
954
+ )
955
+ )
956
+
957
+ self._xyz = nn.Parameter(fused_point_cloud.requires_grad_(True))
958
+ self._features_dc = nn.Parameter(
959
+ features[:, :, 0:1].transpose(1, 2).contiguous().requires_grad_(True)
960
+ )
961
+ self._features_rest = nn.Parameter(
962
+ features[:, :, 1:].transpose(1, 2).contiguous().requires_grad_(True)
963
+ )
964
+ self._scaling = nn.Parameter(scales.requires_grad_(True))
965
+ self._rotation = nn.Parameter(rots.requires_grad_(True))
966
+ self._opacity = nn.Parameter(opacities.requires_grad_(True))
967
+ if self.cfg.pred_normal:
968
+ normals = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
969
+ self._normal = nn.Parameter(normals.requires_grad_(True))
970
+ self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
971
+
972
+ self.fused_point_cloud = fused_point_cloud.cpu().clone().detach()
973
+ self.features = features.cpu().clone().detach()
974
+ self.scales = scales.cpu().clone().detach()
975
+ self.rots = rots.cpu().clone().detach()
976
+ self.opacities = opacities.cpu().clone().detach()
977
+
978
+ language_feature = torch.zeros((self._xyz.shape[0], 3), device="cuda")
979
+ self._language_feature = torch.nn.Parameter(language_feature.requires_grad_(True))
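+ # Note on RGB2SH above: for the DC band, SH = (rgb - 0.5) / C0 with
+ # C0 = 0.28209479177387814 (the Y_00 constant), i.e. the inverse of the
+ # `shs * C0 + 0.5` mapping used when initializing random colors.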
980
+
981
+ def merge_from_pcd(self, pcd: BasicPointCloud, spatial_lr_scale: float):
982
+ self.spatial_lr_scale = spatial_lr_scale
983
+ fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
984
+ fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
985
+ features = (
986
+ torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2))
987
+ .float()
988
+ .cuda()
989
+ )
990
+ features[:, :3, 0] = fused_color
991
+ features[:, 3:, 1:] = 0.0
992
+
993
+ threestudio.info(
994
+ f"Number of points at merging:{fused_point_cloud.shape[0]}"
995
+ )
996
+
997
+ dist2 = torch.clamp_min(
998
+ distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()),
999
+ 0.0000001,
1000
+ )
1001
+ scales = torch.log(torch.sqrt(dist2))[..., None].repeat(1, 3)
1002
+ rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
1003
+ rots[:, 0] = 1
1004
+
1005
+ opacities = inverse_sigmoid(
1006
+ self.cfg.opacity_init
1007
+ * torch.ones(
1008
+ (fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"
1009
+ )
1010
+ )
1011
+ self.densification_postfix(
1012
+ fused_point_cloud,
1013
+ features[:, :, 0:1].transpose(1, 2).contiguous(),
1014
+ features[:, :, 1:].transpose(1, 2).contiguous(),
1015
+ opacities,
1016
+ scales,
1017
+ rots,
1018
+ None,
1019
+ torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
1020
+ )
1021
+
1022
+ for idx in range(len(self.ooi_masks)):
1023
+ # self.ooi_masks[idx] = torch.cat([self.ooi_masks[idx], torch.ones([fused_point_cloud.shape[0]], device='cuda') > 0])
1024
+ self.register_buffer(f"ooi_masks_{idx}", torch.cat([getattr(self, f"ooi_masks_{idx}"), torch.zeros([fused_point_cloud.shape[0]], device='cuda').bool()]) )
1025
+ self.ooi_masks[idx] = getattr(self, f"ooi_masks_{idx}")
1026
+ self.register_buffer(f"_delete_mask", torch.ones_like(self.ooi_masks[0].float()))
1027
+
1028
+ # self._xyz = torch.nn.Parameter(torch.cat([self._xyz, fused_point_cloud],0),requires_grad=True)
1029
+ # self._features_dc = torch.nn.Parameter(torch.cat([self._features_dc, features[:, :, 0:1].transpose(1, 2).contiguous()],0),requires_grad=True)
1030
+ # self._features_rest = torch.nn.Parameter(torch.cat([self._features_rest, features[:, :, 1:].transpose(1, 2).contiguous()],0),requires_grad=True)
1031
+ # self._scaling = torch.nn.Parameter(torch.cat([self._scaling, scales],0),requires_grad=True)
1032
+ # self._rotation = torch.nn.Parameter(torch.cat([self._rotation, rots],0),requires_grad=True)
1033
+ # self._opacity = torch.nn.Parameter(torch.cat([self._opacity, opacities],0),requires_grad=True)
1034
+
1035
+ # if self.cfg.pred_normal:
1036
+ # normals = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
1037
+ # self._normal = nn.Parameter(normals.requires_grad_(True))
1038
+ # self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
1039
+
1040
+ # self.fused_point_cloud = fused_point_cloud.cpu().clone().detach()
1041
+ # self.features = features.cpu().clone().detach()
1042
+ # self.scales = scales.cpu().clone().detach()
1043
+ # self.rots = rots.cpu().clone().detach()
1044
+ # self.opacities = opacities.cpu().clone().detach()
1045
+
1046
+ # language_feature = torch.zeros((fused_point_cloud.shape[0], 3), device="cuda")
1047
+ # self._language_feature = torch.nn.Parameter(torch.cat([self._language_feature, language_feature], 0), requires_grad=True)
1048
+ # self.training_setup()
1049
+
1050
+
1051
+ def lang_training_setup(self):
1052
+ training_args = self.cfg
1053
+ l = [
1054
+ {'params': [self._language_feature], 'lr': C(training_args.lang_lr, 0, 0)},
1055
+ ]
1056
+ self._xyz.requires_grad_(False)
1057
+ self._features_dc.requires_grad_(False)
1058
+ self._features_rest.requires_grad_(False)
1059
+ self._scaling.requires_grad_(False)
1060
+ self._rotation.requires_grad_(False)
1061
+ self._opacity.requires_grad_(False)
1062
+ self._language_feature.requires_grad_(True)
1063
+ # self.lang_optimizer = torch.optim.SGD(l, lr=0.0)
1064
+ self.lang_optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15, betas=(self.cfg.lang_beta_1, self.cfg.lang_beta_2))
1065
+ self.optimize_params = ["lang"]
1066
+ self.optimize_list = l
1067
+
1068
+ def after_lang(self):
1069
+ self._xyz.requires_grad_(True)
1070
+ self._features_dc.requires_grad_(True)
1071
+ self._features_rest.requires_grad_(True)
1072
+ self._scaling.requires_grad_(True)
1073
+ self._rotation.requires_grad_(True)
1074
+ self._opacity.requires_grad_(True)
1075
+ self._language_feature.requires_grad_(False)
1076
+
1077
+ def training_setup(self):
1078
+ self._xyz.requires_grad_(True)
1079
+ self._features_dc.requires_grad_(True)
1080
+ self._features_rest.requires_grad_(True)
1081
+ self._scaling.requires_grad_(True)
1082
+ self._rotation.requires_grad_(True)
1083
+ self._opacity.requires_grad_(True)
1084
+ self._language_feature.requires_grad_(False)
1085
+ training_args = self.cfg
1086
+ self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
1087
+ self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
1088
+
1089
+ l = [
1090
+ {
1091
+ "params": [self._xyz],
1092
+ "lr": C(training_args.position_lr, 0, 0),
1093
+ "name": "xyz",
1094
+ },
1095
+ {
1096
+ "params": [self._features_dc],
1097
+ "lr": C(training_args.feature_lr, 0, 0),
1098
+ "name": "f_dc",
1099
+ },
1100
+ {
1101
+ "params": [self._features_rest],
1102
+ "lr": C(training_args.feature_lr, 0, 0) / 20.0,
1103
+ "name": "f_rest",
1104
+ },
1105
+ {
1106
+ "params": [self._opacity],
1107
+ "lr": C(training_args.opacity_lr, 0, 0),
1108
+ "name": "opacity",
1109
+ },
1110
+ {
1111
+ "params": [self._scaling],
1112
+ "lr": C(training_args.scaling_lr, 0, 0),
1113
+ "name": "scaling",
1114
+ },
1115
+ {
1116
+ "params": [self._rotation],
1117
+ "lr": C(training_args.rotation_lr, 0, 0),
1118
+ "name": "rotation",
1119
+ },
1120
+ {'params': [self._language_feature], 'lr': C(training_args.lang_lr, 0, 0), "name": "language_feature"},
1121
+ ]
1122
+ if self.cfg.pred_normal:
1123
+ l.append(
1124
+ {
1125
+ "params": [self._normal],
1126
+ "lr": C(training_args.normal_lr, 0, 0),
1127
+ "name": "normal",
1128
+ },
1129
+ )
1130
+
1131
+ self.optimize_params = [
1132
+ "xyz",
1133
+ "f_dc",
1134
+ "f_rest",
1135
+ "opacity",
1136
+ "scaling",
1137
+ "rotation",
1138
+ "language_feature"
1139
+ ]
1140
+ self.optimize_list = l
1141
+ self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15)
1142
+ self.lang_optimizer = None
1143
+
1144
+ def merge_optimizer(self, net_optimizer):
1145
+ l = self.optimize_list
1146
+ for param in net_optimizer.param_groups:
1147
+ l.append(
1148
+ {
1149
+ "params": param["params"],
1150
+ "lr": param["lr"],
1151
+ }
1152
+ )
1153
+ self.optimizer = torch.optim.Adam(l, lr=0.0)
1154
+ return self.optimizer
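+ # C(...) is threestudio's scheduled-value helper: it resolves a (possibly scheduled)
+ # config entry at the given (epoch, step); with interpolation="exp" the per-group
+ # learning rates below decay exponentially over training.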
1155
+
1156
+ def update_learning_rate(self, iteration):
1157
+ """Learning rate scheduling per step"""
1158
+ for param_group in self.optimizer.param_groups:
1159
+ if not ("name" in param_group):
1160
+ continue
1161
+ if param_group["name"] == "xyz":
1162
+ param_group["lr"] = C(
1163
+ self.cfg.position_lr, 0, iteration, interpolation="exp"
1164
+ )
1165
+ if param_group["name"] == "scaling":
1166
+ param_group["lr"] = C(
1167
+ self.cfg.scaling_lr, 0, iteration, interpolation="exp"
1168
+ )
1169
+ if param_group["name"] == "f_dc":
1170
+ param_group["lr"] = C(
1171
+ self.cfg.feature_lr, 0, iteration, interpolation="exp"
1172
+ )
1173
+ if param_group["name"] == "f_rest":
1174
+ param_group["lr"] = (
1175
+ C(self.cfg.feature_lr, 0, iteration, interpolation="exp") / 20.0
1176
+ )
1177
+ if param_group["name"] == "opacity":
1178
+ param_group["lr"] = C(
1179
+ self.cfg.opacity_lr, 0, iteration, interpolation="exp"
1180
+ )
1181
+ if param_group["name"] == "rotation":
1182
+ param_group["lr"] = C(
1183
+ self.cfg.rotation_lr, 0, iteration, interpolation="exp"
1184
+ )
1185
+ if param_group["name"] == "normal":
1186
+ param_group["lr"] = C(
1187
+ self.cfg.normal_lr, 0, iteration, interpolation="exp"
1188
+ )
1189
+ if self.lang_optimizer is not None:
1190
+ for param_group in self.lang_optimizer.param_groups:
1191
+ if not ("name" in param_group):
1192
+ continue
1193
+ if param_group["name"] == "language_feature":
1194
+ param_group["lr"] = C(
1195
+ self.cfg.lang_lr, 0, iteration, interpolation="exp"
1196
+ )
1197
+ self.color_clip = C(self.cfg.color_clip, 0, iteration)
1198
+
1199
+ def reset_opacity(self):
1200
+ # opacities_new = inverse_sigmoid(torch.min(self.get_opacity, torch.ones_like(self.get_opacity)*0.01))
1201
+ opacities_new = inverse_sigmoid(self.get_opacity * 0.9)
1202
+ optimizable_tensors = self.replace_tensor_to_optimizer(opacities_new, "opacity")
1203
+ self._opacity = optimizable_tensors["opacity"]
1204
+
1205
+ def to(self, device="cpu"):
1206
+ self._xyz = self._xyz.to(device)
1207
+ self._features_dc = self._features_dc.to(device)
1208
+ self._features_rest = self._features_rest.to(device)
1209
+ self._opacity = self._opacity.to(device)
1210
+ self._scaling = self._scaling.to(device)
1211
+ self._rotation = self._rotation.to(device)
1212
+ self._normal = self._normal.to(device)
1213
+ self._language_feature = self._language_feature.to(device)
1214
+
1215
+ def replace_tensor_to_optimizer(self, tensor, name):
1216
+ optimizable_tensors = {}
1217
+ for group in self.optimizer.param_groups:
1218
+ if ("name" in group) and group["name"] == name:
1219
+ stored_state = self.optimizer.state.get(group["params"][0], None)
1220
+ stored_state["exp_avg"] = torch.zeros_like(tensor)
1221
+ stored_state["exp_avg_sq"] = torch.zeros_like(tensor)
1222
+
1223
+ del self.optimizer.state[group["params"][0]]
1224
+ group["params"][0] = nn.Parameter(tensor.requires_grad_(True))
1225
+ self.optimizer.state[group["params"][0]] = stored_state
1226
+
1227
+ optimizable_tensors[group["name"]] = group["params"][0]
1228
+ return optimizable_tensors
1229
+
1230
+ def _prune_optimizer(self, mask):
1231
+ optimizable_tensors = {}
1232
+ for group in self.optimizer.param_groups:
1233
+ if ("name" in group) and (group["name"] in self.optimize_params):
1234
+ stored_state = self.optimizer.state.get(group["params"][0], None)
1235
+ if stored_state is not None:
1236
+ stored_state["exp_avg"] = stored_state["exp_avg"][mask]
1237
+ stored_state["exp_avg_sq"] = stored_state["exp_avg_sq"][mask]
1238
+
1239
+ del self.optimizer.state[group["params"][0]]
1240
+ group["params"][0] = nn.Parameter(
1241
+ (group["params"][0][mask].requires_grad_(True))
1242
+ )
1243
+ self.optimizer.state[group["params"][0]] = stored_state
1244
+
1245
+ optimizable_tensors[group["name"]] = group["params"][0]
1246
+ else:
1247
+ group["params"][0] = nn.Parameter(
1248
+ group["params"][0][mask].requires_grad_(True)
1249
+ )
1250
+ optimizable_tensors[group["name"]] = group["params"][0]
1251
+ return optimizable_tensors
1252
+
1253
+ def prune_points(self, mask):
1254
+ valid_points_mask = ~mask
1255
+ optimizable_tensors = self._prune_optimizer(valid_points_mask)
1256
+
1257
+ self._xyz = optimizable_tensors["xyz"]
1258
+ self._features_dc = optimizable_tensors["f_dc"]
1259
+ self._features_rest = optimizable_tensors["f_rest"]
1260
+ self._opacity = optimizable_tensors["opacity"]
1261
+ self._scaling = optimizable_tensors["scaling"]
1262
+ self._rotation = optimizable_tensors["rotation"]
1263
+ self._language_feature = optimizable_tensors["language_feature"]
1264
+ if self.cfg.pred_normal:
1265
+ self._normal = optimizable_tensors["normal"]
1266
+
1267
+ self.xyz_gradient_accum = self.xyz_gradient_accum[valid_points_mask]
1268
+
1269
+ self.denom = self.denom[valid_points_mask]
1270
+ self.max_radii2D = self.max_radii2D[valid_points_mask]
1271
+
1272
+ def cat_tensors_to_optimizer(self, tensors_dict):
1273
+ optimizable_tensors = {}
1274
+ for group in self.optimizer.param_groups:
1275
+ if ("name" in group) and (group["name"] in self.optimize_params):
1276
+ extension_tensor = tensors_dict[group["name"]]
1277
+ stored_state = self.optimizer.state.get(group["params"][0], None)
1278
+ if stored_state is not None:
1279
+ stored_state["exp_avg"] = torch.cat(
1280
+ (stored_state["exp_avg"], torch.zeros_like(extension_tensor)),
1281
+ dim=0,
1282
+ )
1283
+ stored_state["exp_avg_sq"] = torch.cat(
1284
+ (
1285
+ stored_state["exp_avg_sq"],
1286
+ torch.zeros_like(extension_tensor),
1287
+ ),
1288
+ dim=0,
1289
+ )
1290
+
1291
+ del self.optimizer.state[group["params"][0]]
1292
+ group["params"][0] = nn.Parameter(
1293
+ torch.cat(
1294
+ (group["params"][0], extension_tensor), dim=0
1295
+ ).requires_grad_(True)
1296
+ )
1297
+ self.optimizer.state[group["params"][0]] = stored_state
1298
+
1299
+ optimizable_tensors[group["name"]] = group["params"][0]
1300
+ else:
1301
+ group["params"][0] = nn.Parameter(
1302
+ torch.cat(
1303
+ (group["params"][0], extension_tensor), dim=0
1304
+ ).requires_grad_(True)
1305
+ )
1306
+ optimizable_tensors[group["name"]] = group["params"][0]
1307
+
1308
+ return optimizable_tensors
1309
+
1310
+ def densification_postfix(
1311
+ self,
1312
+ new_xyz,
1313
+ new_features_dc,
1314
+ new_features_rest,
1315
+ new_opacities,
1316
+ new_scaling,
1317
+ new_rotation,
1318
+ new_normal=None,
1319
+ new_language_feature=None
1320
+ ):
1321
+ d = {
1322
+ "xyz": new_xyz,
1323
+ "f_dc": new_features_dc,
1324
+ "f_rest": new_features_rest,
1325
+ "opacity": new_opacities,
1326
+ "scaling": new_scaling,
1327
+ "rotation": new_rotation,
1328
+ "language_feature": new_language_feature,
1329
+ }
1330
+ if self.cfg.pred_normal:
1331
+ d.update({"normal": new_normal})
1332
+
1333
+ optimizable_tensors = self.cat_tensors_to_optimizer(d)
1334
+ self._xyz = optimizable_tensors["xyz"]
1335
+ self._features_dc = optimizable_tensors["f_dc"]
1336
+ self._features_rest = optimizable_tensors["f_rest"]
1337
+ self._opacity = optimizable_tensors["opacity"]
1338
+ self._scaling = optimizable_tensors["scaling"]
1339
+ self._rotation = optimizable_tensors["rotation"]
1340
+ self._language_feature = optimizable_tensors["language_feature"]
1341
+ if self.cfg.pred_normal:
1342
+ self._normal = optimizable_tensors["normal"]
1343
+
1344
+ self.xyz_gradient_accum = torch.zeros((self._xyz.shape[0], 1), device="cuda")
1345
+ self.denom = torch.zeros((self._xyz.shape[0], 1), device="cuda")
1346
+ self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
1347
+
1348
+ def densify_and_split(self, grads, grad_threshold, N=2):
1349
+ n_init_points = self._xyz.shape[0]
1350
+ # Extract points that satisfy the gradient condition
1351
+ padded_grad = torch.zeros((n_init_points), device="cuda")
1352
+ padded_grad[: grads.shape[0]] = grads.squeeze()
1353
+ selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False)
1354
+ selected_pts_mask = torch.logical_and(
1355
+ selected_pts_mask,
1356
+ torch.norm(self.get_scaling, dim=1) > self.cfg.split_thresh,
1357
+ )
1358
+
1359
+ # divide N to enhance robustness
1360
+ stds = self.get_scaling[selected_pts_mask].repeat(N, 1) / N
1361
+ means = torch.zeros((stds.size(0), 3), device="cuda")
1362
+ samples = torch.normal(mean=means, std=stds)
1363
+ rots = build_rotation(self._rotation[selected_pts_mask]).repeat(N, 1, 1)
1364
+ new_xyz = torch.bmm(rots, samples.unsqueeze(-1)).squeeze(-1) + self._xyz[
1365
+ selected_pts_mask
1366
+ ].repeat(N, 1)
1367
+ new_scaling = self.scaling_inverse_activation(
1368
+ self.get_scaling[selected_pts_mask].repeat(N, 1) / (0.8 * N)
1369
+ )
1370
+ new_rotation = self._rotation[selected_pts_mask].repeat(N, 1)
1371
+ new_features_dc = self._features_dc[selected_pts_mask].repeat(N, 1, 1)
1372
+ new_features_rest = self._features_rest[selected_pts_mask].repeat(N, 1, 1)
1373
+ new_opacity = self._opacity[selected_pts_mask].repeat(N, 1)
1374
+ new_language_feature = self._language_feature[selected_pts_mask].repeat(N,1)
1375
+ if self.cfg.pred_normal:
1376
+ new_normal = self._normal[selected_pts_mask].repeat(N, 1)
1377
+ else:
1378
+ new_normal = None
1379
+
1380
+ self.densification_postfix(
1381
+ new_xyz,
1382
+ new_features_dc,
1383
+ new_features_rest,
1384
+ new_opacity,
1385
+ new_scaling,
1386
+ new_rotation,
1387
+ new_normal,
1388
+ new_language_feature
1389
+ )
1390
+
1391
+ prune_filter = torch.cat(
1392
+ (
1393
+ selected_pts_mask,
1394
+ torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool),
1395
+ )
1396
+ )
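+ # prune_filter marks the original (pre-split) Gaussians for removal; the trailing
+ # zeros cover the N * num_selected replacement points just appended by
+ # densification_postfix, which are kept.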
1397
+ self.prune_points(prune_filter)
1398
+
1399
+ def densify_and_clone(self, grads, grad_threshold):
1400
+ # Extract points that satisfy the gradient condition
1401
+ selected_pts_mask = torch.where(
1402
+ torch.norm(grads, dim=-1) >= grad_threshold, True, False
1403
+ )
1404
+ selected_pts_mask = torch.logical_and(
1405
+ selected_pts_mask,
1406
+ torch.norm(self.get_scaling, dim=1) <= self.cfg.split_thresh,
1407
+ )
1408
+
1409
+ new_xyz = self._xyz[selected_pts_mask]
1410
+ new_features_dc = self._features_dc[selected_pts_mask]
1411
+ new_features_rest = self._features_rest[selected_pts_mask]
1412
+ new_opacities = self._opacity[selected_pts_mask]
1413
+ new_scaling = self._scaling[selected_pts_mask]
1414
+ new_rotation = self._rotation[selected_pts_mask]
1415
+ new_language_feature = self._language_feature[selected_pts_mask]
1416
+ if self.cfg.pred_normal:
1417
+ new_normal = self._normal[selected_pts_mask]
1418
+ else:
1419
+ new_normal = None
1420
+
1421
+ self.densification_postfix(
1422
+ new_xyz,
1423
+ new_features_dc,
1424
+ new_features_rest,
1425
+ new_opacities,
1426
+ new_scaling,
1427
+ new_rotation,
1428
+ new_normal,
1429
+ new_language_feature
1430
+ )
1431
+
1432
+ def densify(self, max_grad):
1433
+ grads = self.xyz_gradient_accum / self.denom
1434
+ grads[grads.isnan()] = 0.0
1435
+
1436
+ self.densify_and_clone(grads, max_grad)
1437
+ self.densify_and_split(grads, max_grad)
1438
+
1439
+ def prune(self, min_opacity, max_screen_size):
1440
+ prune_mask = (self.get_opacity < min_opacity).squeeze()
1441
+ if self.cfg.prune_big_points:
1442
+ big_points_vs = self.max_radii2D > (torch.mean(self.max_radii2D) * 3)
1443
+ prune_mask = torch.logical_or(prune_mask, big_points_vs)
1444
+ self.prune_points(prune_mask)
1445
+
1446
+ torch.cuda.empty_cache()
1447
+
1448
+ def add_densification_stats(self, viewspace_point_tensor, update_filter):
1449
+ self.xyz_gradient_accum[update_filter] += torch.norm(
1450
+ viewspace_point_tensor.grad[update_filter, :2], dim=-1, keepdim=True
1451
+ )
1452
+ self.denom[update_filter] += 1
1453
+
1454
+ @torch.no_grad()
1455
+ def update_states(
1456
+ self,
1457
+ iteration,
1458
+ visibility_filter,
1459
+ radii,
1460
+ viewspace_point_tensor,
1461
+ ):
1462
+ if self._xyz.shape[0] >= self.cfg.max_num + 100:
1463
+ prune_mask = torch.randperm(self._xyz.shape[0]).to(self._xyz.device)
1464
+ prune_mask = prune_mask > self.cfg.max_num
1465
+ self.prune_points(prune_mask)
1466
+ return
1467
+ # Keep track of max radii in image-space for pruning
1468
+ # loop over batch
1469
+ bs = len(viewspace_point_tensor)
1470
+ for i in range(bs):
1471
+ radii_i = radii[i]
1472
+ visibility_filter_i = visibility_filter[i]
1473
+ viewspace_point_tensor_i = viewspace_point_tensor[i]
1474
+ self.max_radii2D = torch.max(self.max_radii2D, radii_i.float())
1475
+
1476
+ self.add_densification_stats(viewspace_point_tensor_i, visibility_filter_i)
1477
+
1478
+ if (
1479
+ iteration > self.cfg.prune_from_iter
1480
+ and iteration < self.cfg.prune_until_iter
1481
+ and iteration % self.cfg.prune_interval == 0
1482
+ ):
1483
+ self.prune(self.cfg.min_opac_prune, self.cfg.radii2d_thresh)
1484
+ if iteration % self.cfg.opacity_reset_interval == 0:
1485
+ self.reset_opacity()
1486
+
1487
+ if (
1488
+ iteration > self.cfg.densify_from_iter
1489
+ and iteration < self.cfg.densify_until_iter
1490
+ and iteration % self.cfg.densification_interval == 0
1491
+ ):
1492
+ self.densify(self.cfg.densify_grad_threshold)
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_dynamic.py ADDED
@@ -0,0 +1,77 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+ import math
12
+ import os
13
+ import random
14
+ import sys
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime
17
+ from typing import NamedTuple
18
+
19
+ import numpy as np
20
+ import threestudio
21
+ import torch
22
+ import torch.nn as nn
23
+ import torch.nn.functional as F
24
+ from plyfile import PlyData, PlyElement
25
+ from simple_knn._C import distCUDA2
26
+ from threestudio.models.geometry.base import BaseGeometry
27
+ from threestudio.utils.misc import C
28
+ from threestudio.utils.typing import *
29
+
30
+ from .gaussian_base import GaussianBaseModel
31
+
32
+
33
+ @threestudio.register("gaussian-splatting-dynamic")
34
+ class GaussianDynamicModel(GaussianBaseModel):
35
+ @dataclass
36
+ class Config(GaussianBaseModel.Config):
37
+ flow: bool = True
38
+ num_frames: int = 10
39
+ delta_pos_lr: float = 0.001
40
+ delta_rot_lr: float = 0.0001
41
+
42
+ cfg: Config
43
+
44
+ def configure(self) -> None:
45
+ super().configure()
46
+ self._delta_xyz = torch.empty(0)
47
+ self._delta_rot = torch.empty(0)
48
+ self.time_index = 0
49
+
50
+ def training_setup(self):
51
+ super().training_setup()
52
+ l = self.optimize_list
53
+ training_args = self.cfg
54
+ l.append(
+ {
+ "params": [self._delta_xyz],
+ "lr": C(training_args.delta_pos_lr, 0, 0),
+ "name": "delta_xyz",
+ },
+ )
+ l.append(
+ {
+ "params": [self._delta_rot],
+ "lr": C(training_args.delta_rot_lr, 0, 0),
+ "name": "delta_rot",
+ },
+ )
68
+
69
+ @property
70
+ def get_rotation(self):
71
+ return self.rotation_activation(
72
+ self._rotation + self._delta_rot[self.time_index]
73
+ )
74
+
75
+ @property
76
+ def get_xyz(self):
77
+ return self._xyz + self._delta_xyz[self.time_index]
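+ # Usage sketch (assumed workflow): once _delta_xyz / _delta_rot are allocated per
+ # frame, set `model.time_index = t` before rendering frame t so the properties above
+ # offset positions and rotations for that timestep.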
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/gaussian_io.py ADDED
@@ -0,0 +1,327 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+ import os
12
+ import random
13
+ import sys
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime
16
+ from typing import NamedTuple
17
+
18
+ import mcubes
19
+ import numpy as np
20
+ import threestudio
21
+ import torch
22
+ import torch.nn as nn
23
+ import torch.nn.functional as F
24
+ from plyfile import PlyData, PlyElement
25
+ from simple_knn._C import distCUDA2
26
+ from threestudio.models.geometry.base import BaseGeometry
27
+ from threestudio.models.mesh import Mesh
28
+ from threestudio.utils.typing import *
29
+ from tqdm import tqdm
30
+
31
+ from .mesh_utils import *
32
+
33
+
34
+ def gaussian_3d_coeff(xyzs, covs):
35
+ # xyzs: [N, 3]
36
+ # covs: [N, 6]
37
+ x, y, z = xyzs[:, 0], xyzs[:, 1], xyzs[:, 2]
38
+ a, b, c, d, e, f = (
39
+ covs[:, 0],
40
+ covs[:, 1],
41
+ covs[:, 2],
42
+ covs[:, 3],
43
+ covs[:, 4],
44
+ covs[:, 5],
45
+ )
46
+
47
+ # eps must be small enough !!!
48
+ inv_det = 1 / (
49
+ a * d * f + 2 * e * c * b - e**2 * a - c**2 * d - b**2 * f + 1e-24
50
+ )
51
+ inv_a = (d * f - e**2) * inv_det
52
+ inv_b = (e * c - b * f) * inv_det
53
+ inv_c = (e * b - c * d) * inv_det
54
+ inv_d = (a * f - c**2) * inv_det
55
+ inv_e = (b * c - e * a) * inv_det
56
+ inv_f = (a * d - b**2) * inv_det
57
+
58
+ power = (
59
+ -0.5 * (x**2 * inv_a + y**2 * inv_d + z**2 * inv_f)
60
+ - x * y * inv_b
61
+ - x * z * inv_c
62
+ - y * z * inv_e
63
+ )
64
+
65
+ power[power > 0] = -1e10 # abnormal values... make weights 0
66
+
67
+ return torch.exp(power)
68
+
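+ # gaussian_3d_coeff evaluates the unnormalized density exp(-0.5 * d^T Sigma^{-1} d)
+ # for each offset d = xyzs[i] against the packed symmetric covariance
+ # covs[i] = (a, b, c, d, e, f) = (S00, S01, S02, S11, S12, S22), inverting Sigma in
+ # closed form via its cofactors. Minimal sketch, assuming unit isotropic covariances:
+ #   w = gaussian_3d_coeff(torch.zeros(4, 3), torch.tensor([[1., 0, 0, 1, 0, 1]] * 4))
+ #   # -> tensor of ones, since all offsets are zero.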
69
+
70
+ @threestudio.register("gaussian-splatting-io")
71
+ class GaussianIO:
72
+ def construct_list_of_attributes(self):
73
+ l = ["x", "y", "z", "nx", "ny", "nz"]
74
+ # All channels except the 3 DC
75
+ for i in range(self._features_dc.shape[1] * self._features_dc.shape[2]):
76
+ l.append("f_dc_{}".format(i))
77
+ for i in range(self._features_rest.shape[1] * self._features_rest.shape[2]):
78
+ l.append("f_rest_{}".format(i))
79
+ l.append("opacity")
80
+ for i in range(self._scaling.shape[1]):
81
+ l.append("scale_{}".format(i))
82
+ for i in range(self._rotation.shape[1]):
83
+ l.append("rot_{}".format(i))
84
+ return l
85
+
86
+ def save_ply(self, path):
87
+ xyz = self._xyz.detach().cpu().numpy()
88
+ normals = np.zeros_like(xyz)
89
+ f_dc = (
90
+ self._features_dc.detach()
91
+ .transpose(1, 2)
92
+ .flatten(start_dim=1)
93
+ .contiguous()
94
+ .cpu()
95
+ .numpy()
96
+ )
97
+ f_rest = (
98
+ self._features_rest.detach()
99
+ .transpose(1, 2)
100
+ .flatten(start_dim=1)
101
+ .contiguous()
102
+ .cpu()
103
+ .numpy()
104
+ )
105
+ opacities = self._opacity.detach().cpu().numpy()
106
+ scale = self._scaling.detach().cpu().numpy()
107
+ rotation = self._rotation.detach().cpu().numpy()
108
+
109
+ dtype_full = [
110
+ (attribute, "f4") for attribute in self.construct_list_of_attributes()
111
+ ]
112
+
113
+ elements = np.empty(xyz.shape[0], dtype=dtype_full)
114
+ attributes = np.concatenate(
115
+ (xyz, normals, f_dc, f_rest, opacities, scale, rotation), axis=1
116
+ )
117
+ elements[:] = list(map(tuple, attributes))
118
+ el = PlyElement.describe(elements, "vertex")
119
+ PlyData([el]).write(path)
120
+
121
+ def load_ply(self, path):
122
+ plydata = PlyData.read(path)
123
+
124
+ xyz = np.stack(
125
+ (
126
+ np.asarray(plydata.elements[0]["x"]),
127
+ np.asarray(plydata.elements[0]["y"]),
128
+ np.asarray(plydata.elements[0]["z"]),
129
+ ),
130
+ axis=1,
131
+ )
132
+ opacities = np.asarray(plydata.elements[0]["opacity"])[..., np.newaxis]
133
+
134
+ features_dc = np.zeros((xyz.shape[0], 3, 1))
135
+ features_dc[:, 0, 0] = np.asarray(plydata.elements[0]["f_dc_0"])
136
+ features_dc[:, 1, 0] = np.asarray(plydata.elements[0]["f_dc_1"])
137
+ features_dc[:, 2, 0] = np.asarray(plydata.elements[0]["f_dc_2"])
138
+
139
+ if self.max_sh_degree > 0:
140
+ extra_f_names = [
141
+ p.name
142
+ for p in plydata.elements[0].properties
143
+ if p.name.startswith("f_rest_")
144
+ ]
145
+ extra_f_names = sorted(extra_f_names, key=lambda x: int(x.split("_")[-1]))
146
+ assert len(extra_f_names) == 3 * (self.max_sh_degree + 1) ** 2 - 3
147
+ features_extra = np.zeros((xyz.shape[0], len(extra_f_names)))
148
+ for idx, attr_name in enumerate(extra_f_names):
149
+ features_extra[:, idx] = np.asarray(plydata.elements[0][attr_name])
150
+ # Reshape (P,F*SH_coeffs) to (P, F, SH_coeffs except DC)
151
+ features_extra = features_extra.reshape(
152
+ (features_extra.shape[0], 3, (self.max_sh_degree + 1) ** 2 - 1)
153
+ )
154
+
155
+ scale_names = [
156
+ p.name
157
+ for p in plydata.elements[0].properties
158
+ if p.name.startswith("scale_")
159
+ ]
160
+ scale_names = sorted(scale_names, key=lambda x: int(x.split("_")[-1]))
161
+ scales = np.zeros((xyz.shape[0], len(scale_names)))
162
+ for idx, attr_name in enumerate(scale_names):
163
+ scales[:, idx] = np.asarray(plydata.elements[0][attr_name])
164
+
165
+ rot_names = [
166
+ p.name for p in plydata.elements[0].properties if p.name.startswith("rot")
167
+ ]
168
+ rot_names = sorted(rot_names, key=lambda x: int(x.split("_")[-1]))
169
+ rots = np.zeros((xyz.shape[0], len(rot_names)))
170
+ for idx, attr_name in enumerate(rot_names):
171
+ rots[:, idx] = np.asarray(plydata.elements[0][attr_name])
172
+
173
+ self._xyz = nn.Parameter(
174
+ torch.tensor(xyz, dtype=torch.float, device="cuda").requires_grad_(True)
175
+ )
176
+ self._features_dc = nn.Parameter(
177
+ torch.tensor(features_dc, dtype=torch.float, device="cuda")
178
+ .transpose(1, 2)
179
+ .contiguous()
180
+ .requires_grad_(True)
181
+ )
182
+ if self.max_sh_degree > 0:
183
+ self._features_rest = nn.Parameter(
184
+ torch.tensor(features_extra, dtype=torch.float, device="cuda")
185
+ .transpose(1, 2)
186
+ .contiguous()
187
+ .requires_grad_(True)
188
+ )
189
+ else:
190
+ self._features_rest = nn.Parameter(
191
+ torch.tensor(features_dc, dtype=torch.float, device="cuda")[:, :, 1:]
192
+ .transpose(1, 2)
193
+ .contiguous()
194
+ .requires_grad_(True)
195
+ )
196
+ self._opacity = nn.Parameter(
197
+ torch.tensor(opacities, dtype=torch.float, device="cuda").requires_grad_(
198
+ True
199
+ )
200
+ )
201
+ self._scaling = nn.Parameter(
202
+ torch.tensor(scales, dtype=torch.float, device="cuda").requires_grad_(True)
203
+ )
204
+ self._rotation = nn.Parameter(
205
+ torch.tensor(rots, dtype=torch.float, device="cuda").requires_grad_(True)
206
+ )
207
+ self.max_radii2D = torch.zeros((self._xyz.shape[0]), device="cuda")
208
+ self.active_sh_degree = self.max_sh_degree
209
+
210
+ @torch.no_grad()
211
+ def extract_fields(self, resolution=128, num_blocks=16, relax_ratio=1.5):
212
+ # resolution: resolution of field
213
+
214
+ block_size = 2 / num_blocks
215
+
216
+ assert resolution % num_blocks == 0  # each axis must split evenly into blocks
217
+ split_size = resolution // num_blocks
218
+
219
+ opacities = self.get_opacity
220
+
221
+ # pre-filter low opacity gaussians to save computation
222
+ mask = (opacities > 0.005).squeeze(1)
223
+
224
+ opacities = opacities[mask]
225
+ xyzs = self.get_xyz[mask]
226
+ stds = self.get_scaling[mask]
227
+
228
+ # normalize to ~ [-1, 1]
229
+ mn, mx = xyzs.amin(0), xyzs.amax(0)
230
+ self.center = (mn + mx) / 2
231
+ self.scale = 1.8 / (mx - mn).amax().item()
232
+
233
+ xyzs = (xyzs - self.center) * self.scale
234
+ stds = stds * self.scale
235
+
236
+ covs = self.covariance_activation(stds, 1, self._rotation[mask])
237
+
238
+ # tile
239
+ device = opacities.device
240
+ occ = torch.zeros([resolution] * 3, dtype=torch.float32, device=device)
241
+
242
+ X = torch.linspace(-1, 1, resolution).split(split_size)
243
+ Y = torch.linspace(-1, 1, resolution).split(split_size)
244
+ Z = torch.linspace(-1, 1, resolution).split(split_size)
245
+
246
+ # loop over blocks (assumes the max Gaussian extent is smaller than relax_ratio * block_size)
247
+ for xi, xs in tqdm(enumerate(X)):
248
+ for yi, ys in enumerate(Y):
249
+ for zi, zs in enumerate(Z):
250
+ xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing="ij")  # explicit indexing avoids the torch deprecation warning
251
+ # sample points [M, 3]
252
+ pts = torch.cat(
253
+ [xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)],
254
+ dim=-1,
255
+ ).to(device)
256
+ # in-tile gaussians mask
257
+ vmin, vmax = pts.amin(0), pts.amax(0)
258
+ vmin -= block_size * relax_ratio
259
+ vmax += block_size * relax_ratio
260
+ mask = (xyzs < vmax).all(-1) & (xyzs > vmin).all(-1)
261
+ # if hit no gaussian, continue to next block
262
+ if not mask.any():
263
+ continue
264
+ mask_xyzs = xyzs[mask] # [L, 3]
265
+ mask_covs = covs[mask] # [L, 6]
266
+ mask_opas = opacities[mask].view(1, -1) # [L, 1] --> [1, L]
267
+
268
+ # query per point-gaussian pair.
269
+ g_pts = pts.unsqueeze(1).repeat(
270
+ 1, mask_covs.shape[0], 1
271
+ ) - mask_xyzs.unsqueeze(
272
+ 0
273
+ ) # [M, L, 3]
274
+ g_covs = mask_covs.unsqueeze(0).repeat(
275
+ pts.shape[0], 1, 1
276
+ ) # [M, L, 6]
277
+
278
+ # batch on gaussian to avoid OOM
279
+ batch_g = 1024
280
+ val = 0
281
+ for start in range(0, g_covs.shape[1], batch_g):
282
+ end = min(start + batch_g, g_covs.shape[1])
283
+ w = gaussian_3d_coeff(
284
+ g_pts[:, start:end].reshape(-1, 3),
285
+ g_covs[:, start:end].reshape(-1, 6),
286
+ ).reshape(
287
+ pts.shape[0], -1
288
+ ) # [M, l]
289
+ val += (mask_opas[:, start:end] * w).sum(-1)
290
+
291
+ # kiui.lo(val, mask_opas, w)
292
+
293
+ occ[
294
+ xi * split_size : xi * split_size + len(xs),
295
+ yi * split_size : yi * split_size + len(ys),
296
+ zi * split_size : zi * split_size + len(zs),
297
+ ] = val.reshape(len(xs), len(ys), len(zs))
298
+
299
+ # kiui.lo(occ, verbose=1)
300
+
301
+ return occ
302
+
303
+ def extract_mesh(self, density_thresh=0.8, resolution=128, decimate_target=1e5):
304
+ occ = self.extract_fields(resolution).detach().cpu().numpy()
305
+
306
+ vertices, triangles = mcubes.marching_cubes(occ, density_thresh)
307
+ vertices = vertices / (resolution - 1.0) * 2 - 1
308
+
309
+ # transform back to the original space
310
+ vertices = vertices / self.scale + self.center.detach().cpu().numpy()
311
+
312
+ vertices, triangles = clean_mesh(
313
+ vertices, triangles, remesh=True, remesh_size=0.015
314
+ )
315
+ if decimate_target > 0 and triangles.shape[0] > decimate_target:
316
+ vertices, triangles = decimate_mesh(vertices, triangles, decimate_target)
317
+
318
+ v = torch.from_numpy(vertices.astype(np.float32)).contiguous().cuda()
319
+ f = torch.from_numpy(triangles.astype(np.int32)).contiguous().cuda()
320
+
321
+ threestudio.info(
322
+ f"marching cubes result: {v.shape} ({v.min().item()}-{v.max().item()}), {f.shape}"
323
+ )
324
+
325
+ mesh = Mesh(v_pos=v, t_pos_idx=f)
326
+
327
+ return mesh
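+ # Usage sketch (hypothetical, assumes a trained model instance `gs`):
+ #   mesh = gs.extract_mesh(density_thresh=0.8, resolution=128)
+ # extract_fields accumulates opacity-weighted densities on a grid, then marching
+ # cubes plus cleaning/decimation turn the level set into a triangle mesh.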
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/geometry/mesh_utils.py ADDED
@@ -0,0 +1,150 @@
1
+ import numpy as np
2
+ import threestudio
3
+
4
+
5
+ def poisson_mesh_reconstruction(points, normals=None):
6
+ # points/normals: [N, 3] np.ndarray
7
+
8
+ import open3d as o3d
9
+
10
+ pcd = o3d.geometry.PointCloud()
11
+ pcd.points = o3d.utility.Vector3dVector(points)
12
+
13
+ # outlier removal
14
+ pcd, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=10)
15
+
16
+ # normals
17
+ if normals is None:
18
+ pcd.estimate_normals()
19
+ else:
20
+ pcd.normals = o3d.utility.Vector3dVector(normals[ind])
21
+
22
+ # visualize
23
+ o3d.visualization.draw_geometries([pcd], point_show_normal=False)
24
+
25
+ mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
26
+ pcd, depth=9
27
+ )
28
+ vertices_to_remove = densities < np.quantile(densities, 0.1)
29
+ mesh.remove_vertices_by_mask(vertices_to_remove)
30
+
31
+ # visualize
32
+ o3d.visualization.draw_geometries([mesh])
33
+
34
+ vertices = np.asarray(mesh.vertices)
35
+ triangles = np.asarray(mesh.triangles)
36
+
37
+ print(
38
+ f"[INFO] poisson mesh reconstruction: {points.shape} --> {vertices.shape} / {triangles.shape}"
39
+ )
40
+
41
+ return vertices, triangles
42
+
43
+
44
+ def decimate_mesh(
45
+ verts, faces, target, backend="pymeshlab", remesh=False, optimalplacement=True
46
+ ):
47
+ # optimalplacement: default is True, but for flat mesh must turn False to prevent spike artifect.
48
+
49
+ _ori_vert_shape = verts.shape
50
+ _ori_face_shape = faces.shape
51
+
52
+ if backend == "pyfqmr":
53
+ import pyfqmr
54
+
55
+ solver = pyfqmr.Simplify()
56
+ solver.setMesh(verts, faces)
57
+ solver.simplify_mesh(target_count=target, preserve_border=False, verbose=False)
58
+ verts, faces, normals = solver.getMesh()
59
+ else:
60
+ import pymeshlab as pml
61
+
62
+ m = pml.Mesh(verts, faces)
63
+ ms = pml.MeshSet()
64
+ ms.add_mesh(m, "mesh") # will copy!
65
+
66
+ # filters
67
+ # ms.meshing_decimation_clustering(threshold=pml.PercentageValue(1))
68
+ ms.meshing_decimation_quadric_edge_collapse(
69
+ targetfacenum=int(target), optimalplacement=optimalplacement
70
+ )
71
+
72
+ if remesh:
73
+ # ms.apply_coord_taubin_smoothing()
74
+ ms.meshing_isotropic_explicit_remeshing(
75
+ iterations=3, targetlen=pml.PercentageValue(1)
76
+ )
77
+
78
+ # extract mesh
79
+ m = ms.current_mesh()
80
+ verts = m.vertex_matrix()
81
+ faces = m.face_matrix()
82
+
83
+ print(
84
+ f"[INFO] mesh decimation: {_ori_vert_shape} --> {verts.shape}, {_ori_face_shape} --> {faces.shape}"
85
+ )
86
+
87
+ return verts, faces
88
+
89
+
90
+ def clean_mesh(
91
+ verts,
92
+ faces,
93
+ v_pct=1,
94
+ min_f=64,
95
+ min_d=20,
96
+ repair=True,
97
+ remesh=True,
98
+ remesh_size=0.01,
99
+ ):
100
+ # verts: [N, 3]
101
+ # faces: [N, 3]
102
+ import pymeshlab as pml
103
+
104
+ _ori_vert_shape = verts.shape
105
+ _ori_face_shape = faces.shape
106
+
107
+ m = pml.Mesh(verts, faces)
108
+ ms = pml.MeshSet()
109
+ ms.add_mesh(m, "mesh") # will copy!
110
+
111
+ # filters
112
+ ms.meshing_remove_unreferenced_vertices() # verts not refed by any faces
113
+
114
+ if v_pct > 0:
115
+ ms.meshing_merge_close_vertices(
116
+ threshold=pml.PercentageValue(v_pct)
117
+ ) # 1/10000 of bounding box diagonal
118
+
119
+ ms.meshing_remove_duplicate_faces() # faces defined by the same verts
120
+ ms.meshing_remove_null_faces() # faces with area == 0
121
+
122
+ if min_d > 0:
123
+ ms.meshing_remove_connected_component_by_diameter(
124
+ mincomponentdiag=pml.PercentageValue(min_d)
125
+ )
126
+
127
+ if min_f > 0:
128
+ ms.meshing_remove_connected_component_by_face_number(mincomponentsize=min_f)
129
+
130
+ if repair:
131
+ # ms.meshing_remove_t_vertices(method=0, threshold=40, repeat=True)
132
+ ms.meshing_repair_non_manifold_edges(method=0)
133
+ ms.meshing_repair_non_manifold_vertices(vertdispratio=0)
134
+
135
+ if remesh:
136
+ # ms.apply_coord_taubin_smoothing()
137
+ ms.meshing_isotropic_explicit_remeshing(
138
+ iterations=3, targetlen=pml.PureValue(remesh_size)
139
+ )
140
+
141
+ # extract mesh
142
+ m = ms.current_mesh()
143
+ verts = m.vertex_matrix()
144
+ faces = m.face_matrix()
145
+
146
+ print(
147
+ f"[INFO] mesh cleaning: {_ori_vert_shape} --> {verts.shape}, {_ori_face_shape} --> {faces.shape}"
148
+ )
149
+
150
+ return verts, faces
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/material/gaussian_material.py ADDED
@@ -0,0 +1,116 @@
+ import random
2
+ from dataclasses import dataclass, field
3
+
4
+ import threestudio
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ from threestudio.models.materials.base import BaseMaterial
9
+ from threestudio.utils.ops import dot, get_activation
10
+ from threestudio.utils.typing import *
11
+
12
+
13
+ @threestudio.register("gaussian-diffuse-with-point-light-material")
14
+ class GaussianDiffuseWithPointLightMaterial(BaseMaterial):
15
+ @dataclass
16
+ class Config(BaseMaterial.Config):
17
+ ambient_light_color: Tuple[float, float, float] = (0.1, 0.1, 0.1)
18
+ diffuse_light_color: Tuple[float, float, float] = (0.9, 0.9, 0.9)
19
+ ambient_only_steps: int = 1000
20
+ diffuse_prob: float = 0.75
21
+ textureless_prob: float = 0.5
22
+ soft_shading: bool = False
23
+
24
+ cfg: Config
25
+
26
+ def configure(self) -> None:
27
+ self.requires_normal = True
28
+
29
+ self.ambient_light_color: Float[Tensor, "3"]
30
+ self.register_buffer(
31
+ "ambient_light_color",
32
+ torch.as_tensor(self.cfg.ambient_light_color, dtype=torch.float32),
33
+ )
34
+ self.diffuse_light_color: Float[Tensor, "3"]
35
+ self.register_buffer(
36
+ "diffuse_light_color",
37
+ torch.as_tensor(self.cfg.diffuse_light_color, dtype=torch.float32),
38
+ )
39
+ self.ambient_only = False
40
+
41
+ def forward(
42
+ self,
43
+ positions: Float[Tensor, "B ... 3"],
44
+ shading_normal: Float[Tensor, "B ... 3"],
45
+ light_positions: Float[Tensor, "B ... 3"],
46
+ albedo: Float[Tensor, "B ... 3"],
47
+ ambient_ratio: Optional[float] = None,
48
+ shading: Optional[str] = None,
49
+ **kwargs,
50
+ ) -> Float[Tensor, "B ... 3"]:
51
+ if ambient_ratio is not None:
52
+ # if ambient ratio is specified, use it
53
+ diffuse_light_color = (1 - ambient_ratio) * torch.ones_like(
54
+ self.diffuse_light_color
55
+ )
56
+ ambient_light_color = ambient_ratio * torch.ones_like(
57
+ self.ambient_light_color
58
+ )
59
+ elif self.training and self.cfg.soft_shading:
60
+ # otherwise if in training and soft shading is enabled, random a ambient ratio
61
+ diffuse_light_color = torch.full_like(
62
+ self.diffuse_light_color, random.random()
63
+ )
64
+ ambient_light_color = 1.0 - diffuse_light_color
65
+ else:
66
+ # otherwise use the default fixed values
67
+ diffuse_light_color = self.diffuse_light_color
68
+ ambient_light_color = self.ambient_light_color
69
+
70
+ light_directions: Float[Tensor, "B ... 3"] = F.normalize(
71
+ light_positions - positions, dim=-1
72
+ )
73
+ diffuse_light: Float[Tensor, "B ... 3"] = (
74
+ dot(shading_normal, light_directions).clamp(min=0.0) * diffuse_light_color
75
+ )
76
+ textureless_color = diffuse_light + ambient_light_color
77
+ # clamp albedo to [0, 1] to compute shading
78
+ color = albedo.clamp(0.0, 1.0) * textureless_color
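+ # Lambertian shading above: color = albedo * (ambient + diffuse_color * max(n.l, 0)),
+ # with n the shading normal and l the unit direction toward the point light.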
79
+
80
+ if shading is None:
81
+ if self.training:
82
+ # adopt the same type of augmentation for the whole batch
83
+ if self.ambient_only or random.random() > self.cfg.diffuse_prob:
84
+ shading = "albedo"
85
+ elif random.random() < self.cfg.textureless_prob:
86
+ shading = "textureless"
87
+ else:
88
+ shading = "diffuse"
89
+ else:
90
+ if self.ambient_only:
91
+ shading = "albedo"
92
+ else:
93
+ # return shaded color by default in evaluation
94
+ shading = "diffuse"
95
+
96
+ # multiply by 0 to prevent checking for unused parameters in DDP
97
+ if shading == "albedo":
98
+ return albedo + textureless_color * 0
99
+ elif shading == "textureless":
100
+ return albedo * 0 + textureless_color
101
+ elif shading == "diffuse":
102
+ return color
103
+ else:
104
+ raise ValueError(f"Unknown shading type {shading}")
105
+
106
+ def update_step(self, epoch: int, global_step: int, on_load_weights: bool = False):
107
+ if global_step < self.cfg.ambient_only_steps:
108
+ self.ambient_only = True
109
+ else:
110
+ self.ambient_only = False
111
+
112
+ def export(self, features: Float[Tensor, "*N Nf"], **kwargs) -> Dict[str, Any]:
113
+ albedo = get_activation(self.cfg.albedo_activation)(features[..., :3]).clamp(
114
+ 0.0, 1.0
115
+ )
116
+ return {"albedo": albedo}
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer.py ADDED
@@ -0,0 +1,151 @@
1
+ import math
2
+ from dataclasses import dataclass
3
+
4
+ import numpy as np
5
+ import threestudio
6
+ import torch
7
+ import torch.nn.functional as F
8
+ from diff_gaussian_rasterization import (
9
+ GaussianRasterizationSettings,
10
+ GaussianRasterizer,
11
+ )
12
+ from threestudio.models.background.base import BaseBackground
13
+ from threestudio.models.geometry.base import BaseGeometry
14
+ from threestudio.models.materials.base import BaseMaterial
15
+ from threestudio.models.renderers.base import Rasterizer
16
+ from threestudio.utils.typing import *
17
+
18
+ from .gaussian_batch_renderer import GaussianBatchRenderer
19
+
20
+
21
+ @threestudio.register("diff-gaussian-rasterizer")
22
+ class DiffGaussian(Rasterizer, GaussianBatchRenderer):
23
+ @dataclass
24
+ class Config(Rasterizer.Config):
25
+ debug: bool = False
26
+ invert_bg_prob: float = 1.0
27
+ back_ground_color: Tuple[float, float, float] = (1, 1, 1)
28
+
29
+ cfg: Config
30
+
31
+ def configure(
32
+ self,
33
+ geometry: BaseGeometry,
34
+ material: BaseMaterial,
35
+ background: BaseBackground,
36
+ ) -> None:
37
+ threestudio.info(
38
+ "[Note] Gaussian Splatting doesn't support material and background now."
39
+ )
40
+ super().configure(geometry, material, background)
41
+ self.background_tensor = torch.tensor(
42
+ self.cfg.back_ground_color, dtype=torch.float32, device="cuda"
43
+ )
44
+
45
+ def forward(
46
+ self,
47
+ viewpoint_camera,
48
+ bg_color: torch.Tensor,
49
+ scaling_modifier=1.0,
50
+ override_color=None,
51
+ **kwargs
52
+ ) -> Dict[str, Any]:
53
+ """
54
+ Render the scene.
55
+
56
+ Background tensor (bg_color) must be on GPU!
57
+ """
58
+
59
+ if self.training:
60
+ invert_bg_color = np.random.rand() > self.cfg.invert_bg_prob
61
+ else:
62
+ invert_bg_color = True
63
+
64
+ bg_color = bg_color if not invert_bg_color else (1.0 - bg_color)
65
+
66
+ pc = self.geometry
67
+ # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
68
+ screenspace_points = (
69
+ torch.zeros_like(
70
+ pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
71
+ )
72
+ + 0
73
+ )
74
+ try:
75
+ screenspace_points.retain_grad()
76
+ except Exception:  # retain_grad is a no-op safeguard when the tensor is not a leaf
77
+ pass
78
+
79
+ # Set up rasterization configuration
80
+ tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
81
+ tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
82
+
83
+ raster_settings = GaussianRasterizationSettings(
84
+ image_height=int(viewpoint_camera.image_height),
85
+ image_width=int(viewpoint_camera.image_width),
86
+ tanfovx=tanfovx,
87
+ tanfovy=tanfovy,
88
+ bg=bg_color,
89
+ scale_modifier=scaling_modifier,
90
+ viewmatrix=viewpoint_camera.world_view_transform,
91
+ projmatrix=viewpoint_camera.full_proj_transform,
92
+ sh_degree=pc.active_sh_degree,
93
+ campos=viewpoint_camera.camera_center,
94
+ prefiltered=False,
95
+ debug=False,
96
+ include_feature=True
97
+ )
98
+
99
+ rasterizer = GaussianRasterizer(raster_settings=raster_settings)
100
+
101
+ means3D = pc.get_xyz
102
+ means2D = screenspace_points
103
+ opacity = pc.get_opacity
104
+
105
+ # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
106
+ # scaling / rotation by the rasterizer.
107
+ scales = None
108
+ rotations = None
109
+ cov3D_precomp = None
110
+ scales = pc.get_scaling
111
+ rotations = pc.get_rotation
112
+
113
+ # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
114
+ # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
115
+ shs = None
116
+ colors_precomp = None
117
+ if override_color is None:
118
+ shs = pc.get_features
119
+ else:
120
+ colors_precomp = override_color
121
+
122
+ language_feature_precomp = pc.get_language_feature
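+ # L2-normalize the per-Gaussian language features before rasterization; the 1e-9
+ # epsilon guards against division by zero for all-zero features.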
123
+ language_feature_precomp = language_feature_precomp / (language_feature_precomp.norm(dim=-1, keepdim=True) + 1e-9)
124
+
125
+ # Rasterize visible Gaussians to image, obtain their radii (on screen).
126
+ result_list = rasterizer(
127
+ means3D=means3D,
128
+ means2D=means2D,
129
+ shs=shs,
130
+ colors_precomp=colors_precomp,
131
+ language_feature_precomp = language_feature_precomp,
132
+ opacities=opacity,
133
+ scales=scales,
134
+ rotations=rotations,
135
+ cov3D_precomp=cov3D_precomp,
136
+ )
137
+ rendered_image, rendered_feature, radii = result_list[0], result_list[1], result_list[2]
138
+
139
+ # Retain gradients of the 2D (screen-space) means for batch dim
140
+ if self.training:
141
+ screenspace_points.retain_grad()
142
+
143
+ # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
144
+ # They will be excluded from value updates used in the splitting criteria.
145
+ return {
146
+ "render": rendered_image.clamp(0, 1),
147
+ "lang": rendered_feature,
148
+ "viewspace_points": screenspace_points,
149
+ "visibility_filter": radii > 0,
150
+ "radii": radii,
151
+ }
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_advanced.py ADDED
@@ -0,0 +1,152 @@
+ import math
+ from dataclasses import dataclass
+
+ import numpy as np
+ import threestudio
+ import torch
+ import torch.nn.functional as F
+ from diff_gaussian_rasterization import (
+     GaussianRasterizationSettings,
+     GaussianRasterizer,
+ )
+ from threestudio.models.background.base import BaseBackground
+ from threestudio.models.geometry.base import BaseGeometry
+ from threestudio.models.materials.base import BaseMaterial
+ from threestudio.models.renderers.base import Rasterizer
+ from threestudio.utils.typing import *
+
+ from .gaussian_batch_renderer import GaussianBatchRenderer
+
+
+ @threestudio.register("diff-gaussian-rasterizer-advanced")
+ class DiffGaussian(Rasterizer, GaussianBatchRenderer):
+     @dataclass
+     class Config(Rasterizer.Config):
+         debug: bool = False
+         invert_bg_prob: float = 1.0
+         back_ground_color: Tuple[float, float, float] = (1, 1, 1)
+
+     cfg: Config
+
+     def configure(
+         self,
+         geometry: BaseGeometry,
+         material: BaseMaterial,
+         background: BaseBackground,
+     ) -> None:
+         threestudio.info(
+             "[Note] Gaussian Splatting doesn't support material and background now."
+         )
+         super().configure(geometry, material, background)
+         self.background_tensor = torch.tensor(
+             self.cfg.back_ground_color, dtype=torch.float32, device="cuda"
+         )
+
+     def forward(
+         self,
+         viewpoint_camera,
+         bg_color: torch.Tensor,
+         scaling_modifier=1.0,
+         override_color=None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Render the scene.
+
+         Background tensor (bg_color) must be on GPU!
+         """
+
+         if self.training:
+             invert_bg_color = np.random.rand() > self.cfg.invert_bg_prob
+         else:
+             invert_bg_color = True
+
+         bg_color = bg_color if not invert_bg_color else (1.0 - bg_color)
+
+         pc = self.geometry
+         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
+         screenspace_points = (
+             torch.zeros_like(
+                 pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
+             )
+             + 0
+         )
+         try:
+             screenspace_points.retain_grad()
+         except:
+             pass
+
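+         # include_feature=True below assumes the bundled fork of
+         # diff_gaussian_rasterization that also alpha-composites the per-Gaussian
+         # language embeddings into a feature map next to color, depth, and alpha.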
79
+         # Set up rasterization configuration
+         tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
+         tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
+
+         raster_settings = GaussianRasterizationSettings(
+             image_height=int(viewpoint_camera.image_height),
+             image_width=int(viewpoint_camera.image_width),
+             tanfovx=tanfovx,
+             tanfovy=tanfovy,
+             bg=bg_color,
+             scale_modifier=scaling_modifier,
+             viewmatrix=viewpoint_camera.world_view_transform,
+             projmatrix=viewpoint_camera.full_proj_transform,
+             sh_degree=pc.active_sh_degree,
+             campos=viewpoint_camera.camera_center,
+             prefiltered=False,
+             debug=False,
+             include_feature=True,
+         )
+
+         rasterizer = GaussianRasterizer(raster_settings=raster_settings)
+
+         means3D = pc.get_xyz
+         means2D = screenspace_points
+         opacity = pc.get_opacity
+
+         # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
+         # scaling / rotation by the rasterizer.
+         scales = None
+         rotations = None
+         cov3D_precomp = None
+         scales = pc.get_scaling
+         rotations = pc.get_rotation
+
+         # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
+         # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
+         shs = None
+         colors_precomp = None
+         if override_color is None:
+             shs = pc.get_features
+         else:
+             colors_precomp = override_color
+
+         language_feature_precomp = pc.get_language_feature
+         language_feature_precomp = language_feature_precomp / (
+             language_feature_precomp.norm(dim=-1, keepdim=True) + 1e-9
+         )
+
125
+         # Rasterize visible Gaussians to image, obtain their radii (on screen).
+         rendered_image, rendered_feature, radii, rendered_depth, rendered_alpha = rasterizer(
+             means3D=means3D,
+             means2D=means2D,
+             shs=shs,
+             colors_precomp=colors_precomp,
+             language_feature_precomp=language_feature_precomp,
+             opacities=opacity,
+             scales=scales,
+             rotations=rotations,
+             cov3D_precomp=cov3D_precomp,
+         )
+
+         # Retain gradients of the 2D (screen-space) means for batch dim
+         if self.training:
+             screenspace_points.retain_grad()
+
+         # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
+         # They will be excluded from value updates used in the splitting criteria.
+         return {
+             "render": rendered_image.clamp(0, 1),
+             "lang": rendered_feature,
+             "depth": rendered_depth,
+             "mask": rendered_alpha,
+             "viewspace_points": screenspace_points,
+             "visibility_filter": radii > 0,
+             "radii": radii,
+         }
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_background.py ADDED
@@ -0,0 +1,145 @@
+ import math
+ from dataclasses import dataclass
+
+ import numpy as np
+ import threestudio
+ import torch
+ import torch.nn.functional as F
+ from diff_gaussian_rasterization import (
+     GaussianRasterizationSettings,
+     GaussianRasterizer,
+ )
+ from threestudio.models.background.base import BaseBackground
+ from threestudio.models.geometry.base import BaseGeometry
+ from threestudio.models.materials.base import BaseMaterial
+ from threestudio.models.renderers.base import Rasterizer
+ from threestudio.utils.typing import *
+
+ from .gaussian_batch_renderer import GaussianBatchRenderer
+
+
+ @threestudio.register("diff-gaussian-rasterizer-background")
+ class DiffGaussian(Rasterizer, GaussianBatchRenderer):
+     @dataclass
+     class Config(Rasterizer.Config):
+         debug: bool = False
+         back_ground_color: Tuple[float, float, float] = (1, 1, 1)
+
+     cfg: Config
+
+     def configure(
+         self,
+         geometry: BaseGeometry,
+         material: BaseMaterial,
+         background: BaseBackground,
+     ) -> None:
+         threestudio.info(
+             "[Note] diff-gaussian-rasterizer-background doesn't support material."
+         )
+         super().configure(geometry, material, background)
+         self.background_tensor = torch.tensor(
+             self.cfg.back_ground_color, dtype=torch.float32, device="cuda"
+         )
+
+     def forward(
+         self,
+         viewpoint_camera,
+         bg_color: torch.Tensor,
+         scaling_modifier=1.0,
+         override_color=None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Render the scene.
+
+         Background tensor (bg_color) must be on GPU!
+         """
+         # use neural background
+         bg_color = bg_color * 0
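+         # Zeroing bg_color renders the splats over black, so rendered_image holds
+         # only the alpha-weighted foreground; the learned neural background is
+         # composited back in after rasterization (see below).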
59
+
+         pc = self.geometry
+         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
+         screenspace_points = (
+             torch.zeros_like(
+                 pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
+             )
+             + 0
+         )
+         try:
+             screenspace_points.retain_grad()
+         except:
+             pass
+
+         # Set up rasterization configuration
+         tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
+         tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
+
+         raster_settings = GaussianRasterizationSettings(
+             image_height=int(viewpoint_camera.image_height),
+             image_width=int(viewpoint_camera.image_width),
+             tanfovx=tanfovx,
+             tanfovy=tanfovy,
+             bg=bg_color,
+             scale_modifier=scaling_modifier,
+             viewmatrix=viewpoint_camera.world_view_transform,
+             projmatrix=viewpoint_camera.full_proj_transform,
+             sh_degree=pc.active_sh_degree,
+             campos=viewpoint_camera.camera_center,
+             prefiltered=False,
+             debug=False,
+         )
+
+         rasterizer = GaussianRasterizer(raster_settings=raster_settings)
+
+         means3D = pc.get_xyz
+         means2D = screenspace_points
+         opacity = pc.get_opacity
+
+         # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
+         # scaling / rotation by the rasterizer.
+         scales = None
+         rotations = None
+         cov3D_precomp = None
+         scales = pc.get_scaling
+         rotations = pc.get_rotation
+
+         # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
+         # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
+         shs = None
+         colors_precomp = None
+         if override_color is None:
+             shs = pc.get_features
+         else:
+             colors_precomp = override_color
+
+         # Rasterize visible Gaussians to image, obtain their radii (on screen).
+         rays_d = kwargs["rays_d"][kwargs["batch_idx"]]
+         comp_rgb_bg = self.background(dirs=rays_d.unsqueeze(0))
+
+         rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
+             means3D=means3D,
+             means2D=means2D,
+             shs=shs,
+             colors_precomp=colors_precomp,
+             opacities=opacity,
+             scales=scales,
+             rotations=rotations,
+             cov3D_precomp=cov3D_precomp,
+         )
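+         # Standard "over" compositing: the neural background fills in wherever the
+         # splats leave transmittance, weighted by (1 - rendered_alpha).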
129
+         _, H, W = rendered_image.shape
+         rendered_image = rendered_image + (1 - rendered_alpha) * comp_rgb_bg.reshape(
+             H, W, 3
+         ).permute(2, 0, 1)
+
+         # Retain gradients of the 2D (screen-space) means for batch dim
+         if self.training:
+             screenspace_points.retain_grad()
+
+         # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
+         # They will be excluded from value updates used in the splitting criteria.
+         return {
+             "render": rendered_image.clamp(0, 1),
+             "viewspace_points": screenspace_points,
+             "visibility_filter": radii > 0,
+             "radii": radii,
+         }
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/diff_gaussian_rasterizer_shading.py ADDED
@@ -0,0 +1,226 @@
+ import math
+ from dataclasses import dataclass
+
+ import numpy as np
+ import threestudio
+ import torch
+ import torch.nn.functional as F
+ from diff_gaussian_rasterization import (
+     GaussianRasterizationSettings,
+     GaussianRasterizer,
+ )
+ from threestudio.models.background.base import BaseBackground
+ from threestudio.models.geometry.base import BaseGeometry
+ from threestudio.models.materials.base import BaseMaterial
+ from threestudio.models.renderers.base import Rasterizer
+ from threestudio.utils.typing import *
+
+ from ..material.gaussian_material import GaussianDiffuseWithPointLightMaterial
+ from .gaussian_batch_renderer import GaussianBatchRenderer
+
+
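+ # Depth2Normal converts a (B, 3, H, W) map of world-space positions into normals:
+ # the two 3x3 kernels below are central-difference filters along x and y, and the
+ # normal is the (negated) cross product of the resulting tangent vectors.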
22
+ class Depth2Normal(torch.nn.Module):
+     def __init__(self, *args, **kwargs) -> None:
+         super().__init__(*args, **kwargs)
+         self.delzdelxkernel = torch.tensor(
+             [
+                 [0.00000, 0.00000, 0.00000],
+                 [-1.00000, 0.00000, 1.00000],
+                 [0.00000, 0.00000, 0.00000],
+             ]
+         )
+         self.delzdelykernel = torch.tensor(
+             [
+                 [0.00000, -1.00000, 0.00000],
+                 [0.00000, 0.00000, 0.00000],
+                 [0.00000, 1.00000, 0.00000],
+             ]
+         )
+
+     def forward(self, x):
+         B, C, H, W = x.shape
+         delzdelxkernel = self.delzdelxkernel.view(1, 1, 3, 3).to(x.device)
+         delzdelx = F.conv2d(
+             x.reshape(B * C, 1, H, W), delzdelxkernel, padding=1
+         ).reshape(B, C, H, W)
+         delzdelykernel = self.delzdelykernel.view(1, 1, 3, 3).to(x.device)
+         delzdely = F.conv2d(
+             x.reshape(B * C, 1, H, W), delzdelykernel, padding=1
+         ).reshape(B, C, H, W)
+         normal = -torch.cross(delzdelx, delzdely, dim=1)
+         return normal
+
+
+ @threestudio.register("diff-gaussian-rasterizer-shading")
+ class DiffGaussian(Rasterizer, GaussianBatchRenderer):
+     @dataclass
+     class Config(Rasterizer.Config):
+         debug: bool = False
+         back_ground_color: Tuple[float, float, float] = (1, 1, 1)
+
+     cfg: Config
+
+     def configure(
+         self,
+         geometry: BaseGeometry,
+         material: BaseMaterial,
+         background: BaseBackground,
+     ) -> None:
+         if not isinstance(material, GaussianDiffuseWithPointLightMaterial):
+             raise NotImplementedError(
+                 "diff-gaussian-rasterizer-shading only supports Gaussian material."
+             )
+         super().configure(geometry, material, background)
+         self.normal_module = Depth2Normal()
+         self.background_tensor = torch.tensor(
+             self.cfg.back_ground_color, dtype=torch.float32, device="cuda"
+         )
+
+     def forward(
+         self,
+         viewpoint_camera,
+         bg_color: torch.Tensor,
+         scaling_modifier=1.0,
+         override_color=None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Render the scene.
+
+         Background tensor (bg_color) must be on GPU!
+         """
+         # use neural background
+         bg_color = bg_color * 0
+
+         pc = self.geometry
+         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
+         screenspace_points = (
+             torch.zeros_like(
+                 pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
+             )
+             + 0
+         )
+         try:
+             screenspace_points.retain_grad()
+         except:
+             pass
+
+         # Set up rasterization configuration
+         tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
+         tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
+
+         raster_settings = GaussianRasterizationSettings(
+             image_height=int(viewpoint_camera.image_height),
+             image_width=int(viewpoint_camera.image_width),
+             tanfovx=tanfovx,
+             tanfovy=tanfovy,
+             bg=bg_color,
+             scale_modifier=scaling_modifier,
+             viewmatrix=viewpoint_camera.world_view_transform,
+             projmatrix=viewpoint_camera.full_proj_transform,
+             sh_degree=pc.active_sh_degree,
+             campos=viewpoint_camera.camera_center,
+             prefiltered=False,
+             debug=False,
+         )
+
+         rasterizer = GaussianRasterizer(raster_settings=raster_settings)
+
+         means3D = pc.get_xyz
+         means2D = screenspace_points
+         opacity = pc.get_opacity
+
+         # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
+         # scaling / rotation by the rasterizer.
+         scales = None
+         rotations = None
+         cov3D_precomp = None
+         scales = pc.get_scaling
+         rotations = pc.get_rotation
+
+         # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
+         # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
+         shs = None
+         colors_precomp = None
+         if override_color is None:
+             shs = pc.get_features
+         else:
+             colors_precomp = override_color
+
+         # Rasterize visible Gaussians to image, obtain their radii (on screen).
+         batch_idx = kwargs["batch_idx"]
+         rays_d = kwargs["rays_d"][batch_idx]
+         rays_o = kwargs["rays_o"][batch_idx]
+         # rays_d_flatten: Float[Tensor, "Nr 3"] = rays_d.unsqueeze(0)
+
+         comp_rgb_bg = self.background(dirs=rays_d.unsqueeze(0))
+
+         rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
+             means3D=means3D,
+             means2D=means2D,
+             shs=shs,
+             colors_precomp=colors_precomp,
+             opacities=opacity,
+             scales=scales,
+             rotations=rotations,
+             cov3D_precomp=cov3D_precomp,
+         )
+         _, H, W = rendered_image.shape
+
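+         # Back-project the rendered depth along each pixel's ray to a world-space
+         # position map, then differentiate it with Depth2Normal to get normals.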
170
+         xyz_map = rays_o + rendered_depth.permute(1, 2, 0) * rays_d
+         normal_map = self.normal_module(xyz_map.permute(2, 0, 1).unsqueeze(0))[0]
+         normal_map = F.normalize(normal_map, dim=0)
+         if pc.cfg.pred_normal:
+             pred_normal_map, _, _, _ = rasterizer(
+                 means3D=means3D,
+                 means2D=torch.zeros_like(means2D),
+                 shs=pc.get_normal.unsqueeze(1),
+                 colors_precomp=None,
+                 opacities=opacity,
+                 scales=scales,
+                 rotations=rotations,
+                 cov3D_precomp=cov3D_precomp,
+             )
+         else:
+             pred_normal_map = None
+
+         light_positions = kwargs["light_positions"][batch_idx, None, None, :].expand(
+             H, W, -1
+         )
+
+         if pred_normal_map is not None:
+             shading_normal = pred_normal_map.permute(1, 2, 0).detach() * 2 - 1
+             shading_normal = F.normalize(shading_normal, dim=2)
+         else:
+             shading_normal = normal_map.permute(1, 2, 0)
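+         # rendered_image is alpha-premultiplied (rendered over black), so dividing
+         # by rendered_alpha (plus a small eps) recovers an un-premultiplied albedo
+         # for the point-light shading model.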
196
+         rgb_fg = self.material(
+             positions=xyz_map,
+             shading_normal=shading_normal,
+             albedo=(rendered_image / (rendered_alpha + 1e-6)).permute(1, 2, 0),
+             light_positions=light_positions,
+         ).permute(2, 0, 1)
+         rendered_image = rgb_fg * rendered_alpha + (
+             1 - rendered_alpha
+         ) * comp_rgb_bg.reshape(H, W, 3).permute(2, 0, 1)
+         normal_map = normal_map * 0.5 * rendered_alpha + 0.5
+         mask = rendered_alpha > 0.99
+         normal_mask = mask.repeat(3, 1, 1)
+         normal_map[~normal_mask] = normal_map[~normal_mask].detach()
+         rendered_depth[~mask] = rendered_depth[~mask].detach()
+
+         # Retain gradients of the 2D (screen-space) means for batch dim
+         if self.training:
+             screenspace_points.retain_grad()
+
+         # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
+         # They will be excluded from value updates used in the splitting criteria.
+         return {
+             "render": rendered_image.clamp(0, 1),
+             "normal": normal_map,
+             "pred_normal": pred_normal_map,
+             "mask": rendered_alpha,
+             "depth": rendered_depth,
+             "viewspace_points": screenspace_points,
+             "visibility_filter": radii > 0,
+             "radii": radii,
+         }
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/renderer/gaussian_batch_renderer.py ADDED
@@ -0,0 +1,92 @@
+ import torch
+ from threestudio.utils.ops import get_cam_info_gaussian
+ from torch.cuda.amp import autocast
+
+ from ..geometry.gaussian_base import BasicPointCloud, Camera
+
+
+ class GaussianBatchRenderer:
+     def batch_forward(self, batch):
+         bs = batch["c2w"].shape[0]
+         renders = []
+         viewspace_points = []
+         visibility_filters = []
+         radiis = []
+         normals = []
+         pred_normals = []
+         depths = []
+         masks = []
+         langs = []
+         for batch_idx in range(bs):
+             batch["batch_idx"] = batch_idx
+             fovy = batch["fovy"][batch_idx]
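+             # The vertical FoV is reused for the horizontal one below, i.e. a
+             # square image / symmetric frustum is assumed.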
23
+             w2c, proj, cam_p, cam_proj = get_cam_info_gaussian(
+                 c2w=batch["c2w"][batch_idx], fovx=fovy, fovy=fovy, znear=0.1, zfar=100
+             )
+
+             viewpoint_cam = Camera(
+                 FoVx=fovy,
+                 FoVy=fovy,
+                 image_width=batch["width"],
+                 image_height=batch["height"],
+                 world_view_transform=w2c,
+                 full_proj_transform=proj,
+                 camera_center=cam_p,
+             )
+
+             with autocast(enabled=False):
+                 render_pkg = self.forward(
+                     viewpoint_cam, self.background_tensor, **batch
+                 )
+                 renders.append(render_pkg["render"])
+                 viewspace_points.append(render_pkg["viewspace_points"])
+                 visibility_filters.append(render_pkg["visibility_filter"])
+                 radiis.append(render_pkg["radii"])
+                 if "normal" in render_pkg:
+                     normals.append(render_pkg["normal"])
+                 if "pred_normal" in render_pkg and render_pkg["pred_normal"] is not None:
+                     pred_normals.append(render_pkg["pred_normal"])
+                 if "depth" in render_pkg:
+                     depths.append(render_pkg["depth"])
+                 if "mask" in render_pkg:
+                     masks.append(render_pkg["mask"])
+                 if "lang" in render_pkg:
+                     langs.append(render_pkg["lang"])
+
59
+         outputs = {
+             "comp_rgb": torch.stack(renders, dim=0).permute(0, 2, 3, 1),
+             "viewspace_points": viewspace_points,
+             "visibility_filter": visibility_filters,
+             "radii": radiis,
+         }
+         # Only renderers that rasterize language features return "lang"; stack it
+         # conditionally so the plain RGB rasterizers don't fail on an empty list.
+         if len(langs) > 0:
+             outputs.update(
+                 {
+                     "lang": torch.stack(langs, dim=0).permute(0, 2, 3, 1),
+                 }
+             )
+         if len(normals) > 0:
+             outputs.update(
+                 {
+                     "comp_normal": torch.stack(normals, dim=0).permute(0, 2, 3, 1),
+                 }
+             )
+         if len(pred_normals) > 0:
+             outputs.update(
+                 {
+                     "comp_pred_normal": torch.stack(pred_normals, dim=0).permute(
+                         0, 2, 3, 1
+                     ),
+                 }
+             )
+         if len(depths) > 0:
+             outputs.update(
+                 {
+                     "comp_depth": torch.stack(depths, dim=0).permute(0, 2, 3, 1),
+                 }
+             )
+         if len(masks) > 0:
+             outputs.update(
+                 {
+                     "comp_mask": torch.stack(masks, dim=0).permute(0, 2, 3, 1),
+                 }
+             )
+         return outputs
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_mvdream.py ADDED
@@ -0,0 +1,249 @@
+ import os
+ from dataclasses import dataclass, field
+
+ import numpy as np
+ import threestudio
+ import torch
+ from threestudio.systems.base import BaseLift3DSystem
+ from threestudio.systems.utils import parse_optimizer, parse_scheduler
+ from threestudio.utils.loss import tv_loss
+ from threestudio.utils.typing import *
+
+ from ..geometry.gaussian_base import BasicPointCloud
+
+
+ @threestudio.register("gaussian-splatting-mvdream-system")
+ class MVDreamSystem(BaseLift3DSystem):
+     @dataclass
+     class Config(BaseLift3DSystem.Config):
+         visualize_samples: bool = False
+
+     cfg: Config
+
+     def configure(self) -> None:
+         # set up geometry, material, background, renderer
+         super().configure()
+         self.automatic_optimization = False
+
+         self.guidance = threestudio.find(self.cfg.guidance_type)(self.cfg.guidance)
+         self.prompt_processor = threestudio.find(self.cfg.prompt_processor_type)(
+             self.cfg.prompt_processor
+         )
+         self.prompt_utils = self.prompt_processor()
+
+     def configure_optimizers(self):
+         optim = self.geometry.optimizer
+         if hasattr(self, "merged_optimizer"):
+             return [optim]
+         if hasattr(self.cfg.optimizer, "name"):
+             net_optim = parse_optimizer(self.cfg.optimizer, self)
+             optim = self.geometry.merge_optimizer(net_optim)
+             self.merged_optimizer = True
+         else:
+             self.merged_optimizer = False
+         return [optim]
+
+     def on_load_checkpoint(self, checkpoint):
+         num_pts = checkpoint["state_dict"]["geometry._xyz"].shape[0]
+         pcd = BasicPointCloud(
+             points=np.zeros((num_pts, 3)),
+             colors=np.zeros((num_pts, 3)),
+             normals=np.zeros((num_pts, 3)),
+         )
+         self.geometry.create_from_pcd(pcd, 10)
+         self.geometry.training_setup()
+         return
+
+     def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+         self.geometry.update_learning_rate(self.global_step)
+         outputs = self.renderer.batch_forward(batch)
+         return outputs
+
+     def training_step(self, batch, batch_idx):
+         opt = self.optimizers()
+         out = self(batch)
+
+         visibility_filter = out["visibility_filter"]
+         radii = out["radii"]
+         guidance_inp = out["comp_rgb"]
+         viewspace_point_tensor = out["viewspace_points"]
+         guidance_out = self.guidance(
+             guidance_inp, self.prompt_utils, **batch, rgb_as_latents=False
+         )
+
+         loss_sds = 0.0
+         loss = 0.0
+
+         self.log(
+             "gauss_num",
+             int(self.geometry.get_xyz.shape[0]),
+             on_step=True,
+             on_epoch=True,
+             prog_bar=True,
+             logger=True,
+         )
+
+         for name, value in guidance_out.items():
+             self.log(f"train/{name}", value)
+             if name.startswith("loss_"):
+                 loss_sds += value * self.C(
+                     self.cfg.loss[name.replace("loss_", "lambda_")]
+                 )
+
+         xyz_mean = None
+         if self.cfg.loss["lambda_position"] > 0.0:
+             xyz_mean = self.geometry.get_xyz.norm(dim=-1)
+             loss_position = xyz_mean.mean()
+             self.log(f"train/loss_position", loss_position)
+             loss += self.C(self.cfg.loss["lambda_position"]) * loss_position
+
+         if self.cfg.loss["lambda_opacity"] > 0.0:
+             scaling = self.geometry.get_scaling.norm(dim=-1)
+             loss_opacity = (
+                 scaling.detach().unsqueeze(-1) * self.geometry.get_opacity
+             ).sum()
+             self.log(f"train/loss_opacity", loss_opacity)
+             loss += self.C(self.cfg.loss["lambda_opacity"]) * loss_opacity
+
+         if self.cfg.loss["lambda_sparsity"] > 0.0:
+             loss_sparsity = (out["comp_mask"] ** 2 + 0.01).sqrt().mean()
+             self.log("train/loss_sparsity", loss_sparsity)
+             loss += loss_sparsity * self.C(self.cfg.loss.lambda_sparsity)
+
+         if self.cfg.loss["lambda_scales"] > 0.0:
+             scale_sum = torch.sum(self.geometry.get_scaling)
+             self.log(f"train/scales", scale_sum)
+             loss += self.C(self.cfg.loss["lambda_scales"]) * scale_sum
+
+         if self.cfg.loss["lambda_tv_loss"] > 0.0:
+             loss_tv = self.C(self.cfg.loss["lambda_tv_loss"]) * tv_loss(
+                 out["comp_rgb"].permute(0, 3, 1, 2)
+             )
+             self.log(f"train/loss_tv", loss_tv)
+             loss += loss_tv
+
+         if "comp_depth" in out and self.cfg.loss["lambda_depth_tv_loss"] > 0.0:
+             loss_depth_tv = self.C(self.cfg.loss["lambda_depth_tv_loss"]) * (
+                 tv_loss(out["comp_depth"].permute(0, 3, 1, 2))
+             )
+             self.log(f"train/loss_depth_tv", loss_depth_tv)
+             loss += loss_depth_tv
+
+         if "comp_pred_normal" in out:
+             loss_pred_normal = torch.nn.functional.mse_loss(
+                 out["comp_pred_normal"], out["comp_normal"].detach()
+             )
+             loss += loss_pred_normal
+
+         for name, value in self.cfg.loss.items():
+             self.log(f"train_params/{name}", self.C(value))
+
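+         # loss_sds is backpropagated first with retain_graph=True because the
+         # regularization loss below reuses parts of the same graph; this pass also
+         # populates the screen-space gradients consumed by update_states().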
144
+         loss_sds.backward(retain_graph=True)
+         iteration = self.global_step
+         self.geometry.update_states(
+             iteration,
+             visibility_filter,
+             radii,
+             viewspace_point_tensor,
+         )
+         if loss > 0:
+             loss.backward()
+         opt.step()
+         opt.zero_grad(set_to_none=True)
+
+         return {"loss": loss_sds}
+
+     def validation_step(self, batch, batch_idx):
+         out = self(batch)
+         # import pdb; pdb.set_trace()
+         self.save_image_grid(
+             f"it{self.global_step}-{batch['index'][0]}.png",
+             [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             )
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_pred_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_pred_normal" in out
+                 else []
+             ),
+             name="validation_step",
+             step=self.global_step,
+         )
+
+     def on_validation_epoch_end(self):
+         pass
+
+     def test_step(self, batch, batch_idx):
+         out = self(batch)
+         self.save_image_grid(
+             f"it{self.global_step}-test/{batch['index'][0]}.png",
+             [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             )
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_pred_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_pred_normal" in out
+                 else []
+             ),
+             name="test_step",
+             step=self.global_step,
+         )
+         if batch["index"][0] == 0:
+             save_path = self.get_save_path("point_cloud.ply")
+             self.geometry.save_ply(save_path)
+
+     def on_test_epoch_end(self):
+         self.save_img_sequence(
+             f"it{self.true_global_step}-test",
+             f"it{self.true_global_step}-test",
+             r"(\d+)\.png",
+             save_format="mp4",
+             fps=30,
+             name="test",
+             step=self.true_global_step,
+         )
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_splatting.py ADDED
@@ -0,0 +1,223 @@
+ import math
+ from dataclasses import dataclass
+
+ import numpy as np
+ import threestudio
+ import torch
+ from threestudio.systems.base import BaseLift3DSystem
+ from threestudio.systems.utils import parse_optimizer, parse_scheduler
+ from threestudio.utils.loss import tv_loss
+ from threestudio.utils.ops import get_cam_info_gaussian
+ from threestudio.utils.typing import *
+ from torch.cuda.amp import autocast
+
+ from ..geometry.gaussian_base import BasicPointCloud, Camera
+
+
+ @threestudio.register("gaussian-splatting-system")
+ class GaussianSplatting(BaseLift3DSystem):
+     @dataclass
+     class Config(BaseLift3DSystem.Config):
+         visualize_samples: bool = False
+
+     cfg: Config
+
+     def configure(self) -> None:
+         # set up geometry, material, background, renderer
+         super().configure()
+         self.automatic_optimization = False
+
+         self.guidance = threestudio.find(self.cfg.guidance_type)(self.cfg.guidance)
+         self.prompt_processor = threestudio.find(self.cfg.prompt_processor_type)(
+             self.cfg.prompt_processor
+         )
+         self.prompt_utils = self.prompt_processor()
+
+     def configure_optimizers(self):
+         optim = self.geometry.optimizer
+         if hasattr(self, "merged_optimizer"):
+             return [optim]
+         if hasattr(self.cfg.optimizer, "name"):
+             net_optim = parse_optimizer(self.cfg.optimizer, self)
+             optim = self.geometry.merge_optimizer(net_optim)
+             self.merged_optimizer = True
+         else:
+             self.merged_optimizer = False
+         return [optim]
+
+     def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+         self.geometry.update_learning_rate(self.global_step)
+         outputs = self.renderer.batch_forward(batch)
+         return outputs
+
+     def on_fit_start(self) -> None:
+         super().on_fit_start()
+
+     def training_step(self, batch, batch_idx):
+         opt = self.optimizers()
+         out = self(batch)
+
+         visibility_filter = out["visibility_filter"]
+         radii = out["radii"]
+         guidance_inp = out["comp_rgb"]
+         # import pdb; pdb.set_trace()
+         viewspace_point_tensor = out["viewspace_points"]
+         guidance_out = self.guidance(
+             guidance_inp, self.prompt_utils, **batch, rgb_as_latents=False
+         )
+
+         loss_sds = 0.0
+         loss = 0.0
+
+         self.log(
+             "gauss_num",
+             int(self.geometry.get_xyz.shape[0]),
+             on_step=True,
+             on_epoch=True,
+             prog_bar=True,
+             logger=True,
+         )
+
+         for name, value in guidance_out.items():
+             self.log(f"train/{name}", value)
+             if name.startswith("loss_"):
+                 loss_sds += value * self.C(
+                     self.cfg.loss[name.replace("loss_", "lambda_")]
+                 )
+
+         xyz_mean = None
+         if self.cfg.loss["lambda_position"] > 0.0:
+             xyz_mean = self.geometry.get_xyz.norm(dim=-1)
+             loss_position = xyz_mean.mean()
+             self.log(f"train/loss_position", loss_position)
+             loss += self.C(self.cfg.loss["lambda_position"]) * loss_position
+
+         if self.cfg.loss["lambda_opacity"] > 0.0:
+             scaling = self.geometry.get_scaling.norm(dim=-1)
+             loss_opacity = (
+                 scaling.detach().unsqueeze(-1) * self.geometry.get_opacity
+             ).sum()
+             self.log(f"train/loss_opacity", loss_opacity)
+             loss += self.C(self.cfg.loss["lambda_opacity"]) * loss_opacity
+
+         if self.cfg.loss["lambda_scales"] > 0.0:
+             scale_sum = torch.sum(self.geometry.get_scaling)
+             self.log(f"train/scales", scale_sum)
+             loss += self.C(self.cfg.loss["lambda_scales"]) * scale_sum
+
+         if self.cfg.loss["lambda_tv_loss"] > 0.0:
+             loss_tv = self.C(self.cfg.loss["lambda_tv_loss"]) * tv_loss(
+                 out["comp_rgb"].permute(0, 3, 1, 2)
+             )
+             self.log(f"train/loss_tv", loss_tv)
+             loss += loss_tv
+
+         if "comp_depth" in out and self.cfg.loss["lambda_depth_tv_loss"] > 0.0:
+             loss_depth_tv = self.C(self.cfg.loss["lambda_depth_tv_loss"]) * (
+                 tv_loss(out["comp_normal"].permute(0, 3, 1, 2))
+                 + tv_loss(out["comp_depth"].permute(0, 3, 1, 2))
+             )
+             self.log(f"train/loss_depth_tv", loss_depth_tv)
+             loss += loss_depth_tv
+
+         for name, value in self.cfg.loss.items():
+             self.log(f"train_params/{name}", self.C(value))
+
+         loss_sds.backward(retain_graph=True)
+         iteration = self.global_step
+         self.geometry.update_states(
+             iteration,
+             visibility_filter,
+             radii,
+             viewspace_point_tensor,
+         )
+         if loss > 0:
+             loss.backward()
+         opt.step()
+         opt.zero_grad(set_to_none=True)
+
+         return {"loss": loss_sds}
+
+     def validation_step(self, batch, batch_idx):
+         out = self(batch)
+         # import pdb; pdb.set_trace()
+         self.save_image_grid(
+             f"it{self.global_step}-{batch['index'][0]}.png",
+             [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             ),
+             name="validation_step",
+             step=self.global_step,
+         )
+
+     def on_validation_epoch_end(self):
+         pass
+
+     def test_step(self, batch, batch_idx):
+         out = self(batch)
+         self.save_image_grid(
+             f"it{self.global_step}-test/{batch['index'][0]}.png",
+             [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             ),
+             name="test_step",
+             step=self.global_step,
+         )
+         if batch["index"][0] == 0:
+             save_path = self.get_save_path("point_cloud.ply")
+             self.geometry.save_ply(save_path)
+
+     def on_test_epoch_end(self):
+         self.save_img_sequence(
+             f"it{self.global_step}-test",
+             f"it{self.global_step}-test",
+             r"(\d+)\.png",
+             save_format="mp4",
+             fps=30,
+             name="test",
+             step=self.global_step,
+         )
+
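+     # The checkpoint only stores tensor state, so the Gaussian buffers are rebuilt
+     # with the checkpointed point count before the state dict is restored.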
+     def on_load_checkpoint(self, ckpt_dict) -> None:
+         num_pts = ckpt_dict["state_dict"]["geometry._xyz"].shape[0]
+         pcd = BasicPointCloud(
+             points=np.zeros((num_pts, 3)),
+             colors=np.zeros((num_pts, 3)),
+             normals=np.zeros((num_pts, 3)),
+         )
+         self.geometry.create_from_pcd(pcd, 10)
+         self.geometry.training_setup()
+         super().on_load_checkpoint(ckpt_dict)
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/gaussian_zero123.py ADDED
@@ -0,0 +1,339 @@
+ import os
+ import random
+ from dataclasses import dataclass, field
+
+ import numpy as np
+ import threestudio
+ import torch
+ import torch.nn.functional as F
+ from threestudio.systems.base import BaseLift3DSystem
+ from threestudio.systems.utils import parse_optimizer, parse_scheduler
+ from threestudio.utils.loss import tv_loss
+ from threestudio.utils.ops import get_cam_info_gaussian
+ from threestudio.utils.typing import *
+ from torch.cuda.amp import autocast
+ from torchmetrics import PearsonCorrCoef
+
+ from ..geometry.gaussian_base import BasicPointCloud, Camera
+
+
+ @threestudio.register("gaussian-splatting-zero123-system")
+ class Zero123(BaseLift3DSystem):
+     @dataclass
+     class Config(BaseLift3DSystem.Config):
+         freq: dict = field(default_factory=dict)
+         refinement: bool = False
+         ambient_ratio_min: float = 0.5
+         back_ground_color: Tuple[float, float, float] = (1, 1, 1)
+
+     cfg: Config
+
+     def configure(self):
+         # create geometry, material, background, renderer
+         super().configure()
+         self.automatic_optimization = False
+
+     def configure_optimizers(self):
+         optim = self.geometry.optimizer
+         if hasattr(self, "merged_optimizer"):
+             return [optim]
+         if hasattr(self.cfg.optimizer, "name"):
+             net_optim = parse_optimizer(self.cfg.optimizer, self)
+             optim = self.geometry.merge_optimizer(net_optim)
+             self.merged_optimizer = True
+         else:
+             self.merged_optimizer = False
+         return [optim]
+
+     def on_load_checkpoint(self, checkpoint):
+         num_pts = checkpoint["state_dict"]["geometry._xyz"].shape[0]
+         pcd = BasicPointCloud(
+             points=np.zeros((num_pts, 3)),
+             colors=np.zeros((num_pts, 3)),
+             normals=np.zeros((num_pts, 3)),
+         )
+         self.geometry.create_from_pcd(pcd, 10)
+         self.geometry.training_setup()
+         return
+
+     def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+         self.geometry.update_learning_rate(self.global_step)
+         outputs = self.renderer.batch_forward(batch)
+         return outputs
+
+     def on_fit_start(self) -> None:
+         super().on_fit_start()
+         # no prompt processor
+         self.guidance = threestudio.find(self.cfg.guidance_type)(self.cfg.guidance)
+
+         # visualize all training images
+         all_images = self.trainer.datamodule.train_dataloader().dataset.get_all_images()
+         self.save_image_grid(
+             "all_training_images.png",
+             [
+                 {"type": "rgb", "img": image, "kwargs": {"data_format": "HWC"}}
+                 for image in all_images
+             ],
+             name="on_fit_start",
+             step=self.true_global_step,
+         )
+
+         self.pearson = PearsonCorrCoef().to(self.device)
+
+     def training_substep(self, batch, batch_idx, guidance: str):
+         """
+         Args:
+             guidance: one of "ref" (reference image supervision), "zero123"
+         """
+         if guidance == "ref":
+             ambient_ratio = 1.0
+             shading = "diffuse"
+             batch["shading"] = shading
+         elif guidance == "zero123":
+             batch = batch["random_camera"]
+             ambient_ratio = (
+                 self.cfg.ambient_ratio_min
+                 + (1 - self.cfg.ambient_ratio_min) * random.random()
+             )
+
+         batch["ambient_ratio"] = ambient_ratio
+
+         out = self(batch)
+         loss_prefix = f"loss_{guidance}_"
+
+         loss_terms = {}
+
+         def set_loss(name, value):
+             loss_terms[f"{loss_prefix}{name}"] = value
+
+         guidance_eval = (
+             guidance == "zero123"
+             and self.cfg.freq.guidance_eval > 0
+             and self.true_global_step % self.cfg.freq.guidance_eval == 0
+         )
+
+         if guidance == "ref":
+             gt_mask = batch["mask"]
+             gt_rgb = batch["rgb"]
+
+             # color loss
+             gt_rgb = gt_rgb * gt_mask.float()
+             set_loss("rgb", F.mse_loss(gt_rgb, out["comp_rgb"] * gt_mask.float()))
+
+             # mask loss
+             set_loss("mask", F.mse_loss(gt_mask.float(), out["comp_mask"]))
+
+             # depth loss
+             if self.C(self.cfg.loss.lambda_depth) > 0:
+                 valid_gt_depth = batch["ref_depth"][gt_mask.squeeze(-1)].unsqueeze(1)
+                 valid_pred_depth = out["comp_depth"][gt_mask].unsqueeze(1)
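+                 # Monocular reference depth is only known up to scale and shift, so
+                 # fit that affine map (least squares) from the GT depths onto the
+                 # predicted depths before taking the MSE.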
130
+                 with torch.no_grad():
+                     A = torch.cat(
+                         [valid_gt_depth, torch.ones_like(valid_gt_depth)], dim=-1
+                     )  # [B, 2]
+                     X = torch.linalg.lstsq(A, valid_pred_depth).solution  # [2, 1]
+                     valid_gt_depth = A @ X  # [B, 1]
+                 set_loss("depth", F.mse_loss(valid_gt_depth, valid_pred_depth))
+
+             # relative depth loss
+             if self.C(self.cfg.loss.lambda_depth_rel) > 0:
+                 valid_gt_depth = batch["ref_depth"][gt_mask.squeeze(-1)]  # [B,]
+                 valid_pred_depth = out["comp_depth"][gt_mask]  # [B,]
+                 set_loss(
+                     "depth_rel", 1 - self.pearson(valid_pred_depth, valid_gt_depth)
+                 )
+
+             # normal loss
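+             # Normals are stored as RGB images in [0, 1]; both sides are decoded to
+             # [-1, 1] below (the reference decode flips the sign convention relative
+             # to the rendered normals) before the cosine-similarity term.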
147
+             if self.C(self.cfg.loss.lambda_normal) > 0:
+                 valid_gt_normal = (
+                     1 - 2 * batch["ref_normal"][gt_mask.squeeze(-1)]
+                 )  # [B, 3]
+                 valid_pred_normal = (
+                     2 * out["comp_normal"][gt_mask.squeeze(-1)] - 1
+                 )  # [B, 3]
+                 set_loss(
+                     "normal",
+                     1 - F.cosine_similarity(valid_pred_normal, valid_gt_normal).mean(),
+                 )
+         elif guidance == "zero123":
+             # zero123
+             guidance_out = self.guidance(
+                 out["comp_rgb"],
+                 **batch,
+                 rgb_as_latents=False,
+                 guidance_eval=guidance_eval,
+             )
+             # claforte: TODO: rename the loss_terms keys
+             set_loss("sds", guidance_out["loss_sds"])
+
+         if self.C(self.cfg.loss.lambda_normal_smooth) > 0:
+             if "comp_normal" not in out:
+                 raise ValueError(
+                     "comp_normal is required for 2D normal smooth loss, no comp_normal is found in the output."
+                 )
+             normal = out["comp_normal"]
+             set_loss(
+                 "normal_smooth",
+                 (normal[:, 1:, :, :] - normal[:, :-1, :, :]).square().mean()
+                 + (normal[:, :, 1:, :] - normal[:, :, :-1, :]).square().mean(),
+             )
+
+         loss = 0.0
+         for name, value in loss_terms.items():
+             self.log(f"train/{name}", value)
+             if name.startswith(loss_prefix):
+                 loss_weighted = value * self.C(
+                     self.cfg.loss[name.replace(loss_prefix, "lambda_")]
+                 )
+                 self.log(f"train/{name}_w", loss_weighted)
+                 loss += loss_weighted
+
+         for name, value in self.cfg.loss.items():
+             self.log(f"train_params/{name}", self.C(value))
+
+         self.log(f"train/loss_{guidance}", loss)
+
+         out.update({"loss": loss})
+         return out
+
+     def training_step(self, batch, batch_idx):
+         opt = self.optimizers()
+
+         if self.cfg.freq.get("ref_or_zero123", "accumulate") == "accumulate":
+             do_ref = True
+             do_zero123 = True
+         elif self.cfg.freq.get("ref_or_zero123", "accumulate") == "alternate":
+             do_ref = (
+                 self.true_global_step < self.cfg.freq.ref_only_steps
+                 or self.true_global_step % self.cfg.freq.n_ref == 0
+             )
+             do_zero123 = not do_ref
+
+         total_loss = 0.0
+         if do_zero123:
+             out = self.training_substep(batch, batch_idx, guidance="zero123")
+             total_loss += out["loss"]
+
+         if do_ref:
+             out = self.training_substep(batch, batch_idx, guidance="ref")
+             total_loss += out["loss"]
+
+         self.log("train/loss", total_loss, prog_bar=True)
+
+         visibility_filter = out["visibility_filter"]
+         radii = out["radii"]
+         guidance_inp = out["comp_rgb"]
+         viewspace_point_tensor = out["viewspace_points"]
+
+         total_loss.backward()
+         iteration = self.global_step
+         self.geometry.update_states(
+             iteration,
+             visibility_filter,
+             radii,
+             viewspace_point_tensor,
+         )
+         opt.step()
+         opt.zero_grad(set_to_none=True)
+
+         return {"loss": total_loss}
+
+     def validation_step(self, batch, batch_idx):
+         out = self(batch)
+         self.save_image_grid(
+             f"it{self.true_global_step}-val/{batch['index'][0]}.png",
+             (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": batch["rgb"][0],
+                         "kwargs": {"data_format": "HWC"},
+                     }
+                 ]
+                 if "rgb" in batch
+                 else []
+             )
+             + [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             ),
+             # claforte: TODO: don't hardcode the frame numbers to record... read them from cfg instead.
+             name=f"validation_step_batchidx_{batch_idx}"
+             if batch_idx in [0, 7, 15, 23, 29]
+             else None,
+             step=self.true_global_step,
+         )
+
+     def on_validation_epoch_end(self):
+         filestem = f"it{self.true_global_step}-val"
+         self.save_img_sequence(
+             filestem,
+             filestem,
+             r"(\d+)\.png",
+             save_format="mp4",
+             fps=30,
+             name="validation_epoch_end",
+             step=self.true_global_step,
+         )
+
+     def test_step(self, batch, batch_idx):
+         out = self(batch)
+         self.save_image_grid(
+             f"it{self.true_global_step}-test/{batch['index'][0]}.png",
+             (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": batch["rgb"][0],
+                         "kwargs": {"data_format": "HWC"},
+                     }
+                 ]
+                 if "rgb" in batch
+                 else []
+             )
+             + [
+                 {
+                     "type": "rgb",
+                     "img": out["comp_rgb"][0],
+                     "kwargs": {"data_format": "HWC"},
+                 },
+             ]
+             + (
+                 [
+                     {
+                         "type": "rgb",
+                         "img": out["comp_normal"][0],
+                         "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
+                     }
+                 ]
+                 if "comp_normal" in out
+                 else []
+             ),
+             name="test_step",
+             step=self.true_global_step,
+         )
+
+     def on_test_epoch_end(self):
+         self.save_img_sequence(
+             f"it{self.true_global_step}-test",
+             f"it{self.true_global_step}-test",
+             r"(\d+)\.png",
+             save_format="mp4",
+             fps=30,
+             name="test",
+             step=self.true_global_step,
+         )
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/system/scene_lang.py ADDED
@@ -0,0 +1,528 @@
+ import math
+ from dataclasses import dataclass, field
+
+ import os
+ import collections
+ import random
+ import numpy as np
+ import threestudio
+ import torch
+ import cv2
+ from sklearn.cluster import KMeans
+ import torchvision
+ from PIL import Image
+ from transformers import pipeline
+ from threestudio.systems.base import BaseLift3DSystem
+ from threestudio.systems.utils import parse_optimizer, parse_scheduler
+ from threestudio.utils.loss import tv_loss
+ from threestudio.utils.ops import get_cam_info_gaussian
+ from threestudio.utils.typing import *
+ from torch.cuda.amp import autocast
+ from tqdm.contrib import tenumerate
+ from tqdm import tqdm, trange
+
+ from ..geometry.gaussian_base import BasicPointCloud, Camera
+ from ..utils.sam_clip import SamClip
+ from ..utils.ae import Autoencoder_dataset, Autoencoder
+ from torch.utils.data import Dataset, DataLoader
+
+
+ def l2_loss(network_output, gt):
+     return ((network_output - gt) ** 2).mean()
+
+
+ def cos_loss(network_output, gt):
+     return 1 - torch.nn.functional.cosine_similarity(network_output, gt, dim=0).mean()
+
+
+ @threestudio.register("scene-lang-system")
+ class SceneLang(BaseLift3DSystem):
+     @dataclass
+     class Config(BaseLift3DSystem.Config):
+         visualize_samples: bool = False
+
+         distill_lang_freq: int = 800
+         outpaint_step: int = 300
+         sam_clip: dict = field(default_factory=dict)
+         encoder_hidden_dims: Optional[List] = field(default_factory=list)
+         decoder_hidden_dims: Optional[List] = field(default_factory=list)
+         ae_epoch: int = 100
+         distill_lang_epoch: int = 100
+         sam_clip_ae_lr: float = 3e-4
+         densify: bool = True
+         distill_interval: int = 2
+         xyz_noise_ratio: Any = None
+         drop_ooi_ratio: Any = field(default_factory=dict)
+         empty_prompt: str = "empty"
+         side_prompt: str = "empty"
+         crop_with_lang: bool = True
+         rotate_aug_scale: int = 15
+
+     cfg: Config
+
+     def configure(self) -> None:
+         # set up geometry, material, background, renderer
+         super().configure()
+         self.automatic_optimization = False
+
+         self.geometry.prompt = self.cfg.prompt_processor.prompt
+         self.geometry.empty_prompt = self.cfg.empty_prompt
+         self.geometry.side_prompt = self.cfg.side_prompt
+
+         self.guidance = threestudio.find(self.cfg.guidance_type)(self.cfg.guidance)
+         self.prompt_processor = threestudio.find(self.cfg.prompt_processor_type)(
+             self.cfg.prompt_processor
+         )
+         self.prompt_utils = self.prompt_processor()
+
+         self.cfg.prompt_processor.prompt = self.cfg.empty_prompt
+         self.bg_prompt_processor = threestudio.find(self.cfg.prompt_processor_type)(
+             self.cfg.prompt_processor
+         )
+         self.bg_prompt_utils = self.bg_prompt_processor()
+
+         self.sam_clip = SamClip(self.cfg.sam_clip)
+         self.sam_clip_ae = Autoencoder(
+             self.cfg.encoder_hidden_dims, self.cfg.decoder_hidden_dims
+         ).cuda()
+
+     def configure_optimizers(self):
+         optim = self.geometry.optimizer
+         if hasattr(self, "merged_optimizer"):
+             return [optim]
+         if hasattr(self.cfg.optimizer, "name"):
+             net_optim = parse_optimizer(self.cfg.optimizer, self)
+             optim = self.geometry.merge_optimizer(net_optim)
+             self.merged_optimizer = True
+         else:
+             self.merged_optimizer = False
+         return [optim]
+
+     def on_save_checkpoint(self, checkpoint):
+         if 'optimizer_states' in checkpoint.keys():
+             del checkpoint['optimizer_states']
+
+         del_keys = [k for k in checkpoint['state_dict'].keys() if 'sam' in k]
+         for k in del_keys:
+             del checkpoint['state_dict'][k]
+
+     def forward(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+         self.geometry.update_learning_rate(self.global_step)
+         outputs = self.renderer.batch_forward(batch)
+         return outputs
+
+     def on_fit_start(self) -> None:
+         super().on_fit_start()
+
+     def training_step(self, batch, batch_idx):
+         self.geometry.noise_ratio = self.C(self.cfg.xyz_noise_ratio)
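+         # With probability drop_ooi_ratio, mask out every object-of-interest
+         # Gaussian so only the background is rendered; those iterations are
+         # supervised with the empty-scene (background) prompt instead.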
115
+ if random.random() < self.C(self.cfg.drop_ooi_ratio):
116
+ self.geometry._opacity_mask = (sum(self.geometry.ooi_masks)==0).float()
117
+ else:
118
+ self.geometry._opacity_mask = None
119
+
120
+ if self.true_global_step > 0 and self.true_global_step == self.cfg.distill_lang_freq : # finish rgb phase
121
+ self.distill_language_feature()
122
+
123
+ if self.true_global_step == self.cfg.outpaint_step:
124
+ self.outpaint()
125
+
126
+ apply_rotate = False
127
+ if self.true_global_step > self.cfg.distill_lang_freq:
128
+ apply_rotate = random.random() < 0.5
129
+ self.geometry.random_rotate(self.cfg.rotate_aug_scale, apply_rotate)
130
+
131
+ opt = self.optimizers()
132
+ out = self(batch)
133
+
134
+ visibility_filter = out["visibility_filter"]
135
+ radii = out["radii"]
136
+ guidance_inp = out["comp_rgb"]
137
+ viewspace_point_tensor = out["viewspace_points"]
138
+ if self.geometry._opacity_mask is None:
139
+ pu = self.prompt_utils
140
+ else:
141
+ pu = self.bg_prompt_utils
142
+ guidance_out = self.guidance(
143
+ guidance_inp, pu, **batch, rgb_as_latents=False
144
+ )
145
+
146
+ loss_sds = 0.0
147
+ loss = 0.0
148
+
149
+ self.log(
150
+ "gauss_num",
151
+ int(self.geometry.get_xyz.shape[0]),
152
+ on_step=True,
153
+ on_epoch=True,
154
+ prog_bar=True,
155
+ logger=True,
156
+ )
157
+
158
+ if self.cfg.loss["lambda_ref"] > 0.0:
159
+ ref_img = self.cfg.geometry.geometry_convert_from[len("depth:"):]
160
+ ref_img = torch.tensor(np.array(Image.open(ref_img).resize((self.dataset.cfg.width, self.dataset.cfg.height)))[None] / 255, device = out['comp_rgb'].device)
161
+ bg_ref_img = torch.tensor(self.geometry.bg_image[None] / 255, device = out['comp_rgb'].device)
162
+ bg_ref_img_mask = torch.from_numpy(self.geometry.bg_image_mask[None, ..., None].astype(float)).cuda()
163
+
164
+ if self.geometry._opacity_mask is None:
165
+ if not apply_rotate:
166
+ l1loss = torch.nn.L1Loss()(out['comp_rgb'][0:1], ref_img) # only calculate the first view (zero view)
167
+ self.log(f"train/recon_front_view", l1loss)
168
+ loss += l1loss * self.cfg.loss["lambda_ref"]
169
+
170
+ if self.true_global_step > self.cfg.outpaint_step:
171
+ for view_idx in [0, -1]:
172
+ self.geometry._opacity_mask = None
173
+ sample = self.trainer.val_dataloaders.dataset[view_idx]
174
+ for k in sample.keys():
175
+ try:
176
+ sample[k] = sample[k].cuda()[None]
177
+ except:
178
+ pass
179
+ output = self(sample)
180
+ rgb = output['comp_rgb']
181
+ target = self.outpaint_view[view_idx]
182
+ # loss += torch.nn.L1Loss()(rgb, target) * self.cfg.loss["lambda_ref"]
183
+ loss += (torch.nn.L1Loss(reduction='none')(rgb, target) * self.outpaint_mask[view_idx]).mean() * self.cfg.loss["lambda_ref"]
184
+ else:
185
+ ratio = bg_ref_img_mask.sum() / bg_ref_img_mask.shape[1] / bg_ref_img_mask.shape[2]
186
+ l1loss = torch.nn.L1Loss(reduction='none')(out['comp_rgb'][0:1], bg_ref_img) * bg_ref_img_mask # only calculate the first view (zero view)
187
+ l1loss = l1loss.mean() / ratio
188
+ loss += l1loss * self.cfg.loss["lambda_ref"]
189
+
190
+ if self.cfg.loss["lambda_scaling"] > 0.0:
191
+ scaling_loss = self.geometry.get_scaling.mean()
192
+ loss += scaling_loss * self.cfg.loss["lambda_scaling"]
193
+
194
+ for name, value in guidance_out.items():
195
+ self.log(f"train/{name}", value)
196
+ if name.startswith("loss_"):
197
+ loss_sds += value * self.C(
198
+ self.cfg.loss[name.replace("loss_", "lambda_")]
199
+ )
200
+
201
+ loss = loss + loss_sds
202
+ iteration = self.global_step
203
+ opt.zero_grad()
204
+ if loss > 0:
205
+ loss.backward(retain_graph=True)
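+ # 3DGS-style densification bookkeeping: update_states is expected to
+ # accumulate view-space gradients and max radii for visible Gaussians, then
+ # split/clone/prune on its own schedule.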
206
+ if self.cfg.densify:
207
+ self.geometry.update_states(
208
+ iteration,
209
+ visibility_filter,
210
+ radii,
211
+ viewspace_point_tensor,
212
+ )
213
+ opt.step()
214
+ opt.zero_grad(set_to_none=True)
215
+
216
+ self.log("train/loss", loss)
217
+ return {"loss": loss}
218
+
219
+ def validation_step(self, batch, batch_idx):
220
+ self.geometry._opacity_mask = None
221
+ out = self(batch)
222
+ mask, _ = self.geometry.project_pc(batch['c2w'], H=self.dataset.cfg.height, W=self.dataset.cfg.width)
223
+ self.save_image_grid(
224
+ f"it{self.global_step}-val/{batch['index'][0]}.png",
225
+ [
226
+ {
227
+ "type": "rgb",
228
+ "img": out["comp_rgb"][0],
229
+ "kwargs": {"data_format": "HWC"},
230
+ },
231
+ ]
232
+ + (
233
+ [
234
+ {
235
+ "type": "rgb",
236
+ "img": out["comp_normal"][0],
237
+ "kwargs": {"data_format": "HWC", "data_range": (0, 1)},
238
+ }
239
+ ]
240
+ if "comp_normal" in out
241
+ else []
242
+ ),
243
+ name="validation_step",
244
+ step=self.global_step,
245
+ )
246
+
247
+ def on_validation_epoch_end(self):
248
+ self.save_img_sequence(
249
+ f"it{self.global_step}-val",
250
+ f"it{self.global_step}-val",
251
+ "(\d+)\.png",
252
+ save_format="mp4",
253
+ fps=30,
254
+ name="val",
255
+ step=self.global_step,
256
+ delete_images=True,
257
+ )
258
+
259
+ def test_step(self, batch, batch_idx):
260
+ # remove the random rotation effect!
261
+ self.geometry.recover_xyzrot()
262
+ out = self(batch)
263
+ self.save_image_grid(
264
+ f"it{self.global_step}-test/{batch['index'][0]}.png",
265
+ [
266
+ {
267
+ "type": "rgb",
268
+ "img": out["comp_rgb"][0],
269
+ "kwargs": {"data_format": "HWC"},
270
+ },
271
+ ]
272
+ + [
273
+ {
274
+ "type": "rgb",
275
+ "img": out["lang"][0],
276
+ "kwargs": {"data_format": "HWC", "data_range": (out["lang"][0].min().item(), out["lang"][0].max().item())},
277
+ },
278
+ ],
279
+ name="test_step",
280
+ step=self.global_step,
281
+ )
282
+ if batch["index"][0] == 0:
283
+ save_path = self.get_save_path("point_cloud.ply")
284
+ self.geometry.save_ply(save_path)
285
+
286
+ def on_test_epoch_end(self):
287
+ self.save_img_sequence(
288
+ f"it{self.global_step}-test",
289
+ f"it{self.global_step}-test",
290
+ "(\d+)\.png",
291
+ save_format="mp4",
292
+ fps=30,
293
+ name="test",
294
+ step=self.global_step,
295
+ )
296
+
297
+ def on_load_checkpoint(self, ckpt_dict) -> None:
298
+ for key in self.state_dict().keys():
299
+ if 'sam' in key:
300
+ ckpt_dict["state_dict"][key] = self.state_dict()[key]
301
+
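+ # Re-create the Gaussian buffers with the point count stored in the
+ # checkpoint (contents are placeholders) so that load_state_dict finds
+ # tensors of matching shape; SAM weights are copied from the live module.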
302
+ num_pts = ckpt_dict["state_dict"]["geometry._xyz"].shape[0]
303
+ pcd = BasicPointCloud(
304
+ points=np.zeros((num_pts, 3)),
305
+ colors=np.zeros((num_pts, 3)),
306
+ normals=np.zeros((num_pts, 3)),
307
+ )
308
+ self.geometry.create_from_pcd(pcd, 10)
309
+ self.geometry.training_setup()
310
+ super().on_load_checkpoint(ckpt_dict)
311
+
312
+ def outpaint(self) -> None:
313
+ threestudio.info("Start outpainting.")
314
+ self.outpaint_view = dict()
315
+ self.outpaint_mask = dict()
316
+ cnt = 0
317
+ for view_idx in [0, -1]:
318
+ self.geometry._opacity_mask = None
319
+ sample = self.trainer.val_dataloaders.dataset[view_idx]
320
+ for k in sample.keys():
321
+ try:
322
+ sample[k] = sample[k].cuda()[None]
323
+ except Exception:  # non-tensor entries stay as-is
324
+ pass
325
+ output = self(sample)
326
+ rgb = (output['comp_rgb'][0] * 255).detach().cpu().numpy().astype(np.uint8)
327
+ rgb = Image.fromarray(rgb)
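+ # Project the current point cloud into this view; pixels not covered by any
+ # point (~mask) are the holes that outpainting needs to fill.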
328
+ mask, depth = self.geometry.project_pc(sample['c2w'], H=512, W=512)
329
+ mask = ~mask[0].cpu().numpy()
330
+ mask = Image.fromarray(mask)
331
+ c2w = sample['c2w']
332
+ rgb, mask = self.geometry.add_pc_from_novel_view(rgb, mask, depth, c2w, save_path=os.path.join(self._save_dir[:-4], f'{cnt}.ply'))
333
+ rgb.save(os.path.join(self._save_dir[:-4], f"outpaint_{cnt}.png"))
334
+ mask.save(os.path.join(self._save_dir[:-4], f"mask_{cnt}.png"))
335
+ cnt += 1
336
+ self.outpaint_view[view_idx] = torch.tensor(np.array(rgb), device='cuda')[None] / 255
337
+ self.outpaint_mask[view_idx] = torch.tensor(np.array(mask).astype(float), device='cuda')[None, ..., None]
338
+
339
+ def distill_language_feature(self) -> None:
340
+ threestudio.info("Start distilling language feature.")
341
+ self.geometry._opacity_mask = None
342
+ total_embed = []
343
+ total_feat = []
344
+ total_flag = []
345
+
346
+ for idx in trange(0, len(self.trainer.val_dataloaders.dataset), self.cfg.distill_interval):
347
+ sample = self.trainer.val_dataloaders.dataset[idx]
348
+ for k in sample.keys():
349
+ try:
350
+ sample[k] = sample[k].cuda()[None]
351
+ except Exception:  # non-tensor entries stay as-is
352
+ pass
353
+ output = self(sample)
354
+ rgb = output['comp_rgb'] #shape: 1, 512, 512, 3
355
+ rgb = (rgb.permute(0, 3, 1, 2) * 255).type(torch.uint8)
356
+
357
+ try:
358
+ embed, seg, mask = self.sam_clip(rgb) # feat's shape: N * H * W
359
+ total_embed.append(embed)
360
+ total_feat.append(seg)
361
+ total_flag.append(idx)
362
+ except Exception:
363
+ threestudio.info(f'exception caught during language distillation at view {idx}')
364
+ pass
365
+
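+ # The autoencoder compresses the 512-D CLIP embeddings collected above into
+ # a low-dimensional code that can be stored per Gaussian and rendered as the
+ # language feature.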
366
+ # train VAE
367
+ threestudio.info("Start training autoencoder.")
368
+ dataset = Autoencoder_dataset(torch.cat(total_embed, 0).float().numpy())
369
+ dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0, drop_last=False)
370
+ optimizer = torch.optim.Adam(self.sam_clip_ae.parameters(), lr=self.cfg.sam_clip_ae_lr)
371
+
372
+ self.sam_clip_ae.train()
373
+ for epoch in tqdm(range(self.cfg.ae_epoch)):
374
+ for idx, data in enumerate(dataloader):
375
+ data = data.cuda()
376
+ mid = self.sam_clip_ae.encode(data)
377
+ _data = self.sam_clip_ae.decode(mid)
378
+ l2loss = l2_loss(_data, data)
379
+ cosloss = cos_loss(_data, data)
380
+ loss = l2loss + cosloss * 0.001
381
+ optimizer.zero_grad()
382
+ loss.backward()
383
+ optimizer.step()
384
+
385
+ self.sam_clip_ae.eval()
386
+ mids = dict()
387
+ with torch.no_grad():
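+ # Append an all-zero CLIP embedding as the last row so that seg index -1
+ # (pixels not covered by any SAM mask) selects this sentinel row instead of
+ # indexing out of range.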
388
+ zero_tensor = torch.zeros([1, 512], dtype=float)
389
+ for idx, seg, embed in zip(total_flag, total_feat, total_embed):
390
+ embeds = torch.cat([embed, zero_tensor], 0).float().cuda()
391
+ embeds = self.sam_clip_ae.encode(embeds)
392
+ mid = embeds[seg[:]].squeeze(0).reshape(self.dataset.cfg.height, self.dataset.cfg.width, -1)
393
+ mids[idx] = mid
394
+ rgb = ((mid - mid.min()) / (mid.max() - mid.min())).cpu()
395
+ if self.sam_clip.cfg.vis_pca_feature:
396
+ self.save_image_grid(f"it{self.global_step}-ae/{idx}.png",
397
+ [
398
+ {
399
+ "type": "rgb",
400
+ "img": rgb,
401
+ "kwargs": {"data_format": "HWC"},
402
+ },
403
+ ],
404
+ name="ae",
405
+ step=self.global_step,
406
+ )
407
+
408
+ if self.sam_clip.cfg.vis_pca_feature:
409
+ self.save_img_sequence(
410
+ f"it{self.global_step}-ae",
411
+ f"it{self.global_step}-ae",
412
+ "(\d+)\.png",
413
+ save_format="mp4",
414
+ fps=30,
415
+ name="ae",
416
+ step=self.global_step,
417
+ )
418
+
419
+ threestudio.info("Start training Lang feature.")
420
+ # distill lang feature
421
+ self.geometry.lang_training_setup()
422
+ opt = self.geometry.lang_optimizer
423
+
424
+ idx_list = list(mids.keys())
425
+ sample_dict = dict()
426
+
427
+ for idx, sample in enumerate(self.trainer.val_dataloaders.dataset):
428
+ for k in sample.keys():
429
+ try:
430
+ sample[k] = sample[k].cuda()[None]
431
+ except Exception:  # non-tensor entries stay as-is
432
+ pass
433
+ sample_dict[idx] = sample
434
+
435
+ for epoch in trange(self.cfg.distill_lang_epoch):
436
+ random.shuffle(idx_list)
437
+ for idx in idx_list:
438
+ sample = sample_dict[idx]
439
+ lang = self(sample)["lang"]
440
+ mid = mids[idx][None]
441
+ loss = l2_loss(mid, lang)
442
+ opt.zero_grad()
443
+ loss.backward()
444
+ opt.step()
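+ # Every 30 epochs, clear Adam's running moments to reset stale momentum
+ # between shuffled passes over the cached views.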
445
+ if (epoch + 1) % 30 == 0:
446
+ opt.state = collections.defaultdict(dict)
447
+
448
+ self.renderer.training = False
449
+ with torch.no_grad():
450
+ lang_min, lang_max = None, None
451
+ for idx, sample in sample_dict.items():
452
+ lang = self(sample)["lang"][0]
453
+ if lang_min is None:
454
+ lang_min, lang_max = lang.min().item(), lang.max().item()
455
+ self.save_image_grid(f"it{self.global_step}-feat/{idx}.png",
456
+ [
457
+ {
458
+ "type": "rgb",
459
+ "img": lang,
460
+ "kwargs": {"data_format": "HWC", "data_range": (lang_min, lang_max)},
461
+ },
462
+ ],
463
+ name=f"feat",
464
+ step=self.global_step,
465
+ )
466
+ self.renderer.training = True
467
+
468
+ self.save_img_sequence(
469
+ f"it{self.global_step}-feat",
470
+ f"it{self.global_step}-feat",
471
+ "(\d+)\.png",
472
+ save_format="mp4",
473
+ fps=30,
474
+ name=f"feat",
475
+ step=self.global_step,
476
+ )
477
+
478
+ self.geometry.training_setup()
479
+
480
+ threestudio.info("Use Lang feature to crop pts")
481
+ if self.cfg.crop_with_lang:
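+ # Clean up each object's Gaussians in three passes: (1) drop K-Means clusters
+ # of the language feature that sit far from the dominant cluster, (2) drop
+ # per-dimension language-feature outliers beyond p standard deviations, and
+ # (3) drop RGB (DC feature) outliers the same way. Removed points are also
+ # recorded in _delete_mask.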
482
+ p = 2
483
+ if self.geometry._delete_mask is None:
484
+ self.geometry._delete_mask = torch.ones_like(self.geometry.ooi_masks[0])
485
+ for ooi_idx, ooi_mask in enumerate(self.geometry.ooi_masks):
486
+ threestudio.info(self.geometry.ooi_masks[ooi_idx].sum())
487
+ idx = torch.arange(len(ooi_mask), device='cuda')[ooi_mask.bool()]
488
+ lang_feat = self.geometry.get_language_feature[ooi_mask.bool()]
489
+ lang_feat = lang_feat / (lang_feat.norm(2, dim=-1, keepdim=True) + 0.1)
490
+
491
+ original_ooi_mask = ooi_mask.clone()
492
+ # filter with color by KMeans
493
+ kmeans = KMeans(n_init='auto', n_clusters=10)
494
+ kmeans.fit(lang_feat.detach().cpu())
495
+ labels = kmeans.labels_
496
+ cluster_sizes = [(labels == i).sum() for i in np.unique(labels)]
497
+ max_label = cluster_sizes.index(max(cluster_sizes))
498
+ dist = ((kmeans.cluster_centers_ - kmeans.cluster_centers_[max_label:max_label+1]) ** 2).sum(-1) ** 0.5
499
+
500
+ for label, num in enumerate(cluster_sizes):
501
+ if dist[label] > 0.3:
502
+ ooi_mask[idx[labels == label]] = False
503
+ self.geometry._delete_mask[idx[labels == label]] = 0.
504
+
505
+ p = 1
506
+ # filter with color by Gaussian
507
+ mean, std = lang_feat.mean(0), lang_feat.std(0)
508
+ outlier = torch.logical_or(lang_feat < mean - p * std, lang_feat > mean + p * std).sum(-1) > 0
509
+ ooi_mask[idx[outlier]] = False
510
+ self.geometry._delete_mask[idx[outlier]] = 0.
511
+
512
+ p = 3
513
+ # filter with RGB by Gaussian
514
+ rgb = self.geometry.get_features[original_ooi_mask.bool()][:, 0]
515
+ mean, std = rgb.mean(0), rgb.std(0)
516
+ outlier = torch.logical_or(rgb < mean - p * std, rgb > mean + p * std).sum(-1) > 0
517
+ ooi_mask[idx[outlier]] = False
518
+ self.geometry._delete_mask[idx[outlier]] = 0.
519
+
520
+ def load_state_dict(self, state_dict, strict=True):
521
+ i = 0
522
+ while True:
523
+ if f'geometry.ooi_masks_{i}' not in state_dict.keys():
524
+ break
525
+ self.geometry.register_buffer(f'ooi_masks_{i}', state_dict[f'geometry.ooi_masks_{i}'])
526
+ i += 1
527
+ self.geometry.register_buffer('_delete_mask', state_dict['geometry._delete_mask'])
528
+ return super().load_state_dict(state_dict, strict)
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/__init__.py ADDED
File without changes
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/ae.py ADDED
@@ -0,0 +1,63 @@
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.utils.data import Dataset
6
+
7
+ class Autoencoder_dataset(Dataset):
8
+ def __init__(self, data):
9
+ self.data = data
10
+
11
+ def __getitem__(self, index):
12
+ data = torch.tensor(self.data[index])
13
+ return data
14
+
15
+ def __len__(self):
16
+ return self.data.shape[0]
17
+
18
+
19
+ class Autoencoder(nn.Module):
20
+ def __init__(self, encoder_hidden_dims, decoder_hidden_dims):
21
+ super(Autoencoder, self).__init__()
22
+ encoder_layers = []
23
+ for i in range(len(encoder_hidden_dims)):
24
+ if i == 0:
25
+ encoder_layers.append(nn.Linear(512, encoder_hidden_dims[i]))
26
+ else:
27
+ encoder_layers.append(torch.nn.GroupNorm(2, encoder_hidden_dims[i-1]))
28
+ # encoder_layers.append(torch.nn.BatchNorm1d(encoder_hidden_dims[i-1]))
29
+ encoder_layers.append(nn.ReLU())
30
+ encoder_layers.append(nn.Linear(encoder_hidden_dims[i-1], encoder_hidden_dims[i]))
31
+ self.encoder = nn.ModuleList(encoder_layers)
32
+
33
+ decoder_layers = []
34
+ for i in range(len(decoder_hidden_dims)):
35
+ if i == 0:
36
+ decoder_layers.append(nn.Linear(encoder_hidden_dims[-1], decoder_hidden_dims[i]))
37
+ else:
38
+ decoder_layers.append(torch.nn.GroupNorm(2, decoder_hidden_dims[i-1]))
39
+ # decoder_layers.append(torch.nn.BatchNorm1d(decoder_hidden_dims[i-1]))
40
+ decoder_layers.append(nn.ReLU())
41
+ decoder_layers.append(nn.Linear(decoder_hidden_dims[i-1], decoder_hidden_dims[i]))
42
+ self.decoder = nn.ModuleList(decoder_layers)
43
+
44
+ def forward(self, x):
45
+ for m in self.encoder:
46
+ x = m(x)
47
+ x = x / x.norm(2, dim=-1, keepdim=True)
48
+ for m in self.decoder:
49
+ x = m(x)
50
+ # x = x / x.norm(2, dim=-1, keepdim=True)
51
+ return x
52
+
53
+ def encode(self, x):
54
+ for m in self.encoder:
55
+ x = m(x)
56
+ x = x / x.norm(2, dim=-1, keepdim=True)
57
+ return x
58
+
59
+ def decode(self, x):
60
+ for m in self.decoder:
61
+ x = m(x)
62
+ # x = x / x.norm(2, dim=-1, keepdim=True)
63
+ return x
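+ # Hypothetical usage sketch (dims are illustrative, not the project config):
+ #   ae = Autoencoder(encoder_hidden_dims=[256, 128, 32, 3],
+ #                    decoder_hidden_dims=[16, 64, 256, 512])
+ #   code = ae.encode(clip_feats)   # (N, 512) -> (N, 3), L2-normalized
+ #   recon = ae.decode(code)        # (N, 3) -> (N, 512)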
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/custom/threestudio-3dgs/utils/sam_clip.py ADDED
@@ -0,0 +1,366 @@
1
+ from dataclasses import dataclass, field
2
+ import pytorch_lightning as pl
3
+ from threestudio.utils.config import parse_structured
4
+ from threestudio.utils.base import Updateable, update_if_possible
5
+ from threestudio.utils.saving import SaverMixin
6
+ from threestudio.utils.typing import *
7
+
8
+ import open_clip
9
+ import torch
10
+ import torchvision
11
+ from torch import nn
12
+ import cv2
13
+ import numpy as np
14
+ from sklearn.decomposition import PCA
15
+
16
+ from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
17
+ from mobile_sam import sam_model_registry as m_sam_model_registry
18
+ from mobile_sam import SamAutomaticMaskGenerator as m_SamAutomaticMaskGenerator
19
+ from mobile_sam import SamPredictor as m_SamPredictor
20
+
21
+ @dataclass
22
+ class OpenCLIPNetworkConfig:
23
+ _target: Type = field(default_factory=lambda: OpenCLIPNetwork)
24
+ clip_model_type: str = "ViT-B-16"
25
+ clip_model_pretrained: str = "laion2b_s34b_b88k"
26
+ clip_n_dims: int = 512
27
+ negatives: Tuple[str] = ("object", "things", "stuff", "texture")
28
+ positives: Tuple[str] = ("",)
29
+
30
+ class OpenCLIPNetwork(nn.Module):
31
+ def __init__(self, config: OpenCLIPNetworkConfig):
32
+ super().__init__()
33
+ self.config = config
34
+ self.process = torchvision.transforms.Compose(
35
+ [
36
+ torchvision.transforms.Resize((224, 224)),
37
+ torchvision.transforms.Normalize(
38
+ mean=[0.48145466, 0.4578275, 0.40821073],
39
+ std=[0.26862954, 0.26130258, 0.27577711],
40
+ ),
41
+ ]
42
+ )
43
+ model, _, _ = open_clip.create_model_and_transforms(
44
+ self.config.clip_model_type, # e.g., ViT-B-16
45
+ pretrained=self.config.clip_model_pretrained, # e.g., laion2b_s34b_b88k
46
+ precision="fp16",
47
+ )
48
+ model.eval()
49
+ self.tokenizer = open_clip.get_tokenizer(self.config.clip_model_type)
50
+ self.model = model.to("cuda")
51
+ self.clip_n_dims = self.config.clip_n_dims
52
+
53
+ self.positives = self.config.positives
54
+ self.negatives = self.config.negatives
55
+ with torch.no_grad():
56
+ tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.positives]).to("cuda")
57
+ self.pos_embeds = model.encode_text(tok_phrases)
58
+ tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.negatives]).to("cuda")
59
+ self.neg_embeds = model.encode_text(tok_phrases)
60
+ self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True)
61
+ self.neg_embeds /= self.neg_embeds.norm(dim=-1, keepdim=True)
62
+
63
+ assert (
64
+ self.pos_embeds.shape[1] == self.neg_embeds.shape[1]
65
+ ), "Positive and negative embeddings must have the same dimensionality"
66
+ assert (
67
+ self.pos_embeds.shape[1] == self.clip_n_dims
68
+ ), "Embedding dimensionality must match the model dimensionality"
69
+
70
+ @property
71
+ def name(self) -> str:
72
+ return "openclip_{}_{}".format(self.config.clip_model_type, self.config.clip_model_pretrained)
73
+
74
+ @property
75
+ def embedding_dim(self) -> int:
76
+ return self.config.clip_n_dims
77
+
78
+ def gui_cb(self,element):
79
+ self.set_positives(element.value.split(";"))
80
+
81
+ def set_positives(self, text_list):
82
+ self.positives = text_list
83
+ with torch.no_grad():
84
+ tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.positives]).to("cuda")
85
+ self.pos_embeds = self.model.encode_text(tok_phrases)
86
+ self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True)
87
+
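+ # LERF-style relevancy: pair the positive phrase with every negative, take a
+ # temperature-10 softmax over each pair, and return the pair in which the
+ # positive is least confident (a conservative relevancy score).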
88
+ def get_relevancy(self, embed: torch.Tensor, positive_id: int) -> torch.Tensor:
89
+ phrases_embeds = torch.cat([self.pos_embeds, self.neg_embeds], dim=0)
90
+ p = phrases_embeds.to(embed.dtype) # phrases x 512
91
+ output = torch.mm(embed, p.T) # rays x phrases
92
+ positive_vals = output[..., positive_id : positive_id + 1] # rays x 1
93
+ negative_vals = output[..., len(self.positives) :] # rays x N_phrase
94
+ repeated_pos = positive_vals.repeat(1, len(self.negatives)) # rays x N_phrase
95
+
96
+ sims = torch.stack((repeated_pos, negative_vals), dim=-1) # rays x N-phrase x 2
97
+ softmax = torch.softmax(10 * sims, dim=-1) # rays x n-phrase x 2
98
+ best_id = softmax[..., 0].argmin(dim=1) # rays x 2
99
+ return torch.gather(softmax, 1, best_id[..., None, None].expand(best_id.shape[0], len(self.negatives), 2))[:, 0, :]
100
+
101
+ def encode_image(self, input):
102
+ processed_input = self.process(input).half()
103
+ return self.model.encode_image(processed_input)
104
+
105
+ def get_seg_img(mask, image):
106
+ image = image.copy()
107
+ image[mask['segmentation']==0] = np.array([0, 0, 0], dtype=np.uint8)
108
+ x,y,w,h = np.int32(mask['bbox'])
109
+ seg_img = image[y:y+h, x:x+w, ...]
110
+ return seg_img
111
+
112
+ def pad_img(img):
113
+ h, w, _ = img.shape
114
+ l = max(w,h)
115
+ pad = np.zeros((l,l,3), dtype=np.uint8)
116
+ if h > w:
117
+ pad[:,(h-w)//2:(h-w)//2 + w, :] = img
118
+ else:
119
+ pad[(w-h)//2:(w-h)//2 + h, :, :] = img
120
+ return pad
121
+
122
+ def filter(keep: torch.Tensor, masks_result) -> list:
123
+ keep = keep.int().cpu().numpy()
124
+ result_keep = []
125
+ for i, m in enumerate(masks_result):
126
+ if i in keep: result_keep.append(m)
127
+ return result_keep
128
+
129
+ def sava_numpy(save_path, data):
130
+ save_path_s = save_path + '_s.npy'
131
+ save_path_f = save_path + '_f.npy'
132
+ np.save(save_path_s, data['seg_maps'].numpy())
133
+ np.save(save_path_f, data['feature'].numpy())
134
+
135
+ def mask_nms(masks, scores, iou_thr=0.7, score_thr=0.1, inner_thr=0.2, **kwargs):
136
+ """
137
+ Perform mask non-maximum suppression (NMS) on a set of masks based on their scores.
138
+
139
+ Args:
140
+ masks (torch.Tensor): has shape (num_masks, H, W)
141
+ scores (torch.Tensor): The scores of the masks, has shape (num_masks,)
142
+ iou_thr (float, optional): The threshold for IoU.
143
+ score_thr (float, optional): The threshold for the mask scores.
144
+ inner_thr (float, optional): The threshold for the overlap rate.
145
+ **kwargs: Additional keyword arguments.
146
+ Returns:
147
+ selected_idx (torch.Tensor): A tensor representing the selected indices of the masks after NMS.
148
+ """
149
+
150
+ scores, idx = scores.sort(0, descending=True)
151
+ num_masks = idx.shape[0]
152
+
153
+ masks_ord = masks[idx.view(-1), :]
154
+ masks_area = torch.sum(masks_ord, dim=(1, 2), dtype=torch.float)
155
+
156
+ iou_matrix = torch.zeros((num_masks,) * 2, dtype=torch.float, device=masks.device)
157
+ inner_iou_matrix = torch.zeros((num_masks,) * 2, dtype=torch.float, device=masks.device)
158
+ for i in range(num_masks):
159
+ for j in range(i, num_masks):
160
+ intersection = torch.sum(torch.logical_and(masks_ord[i], masks_ord[j]), dtype=torch.float)
161
+ union = torch.sum(torch.logical_or(masks_ord[i], masks_ord[j]), dtype=torch.float)
162
+ iou = intersection / union
163
+ iou_matrix[i, j] = iou
164
+ # flag pairs where one mask is largely contained inside the other
165
+ if intersection / masks_area[i] < 0.5 and intersection / masks_area[j] >= 0.85:
166
+ inner_iou = 1 - (intersection / masks_area[j]) * (intersection / masks_area[i])
167
+ inner_iou_matrix[i, j] = inner_iou
168
+ if intersection / masks_area[i] >= 0.85 and intersection / masks_area[j] < 0.5:
169
+ inner_iou = 1 - (intersection / masks_area[j]) * (intersection / masks_area[i])
170
+ inner_iou_matrix[j, i] = inner_iou
171
+
172
+ iou_matrix.triu_(diagonal=1)
173
+ iou_max, _ = iou_matrix.max(dim=0)
174
+ inner_iou_matrix_u = torch.triu(inner_iou_matrix, diagonal=1)
175
+ inner_iou_max_u, _ = inner_iou_matrix_u.max(dim=0)
176
+ inner_iou_matrix_l = torch.tril(inner_iou_matrix, diagonal=1)
177
+ inner_iou_max_l, _ = inner_iou_matrix_l.max(dim=0)
178
+
179
+ keep = iou_max <= iou_thr
180
+ keep_conf = scores > score_thr
181
+ keep_inner_u = inner_iou_max_u <= 1 - inner_thr
182
+ keep_inner_l = inner_iou_max_l <= 1 - inner_thr
183
+
184
+ # If there are no masks with scores above threshold, the top 3 masks are selected
185
+ if keep_conf.sum() == 0:
186
+ index = scores.topk(3).indices
187
+ keep_conf[index] = True
188
+ if keep_inner_u.sum() == 0:
189
+ index = scores.topk(3).indices
190
+ keep_inner_u[index] = True
191
+ if keep_inner_l.sum() == 0:
192
+ index = scores.topk(3).indices
193
+ keep_inner_l[index] = True
194
+ keep *= keep_conf
195
+ keep *= keep_inner_u
196
+ keep *= keep_inner_l
197
+
198
+ selected_idx = idx[keep]
199
+ return selected_idx
200
+
201
+ def masks_update(*args, **kwargs):
202
+ # remove redundant masks based on the scores and overlap rate between masks
203
+ masks_new = ()
204
+ for masks_lvl in (args):
205
+ seg_pred = torch.from_numpy(np.stack([m['segmentation'] for m in masks_lvl], axis=0))
206
+ iou_pred = torch.from_numpy(np.stack([m['predicted_iou'] for m in masks_lvl], axis=0))
207
+ stability = torch.from_numpy(np.stack([m['stability_score'] for m in masks_lvl], axis=0))
208
+
209
+ scores = stability * iou_pred
210
+ keep_mask_nms = mask_nms(seg_pred, scores, **kwargs)
211
+ masks_lvl = filter(keep_mask_nms, masks_lvl)
212
+
213
+ masks_new += (masks_lvl,)
214
+ return masks_new
215
+
216
+ def sam_encoder(image, mask_generator):
217
+ image = image.detach().cpu()
218
+ image = cv2.cvtColor(image[0].permute(1,2,0).numpy().astype(np.uint8), cv2.COLOR_BGR2RGB)
219
+ # pre-compute masks
220
+ masks_l = mask_generator.generate(image)
221
+ # pre-compute postprocess
222
+ masks_l = masks_update(masks_l, iou_thr=0.8, score_thr=0.7, inner_thr=0.5)[0]
223
+
224
+ def mask2segmap(masks, image):
225
+ seg_img_list = []
226
+ seg_map = -np.ones(image.shape[:2], dtype=np.int32)
227
+ for i in range(len(masks)):
228
+ mask = masks[i]
229
+ seg_img = get_seg_img(mask, image)
230
+ pad_seg_img = cv2.resize(pad_img(seg_img), (224,224))
231
+ seg_img_list.append(pad_seg_img)
232
+
233
+ seg_map[masks[i]['segmentation']] = i
234
+ seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
235
+ seg_imgs = (torch.from_numpy(seg_imgs.astype("float32")).permute(0,3,1,2) / 255.0).to('cuda')
236
+
237
+ return seg_imgs, seg_map
238
+
239
+ seg_images, seg_maps = {}, {}
240
+ seg_images['l'], seg_maps['l'] = mask2segmap(masks_l, image)
241
+
242
+ # 0:default 1:s 2:m 3:l
243
+ return seg_images, seg_maps
244
+
245
+ class SamClip(pl.LightningModule, Updateable, SaverMixin):
246
+ @dataclass
247
+ class Config:
248
+ clip_model_type: str = "ViT-B-16"
249
+ clip_model_pretrained: str = "laion2b_s34b_b88k"
250
+ clip_n_dims: int = 512
251
+ sam_ckpt_path: str = "ckpts/sam_vit_h_4b8939.pth"
252
+ feature_level: int = 3
253
+ vis_pca_feature: bool = True
254
+ use_mobile_sam: bool = True
255
+
256
+ cfg: Config
257
+
258
+ def __init__(self, cfg) -> None:
259
+ super().__init__()
260
+ self.cfg = parse_structured(self.Config, cfg)
261
+ self.model = OpenCLIPNetwork(OpenCLIPNetworkConfig)
262
+ self.clip_n_dims = self.cfg.clip_n_dims
263
+ self.tokenizer = open_clip.get_tokenizer(self.cfg.clip_model_type)
264
+ sam = sam_model_registry["vit_h"](checkpoint=self.cfg.sam_ckpt_path).to('cuda')
265
+ self.mask_generator = SamAutomaticMaskGenerator(
266
+ model=sam,
267
+ points_per_side=32,
268
+ points_per_batch=64,
269
+ pred_iou_thresh=0.7,
270
+ box_nms_thresh=0.7,
271
+ stability_score_thresh=0.85,
272
+ crop_n_layers=1,
273
+ crop_n_points_downscale_factor=1,
274
+ min_mask_region_area=100,
275
+ )
276
+
277
+ model_type = "vit_t"
278
+ sam_checkpoint = "./ckpts/mobile_sam.pt"
279
+ device = "cuda" if torch.cuda.is_available() else "cpu"
280
+ mobile_sam = m_sam_model_registry[model_type](checkpoint=sam_checkpoint)
281
+ mobile_sam.to(device=device)
282
+ mobile_sam.eval()
283
+ self.m_mask_generator = m_SamAutomaticMaskGenerator(mobile_sam)
284
+
285
+ # self.estimator = PCA(n_components=3)
286
+ # self.has_fit = False
287
+
288
+ self.mask_generator.predictor.model.to('cuda')
289
+ self.m_mask_generator.predictor.model.to('cuda')
290
+
291
+ def _embed_clip_sam_tiles(self, image, sam_encoder):
292
+ aug_imgs = torch.cat([image])
293
+ if self.cfg.use_mobile_sam:
294
+ seg_images, seg_map = sam_encoder(aug_imgs, self.m_mask_generator)
295
+ else:
296
+ seg_images, seg_map = sam_encoder(aug_imgs, self.mask_generator)
297
+
298
+ clip_embeds = {}
299
+ # types = ['default', 's', 'm', 'l']
300
+ types = ['l']
301
+ for mode in types:
302
+ tiles = seg_images[mode]
303
+ tiles = tiles.to("cuda")
304
+ with torch.no_grad():
305
+ clip_embed = self.model.encode_image(tiles)
306
+ clip_embed /= clip_embed.norm(dim=-1, keepdim=True)
307
+ clip_embeds[mode] = clip_embed.detach().cpu().half()
308
+
309
+ return clip_embeds, seg_map
310
+
311
+ def forward(self, img):
312
+ embed_size = 512
313
+ seg_maps = []
314
+ total_lengths = []
315
+ timer = 0
316
+ img_embeds = torch.zeros((len(img), 100, embed_size))
317
+
318
+ seg_maps = torch.zeros((len(img), 1, *img.shape[2:]))
319
+ img_embed, seg_map = self._embed_clip_sam_tiles(img, sam_encoder)
320
+
321
+ lengths = [len(v) for k, v in img_embed.items()]
322
+ total_length = sum(lengths)
323
+ # total_lengths.append(total_length)
324
+
325
+ # if total_length > img_embeds.shape[1]:
326
+ # pad = total_length - img_embeds.shape[1]
327
+ # img_embeds = torch.cat([
328
+ # img_embeds,
329
+ # torch.zeros((len(image_list), pad, embed_size))
330
+ # ], dim=1)
331
+
332
+ # img_embed = torch.cat([v for k, v in img_embed.items()], dim=0)
333
+ # assert img_embed.shape[0] == total_length
334
+ img_embeds[0, :total_length] = img_embed['l']
335
+
336
+ # seg_map_tensor = []
337
+ # lengths_cumsum = lengths.copy()
338
+ # for j in range(1, len(lengths)):
339
+ # lengths_cumsum[j] += lengths_cumsum[j-1]
340
+ # for j, (k, v) in enumerate(seg_map.items()):
341
+ # if j == 0:
342
+ # seg_map_tensor.append(torch.from_numpy(v))
343
+ # continue
344
+ # assert v.max() == lengths[j] - 1, f"{j}, {v.max()}, {lengths[j]-1}"
345
+ # v[v != -1] += lengths_cumsum[j-1]
346
+ # seg_map_tensor.append(torch.from_numpy(v))
347
+ # seg_map = torch.stack(seg_map_tensor, dim=0)
348
+ seg_maps[0] = torch.from_numpy(seg_map['l'])
349
+
350
+ # self.mask_generator.predictor.model.to('cpu')
351
+ feature_map = img_embeds[0] # (100, 512)
352
+ seg_map = seg_maps[0] # (1, H, W)
353
+
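+ # Scatter the per-mask CLIP embeddings back onto the image grid: each pixel
+ # looks up its mask id in seg_map and takes that mask's embedding row;
+ # `mask` marks pixels covered by at least one SAM mask.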
354
+ image_height, image_width = seg_map.shape[1:]
355
+ y, x = torch.meshgrid(torch.arange(0, image_height), torch.arange(0, image_width), indexing="ij")
356
+ x = x.reshape(-1, 1)
357
+ y = y.reshape(-1, 1)
358
+ seg = seg_map[:, y, x].squeeze(-1).long()
359
+ mask = seg != -1
360
+ point_feature1 = feature_map[seg[:]].squeeze(0)
361
+ mask = mask[:].reshape(1, image_height, image_width)
362
+ return img_embed['l'], seg, mask
363
+ # point_feature = point_feature1.reshape(image_height, image_width, -1).permute(2, 0, 1)
364
+
365
+ # return img_embed['l'], point_feature, mask
366
+
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_background.png ADDED

Git LFS Details

  • SHA256: 950496f640077d2d1b3f28cf8f2ecaeb56bc641b2c19f6a8107e6d428f5da17f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_composite.png ADDED

Git LFS Details

  • SHA256: 1445582663ddb516915adbfac9f33ba2d95e554d76a1f4b164ef9f119061be74
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/bear_layers.png ADDED
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_background.png ADDED

Git LFS Details

  • SHA256: e7221341ebcc6084cf6ef9521324bea45658cebb9a3a4de487ef0f17bd83235a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.26 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_composite.png ADDED

Git LFS Details

  • SHA256: 4f9a04bee8f5de415251558a4401c7f597ee3e6c2dc989ddf974a9104243c8dc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.17 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/boy_layers.png ADDED
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_background.png ADDED

Git LFS Details

  • SHA256: 2e7c7c2ab126d4d26c2258160d859e03291ef745fae2e05540ac60bc8976e7d9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.34 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_composite.png ADDED

Git LFS Details

  • SHA256: 6c00e3156ad6e929df5abe236b7d98772b13142551e740866317e5e829f3bf03
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
000000000017.1/gs-sds-generation/3DitScene@20250207-015119/code/examples/corgi_layers.png ADDED