Upload folder using huggingface_hub
Browse files
- config.json +7 -7
- depth_projector/config.json +1 -1
- depth_projector/model.safetensors +1 -1
- depth_tower/config.json +1 -1
- depth_tower/model.safetensors +1 -1
- llm/config.json +1 -1
- llm/model.safetensors +1 -1
- mm_projector/config.json +1 -1
- mm_projector/model.safetensors +1 -1
- runs/Jun14_06-32-28_job-9be7e87b-6698-4594-a832-c824906e4803-master-0/events.out.tfevents.1749883238.job-9be7e87b-6698-4594-a832-c824906e4803-master-0 +3 -0
- trainer_state.json +0 -0
- vision_tower/config.json +1 -1
- vision_tower/model.safetensors +1 -1
config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"Ubit": 100,
|
3 |
"_attn_implementation_autoset": true,
|
4 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
5 |
"architectures": [
|
6 |
"LlavaLlamaModel"
|
7 |
],
|
@@ -17,7 +17,7 @@
|
|
17 |
"depth_projector": "mlp_downsample_3x3_fix",
|
18 |
"depth_projector_cfg": {
|
19 |
"_attn_implementation_autoset": false,
|
20 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
21 |
"add_cross_attention": false,
|
22 |
"architectures": [
|
23 |
"MultimodalProjector"
|
@@ -85,7 +85,7 @@
|
|
85 |
"depth_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
86 |
"depth_tower_cfg": {
|
87 |
"_attn_implementation_autoset": false,
|
88 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
89 |
"add_cross_attention": false,
|
90 |
"architectures": [
|
91 |
"SiglipVisionModel"
|
@@ -185,7 +185,7 @@
|
|
185 |
"interpolate_mode": "linear",
|
186 |
"llm_cfg": {
|
187 |
"_attn_implementation_autoset": false,
|
188 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
189 |
"add_cross_attention": false,
|
190 |
"architectures": [
|
191 |
"Qwen2ForCausalLM"
|
@@ -278,7 +278,7 @@
|
|
278 |
"mm_projector": "mlp_downsample_3x3_fix",
|
279 |
"mm_projector_cfg": {
|
280 |
"_attn_implementation_autoset": false,
|
281 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
282 |
"add_cross_attention": false,
|
283 |
"architectures": [
|
284 |
"MultimodalProjector"
|
@@ -366,7 +366,7 @@
|
|
366 |
"refine_mlp_blocksize": false,
|
367 |
"refine_residual_fp": false,
|
368 |
"refine_row_blocksize": 4,
|
369 |
-
"resume_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
370 |
"row_blocksize": -1,
|
371 |
"row_blocksize_optimizer": 1,
|
372 |
"s2": false,
|
@@ -395,7 +395,7 @@
|
|
395 |
"vision_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
396 |
"vision_tower_cfg": {
|
397 |
"_attn_implementation_autoset": false,
|
398 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
399 |
"add_cross_attention": false,
|
400 |
"architectures": [
|
401 |
"SiglipVisionModel"
|
|
|
1 |
{
|
2 |
"Ubit": 100,
|
3 |
"_attn_implementation_autoset": true,
|
4 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model",
|
5 |
"architectures": [
|
6 |
"LlavaLlamaModel"
|
7 |
],
|
|
|
17 |
"depth_projector": "mlp_downsample_3x3_fix",
|
18 |
"depth_projector_cfg": {
|
19 |
"_attn_implementation_autoset": false,
|
20 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_projector",
|
21 |
"add_cross_attention": false,
|
22 |
"architectures": [
|
23 |
"MultimodalProjector"
|
|
|
85 |
"depth_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
86 |
"depth_tower_cfg": {
|
87 |
"_attn_implementation_autoset": false,
|
88 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_tower",
|
89 |
"add_cross_attention": false,
|
90 |
"architectures": [
|
91 |
"SiglipVisionModel"
|
|
|
185 |
"interpolate_mode": "linear",
|
186 |
"llm_cfg": {
|
187 |
"_attn_implementation_autoset": false,
|
188 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/llm",
|
189 |
"add_cross_attention": false,
|
190 |
"architectures": [
|
191 |
"Qwen2ForCausalLM"
|
|
|
278 |
"mm_projector": "mlp_downsample_3x3_fix",
|
279 |
"mm_projector_cfg": {
|
280 |
"_attn_implementation_autoset": false,
|
281 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/mm_projector",
|
282 |
"add_cross_attention": false,
|
283 |
"architectures": [
|
284 |
"MultimodalProjector"
|
|
|
366 |
"refine_mlp_blocksize": false,
|
367 |
"refine_residual_fp": false,
|
368 |
"refine_row_blocksize": 4,
|
369 |
+
"resume_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model",
|
370 |
"row_blocksize": -1,
|
371 |
"row_blocksize_optimizer": 1,
|
372 |
"s2": false,
|
|
|
395 |
"vision_tower": "/share/project/zhouenshen/hpfs/ckpt/vlm/paligemma-siglip-so400m-patch14-448",
|
396 |
"vision_tower_cfg": {
|
397 |
"_attn_implementation_autoset": false,
|
398 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/vision_tower",
|
399 |
"add_cross_attention": false,
|
400 |
"architectures": [
|
401 |
"SiglipVisionModel"
|
depth_projector/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
3 |
"architectures": [
|
4 |
"MultimodalProjector"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_projector",
|
3 |
"architectures": [
|
4 |
"MultimodalProjector"
|
5 |
],
|
depth_projector/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 87068272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:077b99b727dfd07aaf60280baaad16be7560c71c05f0811b58bd5b593aca73d3
|
3 |
size 87068272
|
depth_tower/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
3 |
"architectures": [
|
4 |
"SiglipVisionModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/depth_tower",
|
3 |
"architectures": [
|
4 |
"SiglipVisionModel"
|
5 |
],
|
depth_tower/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 826707904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:640aecc60714538d98ecc6ab817454446075b11c86915d836d0e50e7d5cff6a2
|
3 |
size 826707904
|
llm/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/llm",
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
llm/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3086594696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bfb140450d88d5698d7af3ae8c87af7d7d356fdefe94d66a5ff5b319218071d
|
3 |
size 3086594696
|
mm_projector/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
3 |
"architectures": [
|
4 |
"MultimodalProjector"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/mm_projector",
|
3 |
"architectures": [
|
4 |
"MultimodalProjector"
|
5 |
],
|
mm_projector/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 87068272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c84546f1ff8591f43cc2862dd80837d90c83e2c4f2e9cce9f5f951793433f13
|
3 |
size 87068272
|
runs/Jun14_06-32-28_job-9be7e87b-6698-4594-a832-c824906e4803-master-0/events.out.tfevents.1749883238.job-9be7e87b-6698-4594-a832-c824906e4803-master-0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c41e42347a0db68c8f9b4a94eabb475bfcbde2b18e864eaf47d46c0f41755ab9
|
3 |
+
size 4633872
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
vision_tower/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator
|
3 |
"architectures": [
|
4 |
"SiglipVisionModel"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/share/project/zhouenshen/hpfs/code/VILA/runs/train/NVILA-Lite-2B-depth-sft-new_placement+new_simulator/model/vision_tower",
|
3 |
"architectures": [
|
4 |
"SiglipVisionModel"
|
5 |
],
|
vision_tower/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 826707904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86c782eccce96224a8e0402a3676bb1dbc3d77b279b3c5392b96e99fe008443d
|
3 |
size 826707904
|