HeegerGao committed
Commit 5b508c7 · 1 Parent(s): f952f27

upload vlm

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ LIBERO_checkpoints filter=lfs diff=lfs merge=lfs -text
+ Qwen2.5-VLM filter=lfs diff=lfs merge=lfs -text
+ README.md filter=lfs diff=lfs merge=lfs -text
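These three patterns route the commit's large artifacts through Git LFS, so the repository stores small pointer files (visible in the hunks below) in place of the multi-GB payloads. A minimal sketch of detecting and parsing such a pointer (the helper is illustrative, not part of this repo):

```python
from pathlib import Path

LFS_SPEC = "version https://git-lfs.github.com/spec/v1"

def parse_lfs_pointer(path: str) -> dict | None:
    """Return {'oid', 'size'} if `path` holds a Git LFS pointer, else None."""
    text = Path(path).read_text(errors="ignore")
    if not text.startswith(LFS_SPEC):
        return None  # real payload, not a pointer
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines()[1:])
    return {"oid": fields["oid"].removeprefix("sha256:"),
            "size": int(fields["size"])}
```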
Qwen2.5-VLM/qwen25-dinosiglip-224px+0_5b+stage-finetune+x42/checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc63d956a7e9ee853b2ef74b812454bc71e011cbf65f98c79da5a29e9bf2a61b
+ size 3992031074
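The pointer records the payload's SHA-256 digest and byte size, which is enough to verify a downloaded copy locally. A hedged sketch (this helper is an assumption, not shipped with the repo):

```python
import hashlib
import os

def verify_checkpoint(local_path: str, oid: str, size: int) -> bool:
    """Check a downloaded file against the oid/size from its LFS pointer."""
    if os.path.getsize(local_path) != size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        while block := f.read(1 << 20):  # hash in 1 MiB chunks
            digest.update(block)
    return digest.hexdigest() == oid

# e.g. for the 0.5B checkpoint above:
# verify_checkpoint("latest-checkpoint.pt",
#                   "dc63d956a7e9ee853b2ef74b812454bc71e011cbf65f98c79da5a29e9bf2a61b",
#                   3992031074)
```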
Qwen2.5-VLM/qwen25-dinosiglip-224px+0_5b+stage-finetune+x42/config.yaml ADDED
@@ -0,0 +1,36 @@
+ seed: 42
+ hf_token: .hf_token
+ run_root_dir: runs
+ wandb_project: qwen-vlm
+ wandb_entity: null
+ model:
+   model_id: qwen25-dinosiglip-224px+0_5b
+   pretrained_checkpoint: null
+   llm_backbone_id: qwen25-0_5b
+   inference_mode: false
+   vision_backbone_id: dinosiglip-vit-so-224px
+   image_resize_strategy: resize-naive
+   default_image_size: 224
+   image_sequence_len: 1
+ training:
+   stage: finetune
+   epochs: 2
+   max_steps: null
+   global_batch_size: 32
+   per_device_batch_size: 4
+   learning_rate: 2.0e-05
+   weight_decay: 0.1
+   max_grad_norm: 1.0
+   lr_scheduler_type: linear-warmup+cosine-decay
+   warmup_ratio: 0.03
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+   reduce_in_full_precision: false
+   sharding_strategy: full-shard
+ dataset:
+   dataset_id: llava-v15
+   dataset_root_dir: dataset/
+   finetune_stage_components:
+   - llava-v1.5-instruct/llava_v1_5_mix665k.json
+   - llava-v1.5-instruct/
+ run_id: qwen25-dinosiglip-224px+0_5b+stage-finetune+x42
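The four configs in this commit share one training recipe; only `model_id`, `llm_backbone_id`, and `run_id` track the backbone size. Consuming the file and recovering the implied gradient-accumulation factor is straightforward (PyYAML assumed; `world_size` is a hypothetical GPU count, not stored in the config):

```python
import yaml

with open("Qwen2.5-VLM/qwen25-dinosiglip-224px+0_5b+stage-finetune+x42/config.yaml") as f:
    cfg = yaml.safe_load(f)

train = cfg["training"]
world_size = 8  # assumption: not recorded in the config
# global_batch_size = per_device_batch_size * world_size * grad_accum_steps
grad_accum = train["global_batch_size"] // (train["per_device_batch_size"] * world_size)
print(grad_accum)  # 32 // (4 * 8) = 1
```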
Qwen2.5-VLM/qwen25-dinosiglip-224px+1_5b+stage-finetune+x42/checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c133f7c5ed5e21469f33dbcb885cdf4533d71f7609cdc4ddab82e3bbe4c28319
+ size 8590225842
Qwen2.5-VLM/qwen25-dinosiglip-224px+1_5b+stage-finetune+x42/config.yaml ADDED
@@ -0,0 +1,36 @@
+ seed: 42
+ hf_token: .hf_token
+ run_root_dir: runs
+ wandb_project: qwen-vlm
+ wandb_entity: null
+ model:
+   model_id: qwen25-dinosiglip-224px+1_5b
+   pretrained_checkpoint: null
+   llm_backbone_id: qwen25-1_5b
+   inference_mode: false
+   vision_backbone_id: dinosiglip-vit-so-224px
+   image_resize_strategy: resize-naive
+   default_image_size: 224
+   image_sequence_len: 1
+ training:
+   stage: finetune
+   epochs: 2
+   max_steps: null
+   global_batch_size: 32
+   per_device_batch_size: 4
+   learning_rate: 2.0e-05
+   weight_decay: 0.1
+   max_grad_norm: 1.0
+   lr_scheduler_type: linear-warmup+cosine-decay
+   warmup_ratio: 0.03
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+   reduce_in_full_precision: false
+   sharding_strategy: full-shard
+ dataset:
+   dataset_id: llava-v15
+   dataset_root_dir: dataset/
+   finetune_stage_components:
+   - llava-v1.5-instruct/llava_v1_5_mix665k.json
+   - llava-v1.5-instruct/
+ run_id: qwen25-dinosiglip-224px+1_5b+stage-finetune+x42
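`lr_scheduler_type: linear-warmup+cosine-decay` with `warmup_ratio: 0.03` names a standard schedule: the learning rate climbs linearly to its peak over the first 3% of steps, then decays along a cosine curve. A hedged sketch of that schedule (illustrative, not the repo's own implementation):

```python
import math

def lr_at(step: int, total_steps: int,
          peak_lr: float = 2.0e-05, warmup_ratio: float = 0.03) -> float:
    """Linear warmup to peak_lr, then cosine decay toward zero."""
    warmup_steps = max(1, int(warmup_ratio * total_steps))
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return peak_lr * 0.5 * (1.0 + math.cos(math.pi * progress))
```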
Qwen2.5-VLM/qwen25-dinosiglip-224px+3b+stage-finetune+x42/checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c29bf8a3f68153dda1289c52f1a3f556e34cb5278f1a3e58645e766534edb70b
+ size 15081086078
Qwen2.5-VLM/qwen25-dinosiglip-224px+3b+stage-finetune+x42/config.yaml ADDED
@@ -0,0 +1,36 @@
+ seed: 42
+ hf_token: .hf_token
+ run_root_dir: runs
+ wandb_project: qwen-vlm
+ wandb_entity: null
+ model:
+   model_id: qwen25-dinosiglip-224px+3b
+   pretrained_checkpoint: null
+   llm_backbone_id: qwen25-3b
+   inference_mode: false
+   vision_backbone_id: dinosiglip-vit-so-224px
+   image_resize_strategy: resize-naive
+   default_image_size: 224
+   image_sequence_len: 1
+ training:
+   stage: finetune
+   epochs: 2
+   max_steps: null
+   global_batch_size: 32
+   per_device_batch_size: 4
+   learning_rate: 2.0e-05
+   weight_decay: 0.1
+   max_grad_norm: 1.0
+   lr_scheduler_type: linear-warmup+cosine-decay
+   warmup_ratio: 0.03
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+   reduce_in_full_precision: false
+   sharding_strategy: full-shard
+ dataset:
+   dataset_id: llava-v15
+   dataset_root_dir: dataset/
+   finetune_stage_components:
+   - llava-v1.5-instruct/llava_v1_5_mix665k.json
+   - llava-v1.5-instruct/
+ run_id: qwen25-dinosiglip-224px+3b+stage-finetune+x42
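Once the LFS payload behind a pointer has been pulled, each `latest-checkpoint.pt` is an ordinary PyTorch checkpoint. A minimal inspection sketch (the dict layout inside is an assumption; check the keys rather than relying on any particular name):

```python
import torch

ckpt = torch.load(
    "Qwen2.5-VLM/qwen25-dinosiglip-224px+3b+stage-finetune+x42/checkpoints/latest-checkpoint.pt",
    map_location="cpu",  # inspect without needing a GPU
)
print(type(ckpt))
if isinstance(ckpt, dict):
    print(list(ckpt.keys()))  # e.g. model / optimizer state, step counters
```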
Qwen2.5-VLM/qwen25-dinosiglip-224px+7b+stage-finetune+x42/checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d5bcd68dc1fab956f576ce260a26dad67a662c51639553828a910a975a5925b
+ size 31996269478
Qwen2.5-VLM/qwen25-dinosiglip-224px+7b+stage-finetune+x42/config.yaml ADDED
@@ -0,0 +1,36 @@
+ seed: 42
+ hf_token: .hf_token
+ run_root_dir: runs
+ wandb_project: qwen-vlm
+ wandb_entity: null
+ model:
+   model_id: qwen25-dinosiglip-224px+7b
+   pretrained_checkpoint: null
+   llm_backbone_id: qwen25-7b
+   inference_mode: false
+   vision_backbone_id: dinosiglip-vit-so-224px
+   image_resize_strategy: resize-naive
+   default_image_size: 224
+   image_sequence_len: 1
+ training:
+   stage: finetune
+   epochs: 2
+   max_steps: null
+   global_batch_size: 32
+   per_device_batch_size: 4
+   learning_rate: 2.0e-05
+   weight_decay: 0.1
+   max_grad_norm: 1.0
+   lr_scheduler_type: linear-warmup+cosine-decay
+   warmup_ratio: 0.03
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+   reduce_in_full_precision: false
+   sharding_strategy: full-shard
+ dataset:
+   dataset_id: llava-v15
+   dataset_root_dir: dataset/
+   finetune_stage_components:
+   - llava-v1.5-instruct/llava_v1_5_mix665k.json
+   - llava-v1.5-instruct/
+ run_id: qwen25-dinosiglip-224px+7b+stage-finetune+x42
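Because these artifacts live on the Hugging Face Hub, single files can be fetched without cloning the full repo. A sketch with `huggingface_hub` (the `repo_id` is a guess from the committer name and is not confirmed by this diff):

```python
from huggingface_hub import hf_hub_download

cfg_path = hf_hub_download(
    repo_id="HeegerGao/Qwen2.5-VLM",  # assumption: actual repo path not shown here
    filename="Qwen2.5-VLM/qwen25-dinosiglip-224px+7b+stage-finetune+x42/config.yaml",
)
print(cfg_path)
```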
README.md CHANGED
@@ -1,3 +1,3 @@
- ---
- license: mit
- ---
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8d7a46d41a1a37fe4f0a5f637bf55c649310185329127d8a2204632e480be17
+ size 24