Add files using upload-large-folder tool
Browse files- .gitattributes +9 -0
- Qwen2.5-1.5B/.metadata +3 -0
- Qwen2.5-1.5B/__0_0.distcp +3 -0
- Qwen2.5-1.5B/__1_0.distcp +3 -0
- Qwen2.5-1.5B/__2_0.distcp +3 -0
- Qwen2.5-1.5B/__3_0.distcp +3 -0
- Qwen2.5-1.5B/__4_0.distcp +3 -0
- Qwen2.5-1.5B/__5_0.distcp +3 -0
- Qwen2.5-1.5B/__6_0.distcp +3 -0
- Qwen2.5-1.5B/__7_0.distcp +3 -0
- Qwen2.5-1.5B/train_params.yaml +57 -0
.gitattributes
CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
Qwen2.5-1.5B/.metadata filter=lfs diff=lfs merge=lfs -text
|
37 |
+
Qwen2.5-1.5B/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
38 |
+
Qwen2.5-1.5B/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
39 |
+
Qwen2.5-1.5B/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
40 |
+
Qwen2.5-1.5B/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
41 |
+
Qwen2.5-1.5B/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
42 |
+
Qwen2.5-1.5B/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
43 |
+
Qwen2.5-1.5B/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
44 |
+
Qwen2.5-1.5B/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
Qwen2.5-1.5B/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b09b64774765b2c6e16f29304f5cf3b2c0c48d4a647683fdb36fcfcd2c43eab9
|
3 |
+
size 520434
|
Qwen2.5-1.5B/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aecd67dbd4f600d25446177fad532d4884c61ef4a60d7101fa0b8f20e73d43e
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:169852281a434038b77a0b4e5f30b6e542be8d9ae996507c0c028f7f1448ed66
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b06e6b137ee0e0417caeeea476030725c357da85e34a82d8704e798a2349617
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09607fd6d6bd5bb7260a16215e1e72a40de99a19788db2efe25fd1ed52c4a984
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15aed2aa0661ebd4ab218fb6991bb4aff1504df5c30ed90efac8f2180de460be
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4324100ac7283ca7981b6aa9d7decb711723aa349de7d839fd1850dcfa09869
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bec0628cf04555fcfb286bf60d2dda1d52c03cb5bf162e905b4747419789c941
|
3 |
+
size 888538516
|
Qwen2.5-1.5B/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41f4fb289d9eee3c4339844e5b946b88aef54b94a075fe37209ca55a8063bc40
|
3 |
+
size 888452500
|
Qwen2.5-1.5B/train_params.yaml
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
add_ctxemb: 'False'
|
2 |
+
add_vocab: 'False'
|
3 |
+
all_gather: 'true'
|
4 |
+
batch_size_training: '16'
|
5 |
+
checkpoint_type: StateDictType.SHARDED_STATE_DICT
|
6 |
+
clipping_norm: '-1.0'
|
7 |
+
compare: 'False'
|
8 |
+
cpu_np_head: 'False'
|
9 |
+
ctx_proj_layer: 'False'
|
10 |
+
ctx_use_peft: 'False'
|
11 |
+
dataset: llava_qwen_selfrag_single_dataset
|
12 |
+
dist_checkpoint_folder: qwen1_5.full.prompt_llava.no_mask.no_context.without_sp_epoch2
|
13 |
+
dist_checkpoint_root_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/RedPajama-Data/img/results
|
14 |
+
enable_fsdp: 'True'
|
15 |
+
freeze_ctx_encoder: 'False'
|
16 |
+
freeze_layers: 'False'
|
17 |
+
freeze_question_encoder: 'False'
|
18 |
+
from_hf: 'False'
|
19 |
+
fsdp_activation_checkpointing: 'True'
|
20 |
+
gamma: '0.85'
|
21 |
+
load_np_head: 'False'
|
22 |
+
low_cpu_fsdp: 'True'
|
23 |
+
lr: 2e-05
|
24 |
+
memory_bank_length: '0'
|
25 |
+
micro_batch_size: '2'
|
26 |
+
mixed_precision: 'True'
|
27 |
+
model_name: Qwen/Qwen2.5-1.5B
|
28 |
+
model_use_peft: 'False'
|
29 |
+
natural_form: 'False'
|
30 |
+
np_weight: '100.0'
|
31 |
+
num_epochs: '3'
|
32 |
+
num_freeze_layers: '1'
|
33 |
+
num_workers_dataloader: '1'
|
34 |
+
one_gpu: 'False'
|
35 |
+
optimizer: AdamW
|
36 |
+
output_dir: peft_checkpoint
|
37 |
+
peft_method: None
|
38 |
+
pure_bf16: 'False'
|
39 |
+
quantization: 'False'
|
40 |
+
question_proj_layer: 'False'
|
41 |
+
resume_epoch: '0'
|
42 |
+
ret_checkpoint_folder: ''
|
43 |
+
ret_first: 'False'
|
44 |
+
retriever: ''
|
45 |
+
run_validation: 'True'
|
46 |
+
save_model: 'True'
|
47 |
+
save_optimizer: 'False'
|
48 |
+
seed: '2'
|
49 |
+
sharding_strategy: ShardingStrategy.FULL_SHARD
|
50 |
+
single: 'False'
|
51 |
+
target_modules: ''
|
52 |
+
token_name: Qwen/Qwen2.5-1.5B
|
53 |
+
train: 'True'
|
54 |
+
use_fast_kernels: 'False'
|
55 |
+
use_fp16: 'False'
|
56 |
+
val_batch_size: '1'
|
57 |
+
weight_decay: '0.0'
|