Add files using upload-large-folder tool
- .gitattributes +5 -0
- Llama-2-7b-hf/.metadata +3 -0
- Llama-2-7b-hf/__0_0.distcp +3 -0
- Llama-2-7b-hf/__1_0.distcp +3 -0
- Llama-2-7b-hf/__2_0.distcp +3 -0
- Llama-2-7b-hf/__3_0.distcp +3 -0
- Llama-2-7b-hf/config.json +27 -0
- Llama-2-7b-hf/train_params.yaml +57 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Llama-2-7b-hf/.metadata filter=lfs diff=lfs merge=lfs -text
+Llama-2-7b-hf/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
+Llama-2-7b-hf/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
+Llama-2-7b-hf/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
+Llama-2-7b-hf/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
Llama-2-7b-hf/.metadata
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e842139133e778fb3c5e840a6b95ac1e6da912cb86a4fe0c57f0cb4d7335e30
+size 256920
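Every binary file in this commit is stored through Git LFS, so the repository itself tracks only a three-line pointer (`version`, `oid`, `size`) like the one above; the blob is fetched separately on checkout. A small sketch of verifying a downloaded blob against its pointer; `pointer_path`/`blob_path` are hypothetical arguments, not files in this repo:

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Split a git-lfs pointer file into its key/value fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the sha256 and size in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected = fields["oid"].removeprefix("sha256:")
    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as blob:
        for chunk in iter(lambda: blob.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected and size == int(fields["size"])
```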
Llama-2-7b-hf/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffcf8c910d072a5027e4623d11661e7ce87244320cd94280329fb8e76ff50ed7
+size 6738791764
Llama-2-7b-hf/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70a6152e37b688d330d7657bb3ce5364b388a3d4cd03486a539be6b3dbbed322
+size 6738791764
Llama-2-7b-hf/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33a30ee4e4c8ea4d1b1f326ec98483711dde2ca2f32def134c85a671be194551
+size 6738791764
Llama-2-7b-hf/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75154908cf53670dbe051d6222679d49996909606f8a14b7f6d10ee0cbc2b06e
+size 6738693460
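The four `__N_0.distcp` shards plus `.metadata` match the on-disk layout that `torch.distributed.checkpoint` writes for an FSDP `SHARDED_STATE_DICT` save (see `checkpoint_type` in `train_params.yaml` below); the ~27 GB total is consistent with ~6.7B float32 parameters. A minimal consolidation sketch, assuming the llama-recipes convention of nesting weights under a `"model"` key and the torch 2.0/2.1 `load_state_dict` API (newer torch releases rename it to `dist_cp.load`):

```python
import torch.distributed.checkpoint as dist_cp
from transformers import AutoConfig, LlamaForCausalLM

ckpt_dir = "Llama-2-7b-hf"  # local clone containing the .distcp shards

# Build an empty model skeleton from config.json, then fill it in place
# from the shards (needs roughly 2x 27 GB of host RAM in float32).
config = AutoConfig.from_pretrained(ckpt_dir)
model = LlamaForCausalLM(config)

# Assumption: the trainer saved {"model": model.state_dict()}, as the
# llama-recipes checkpoint handler does; adjust the key if yours differs.
state_dict = {"model": model.state_dict()}
dist_cp.load_state_dict(
    state_dict=state_dict,
    storage_reader=dist_cp.FileSystemReader(ckpt_dir),
    no_dist=True,  # single-process load, no process group needed
)
model.load_state_dict(state_dict["model"])
model.save_pretrained("Llama-2-7b-hf-consolidated")  # standard HF layout
```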
Llama-2-7b-hf/config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.2",
+  "use_cache": true,
+  "vocab_size": 32004
+}
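Two details stand out in this config: `torch_dtype` is `float32` (which explains the shard sizes above), and `vocab_size` is 32004 rather than the stock 32000, so the embedding and LM-head matrices were resized, presumably for four added special tokens (the tokens themselves are not recorded in this commit). A sketch of reproducing that shape change on the base model; the token strings are placeholders:

```python
from transformers import AutoTokenizer, LlamaForCausalLM

model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

# Placeholder names: the four extra vocab entries implied by
# vocab_size == 32004 are not named anywhere in this upload.
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<extra_0>", "<extra_1>", "<extra_2>", "<extra_3>"]}
)
model.resize_token_embeddings(len(tokenizer))  # 32000 -> 32004
```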
Llama-2-7b-hf/train_params.yaml
ADDED
@@ -0,0 +1,57 @@
+add_ctxemb: 'False'
+add_vocab: 'False'
+all_gather: 'true'
+batch_size_training: '16'
+checkpoint_type: StateDictType.SHARDED_STATE_DICT
+clipping_norm: '-1.0'
+compare: 'False'
+cpu_np_head: 'False'
+ctx_proj_layer: 'False'
+ctx_use_peft: 'False'
+dataset: llava_selfrag_single_dataset
+dist_checkpoint_folder: full.prompt_llava.context_mask.with_context.without_sp.key_original_epoch2
+dist_checkpoint_root_folder: /apdcephfs_sh2/share_300000800/user/kaixinma/amylee/RedPajama-Data/img/results
+enable_fsdp: 'True'
+freeze_ctx_encoder: 'False'
+freeze_layers: 'False'
+freeze_question_encoder: 'False'
+from_hf: 'False'
+fsdp_activation_checkpointing: 'True'
+gamma: '0.85'
+load_np_head: 'False'
+low_cpu_fsdp: 'True'
+lr: 2e-05
+memory_bank_length: '0'
+micro_batch_size: '16'
+mixed_precision: 'True'
+model_name: meta-llama/Llama-2-7b-hf
+model_use_peft: 'False'
+natural_form: 'True'
+np_weight: '100.0'
+num_epochs: '3'
+num_freeze_layers: '1'
+num_workers_dataloader: '1'
+one_gpu: 'False'
+optimizer: AdamW
+output_dir: peft_checkpoint
+peft_method: None
+pure_bf16: 'False'
+quantization: 'False'
+question_proj_layer: 'False'
+resume_epoch: '0'
+ret_checkpoint_folder: ''
+ret_first: 'False'
+retriever: ''
+run_validation: 'True'
+save_model: 'True'
+save_optimizer: 'False'
+seed: '2'
+sharding_strategy: ShardingStrategy.FULL_SHARD
+single: 'False'
+target_modules: ''
+token_name: meta-llama/Llama-2-7b-hf
+train: 'True'
+use_fast_kernels: 'False'
+use_fp16: 'False'
+val_batch_size: '1'
+weight_decay: '0.0'
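Nearly every value in `train_params.yaml` is a quoted string (`'True'`, `'16'`, `'-1.0'`), which suggests the training script stringified its config before dumping it. Anything consuming this file therefore has to coerce types itself; a best-effort sketch (the coercion rules are assumptions, not taken from the original training code):

```python
import yaml

def coerce(value):
    """Best-effort conversion of stringified training params back to Python types."""
    if not isinstance(value, str):
        return value
    lowered = value.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value  # e.g. enum reprs like 'StateDictType.SHARDED_STATE_DICT'

with open("Llama-2-7b-hf/train_params.yaml") as f:
    raw = yaml.safe_load(f)
params = {k: coerce(v) for k, v in raw.items()}

assert params["batch_size_training"] == 16
assert params["enable_fsdp"] is True
```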