robertou2 committed on
Commit
1051b75
·
verified ·
1 Parent(s): 27addaf

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.14.0
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.12.0
adapter_config.json CHANGED
@@ -3,8 +3,6 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
5
  "bias": "none",
6
- "eva_config": null,
7
- "exclude_modules": null,
8
  "fan_in_fan_out": false,
9
  "inference_mode": true,
10
  "init_lora_weights": true,
@@ -12,20 +10,19 @@
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
- "lora_alpha": 64,
16
- "lora_bias": false,
17
- "lora_dropout": 0.0001,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
22
- "r": 32,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "o_proj",
27
- "qkv_proj",
28
  "down_proj",
 
 
29
  "gate_up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
5
  "bias": "none",
 
 
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
 
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "down_proj",
24
+ "qkv_proj",
25
+ "o_proj",
26
  "gate_up_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:722e0d11c18df2484130fccf458fb994d57d4dee4423f6ff233ab20595ac5492
3
- size 201361312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f038e0934a3168a134372d76747c35963d810535ac44238c96f526ba5d73444d
3
+ size 50365768
added_tokens.json CHANGED
@@ -2,6 +2,7 @@
2
  "<|assistant|>": 32001,
3
  "<|endoftext|>": 32000,
4
  "<|end|>": 32007,
 
5
  "<|placeholder1|>": 32002,
6
  "<|placeholder2|>": 32003,
7
  "<|placeholder3|>": 32004,
 
2
  "<|assistant|>": 32001,
3
  "<|endoftext|>": 32000,
4
  "<|end|>": 32007,
5
+ "<|pad|>": 32011,
6
  "<|placeholder1|>": 32002,
7
  "<|placeholder2|>": 32003,
8
  "<|placeholder3|>": 32004,
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ef9fd6ce3475d48ab32d6a7526e8f236b02b149d6e84fd712103775f348a974
3
- size 402868986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c2c06a3aa791be14685b5d59899a877c559219322620a519bd1b84ef483910
3
+ size 100878458
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dde5f077a5393538b5aed42dc077f2c26a3ecb3009a6cd8323a0963c172eeafe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:936b713051d3954452ec3bf4371217942ece2f8826b34ec15d76739514f6eb2c
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a16bc59ca2ad7c9d866e071897b87e0c2309c5def808c0078c92b1caa75df182
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a492b6d65e6e851d97e2025279f088a6a581867a0b4272350b086ae52aef4d06
3
  size 1064
special_tokens_map.json CHANGED
@@ -7,14 +7,14 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|endoftext|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|pad|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -98,7 +112,7 @@
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
- "rstrip": true,
102
  "normalized": false,
103
  "special": true
104
  },
@@ -128,6 +142,15 @@
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 2048
12
+ },
13
+ "direction": "Left",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 32011,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<|pad|>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
112
  "content": "<|end|>",
113
  "single_word": false,
114
  "lstrip": false,
115
+ "rstrip": false,
116
  "normalized": false,
117
  "special": true
118
  },
 
142
  "rstrip": true,
143
  "normalized": false,
144
  "special": true
145
+ },
146
+ {
147
+ "id": 32011,
148
+ "content": "<|pad|>",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": false,
153
+ "special": true
154
  }
155
  ],
156
  "normalizer": {
tokenizer_config.json CHANGED
@@ -87,7 +87,7 @@
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
- "rstrip": true,
91
  "single_word": false,
92
  "special": true
93
  },
@@ -114,16 +114,24 @@
114
  "rstrip": true,
115
  "single_word": false,
116
  "special": true
 
 
 
 
 
 
 
 
117
  }
118
  },
119
  "bos_token": "<s>",
120
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
  "clean_up_tokenization_spaces": false,
122
- "eos_token": "<|endoftext|>",
123
  "extra_special_tokens": {},
124
  "legacy": false,
125
  "model_max_length": 131072,
126
- "pad_token": "<|endoftext|>",
127
  "padding_side": "left",
128
  "sp_model_kwargs": {},
129
  "tokenizer_class": "LlamaTokenizer",
 
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
+ "rstrip": false,
91
  "single_word": false,
92
  "special": true
93
  },
 
114
  "rstrip": true,
115
  "single_word": false,
116
  "special": true
117
+ },
118
+ "32011": {
119
+ "content": "<|pad|>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
  }
126
  },
127
  "bos_token": "<s>",
128
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
129
  "clean_up_tokenization_spaces": false,
130
+ "eos_token": "<|end|>",
131
  "extra_special_tokens": {},
132
  "legacy": false,
133
  "model_max_length": 131072,
134
+ "pad_token": "<|pad|>",
135
  "padding_side": "left",
136
  "sp_model_kwargs": {},
137
  "tokenizer_class": "LlamaTokenizer",
trainer_state.json CHANGED
@@ -1,240 +1,17 @@
1
  {
2
- "best_global_step": 99,
3
- "best_metric": 0.6646606922149658,
4
- "best_model_checkpoint": "//outputs/task7_microsoft/Phi-3.5-mini-instruct/checkpoint-99",
5
- "epoch": 11.0,
6
  "eval_steps": 500,
7
- "global_step": 99,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.5882352941176471,
14
- "grad_norm": 0.5897260308265686,
15
- "learning_rate": 1e-05,
16
- "loss": 0.8043,
17
- "step": 5
18
- },
19
- {
20
- "epoch": 1.0,
21
- "eval_loss": 0.7620137929916382,
22
- "eval_runtime": 3.3723,
23
- "eval_samples_per_second": 4.448,
24
- "eval_steps_per_second": 0.593,
25
- "step": 9
26
- },
27
- {
28
- "epoch": 1.1176470588235294,
29
- "grad_norm": 0.5304206609725952,
30
- "learning_rate": 9.931806517013612e-06,
31
- "loss": 0.7621,
32
- "step": 10
33
- },
34
- {
35
- "epoch": 1.7058823529411766,
36
- "grad_norm": 0.3766116499900818,
37
- "learning_rate": 9.729086208503174e-06,
38
- "loss": 0.7587,
39
- "step": 15
40
- },
41
- {
42
- "epoch": 2.0,
43
- "eval_loss": 0.7274001240730286,
44
- "eval_runtime": 3.3712,
45
- "eval_samples_per_second": 4.449,
46
- "eval_steps_per_second": 0.593,
47
- "step": 18
48
- },
49
- {
50
- "epoch": 2.235294117647059,
51
- "grad_norm": 0.2997392416000366,
52
- "learning_rate": 9.397368756032445e-06,
53
- "loss": 0.7073,
54
- "step": 20
55
- },
56
- {
57
- "epoch": 2.8235294117647056,
58
- "grad_norm": 0.2789791226387024,
59
- "learning_rate": 8.94570254698197e-06,
60
- "loss": 0.6531,
61
- "step": 25
62
- },
63
- {
64
- "epoch": 3.0,
65
- "eval_loss": 0.7077590823173523,
66
- "eval_runtime": 3.3715,
67
- "eval_samples_per_second": 4.449,
68
- "eval_steps_per_second": 0.593,
69
- "step": 27
70
- },
71
- {
72
- "epoch": 3.3529411764705883,
73
- "grad_norm": 0.28947126865386963,
74
- "learning_rate": 8.386407858128707e-06,
75
- "loss": 0.7029,
76
- "step": 30
77
- },
78
- {
79
- "epoch": 3.9411764705882355,
80
- "grad_norm": 0.2775668799877167,
81
- "learning_rate": 7.734740790612137e-06,
82
- "loss": 0.6033,
83
- "step": 35
84
- },
85
- {
86
- "epoch": 4.0,
87
- "eval_loss": 0.6937279105186462,
88
- "eval_runtime": 3.3722,
89
- "eval_samples_per_second": 4.448,
90
- "eval_steps_per_second": 0.593,
91
- "step": 36
92
- },
93
- {
94
- "epoch": 4.470588235294118,
95
- "grad_norm": 0.24069756269454956,
96
- "learning_rate": 7.008477123264849e-06,
97
- "loss": 0.6549,
98
- "step": 40
99
- },
100
- {
101
- "epoch": 5.0,
102
- "grad_norm": 0.31259897351264954,
103
- "learning_rate": 6.227427435703997e-06,
104
- "loss": 0.694,
105
- "step": 45
106
- },
107
- {
108
- "epoch": 5.0,
109
- "eval_loss": 0.6824610829353333,
110
- "eval_runtime": 3.3719,
111
- "eval_samples_per_second": 4.449,
112
- "eval_steps_per_second": 0.593,
113
- "step": 45
114
- },
115
- {
116
- "epoch": 5.588235294117647,
117
- "grad_norm": 0.25407281517982483,
118
- "learning_rate": 5.412896727361663e-06,
119
- "loss": 0.6865,
120
- "step": 50
121
- },
122
- {
123
- "epoch": 6.0,
124
- "eval_loss": 0.6763660311698914,
125
- "eval_runtime": 3.3717,
126
- "eval_samples_per_second": 4.449,
127
- "eval_steps_per_second": 0.593,
128
- "step": 54
129
- },
130
- {
131
- "epoch": 6.117647058823529,
132
- "grad_norm": 0.2767919600009918,
133
- "learning_rate": 4.587103272638339e-06,
134
- "loss": 0.6081,
135
- "step": 55
136
- },
137
- {
138
- "epoch": 6.705882352941177,
139
- "grad_norm": 0.24282197654247284,
140
- "learning_rate": 3.7725725642960047e-06,
141
- "loss": 0.6577,
142
- "step": 60
143
- },
144
- {
145
- "epoch": 7.0,
146
- "eval_loss": 0.6713435649871826,
147
- "eval_runtime": 3.3715,
148
- "eval_samples_per_second": 4.449,
149
- "eval_steps_per_second": 0.593,
150
- "step": 63
151
- },
152
- {
153
- "epoch": 7.235294117647059,
154
- "grad_norm": 0.1744387447834015,
155
- "learning_rate": 2.991522876735154e-06,
156
- "loss": 0.5941,
157
- "step": 65
158
- },
159
- {
160
- "epoch": 7.823529411764706,
161
- "grad_norm": 0.20212271809577942,
162
- "learning_rate": 2.265259209387867e-06,
163
- "loss": 0.6509,
164
- "step": 70
165
- },
166
- {
167
- "epoch": 8.0,
168
- "eval_loss": 0.6677358150482178,
169
- "eval_runtime": 3.3723,
170
- "eval_samples_per_second": 4.448,
171
- "eval_steps_per_second": 0.593,
172
- "step": 72
173
- },
174
- {
175
- "epoch": 8.352941176470589,
176
- "grad_norm": 0.16120634973049164,
177
- "learning_rate": 1.6135921418712959e-06,
178
- "loss": 0.5923,
179
- "step": 75
180
- },
181
- {
182
- "epoch": 8.941176470588236,
183
- "grad_norm": 0.2318679690361023,
184
- "learning_rate": 1.0542974530180327e-06,
185
- "loss": 0.6438,
186
- "step": 80
187
- },
188
- {
189
- "epoch": 9.0,
190
- "eval_loss": 0.6655252575874329,
191
- "eval_runtime": 3.3713,
192
- "eval_samples_per_second": 4.449,
193
- "eval_steps_per_second": 0.593,
194
- "step": 81
195
- },
196
- {
197
- "epoch": 9.470588235294118,
198
- "grad_norm": 0.19574101269245148,
199
- "learning_rate": 6.026312439675553e-07,
200
- "loss": 0.6359,
201
- "step": 85
202
- },
203
- {
204
- "epoch": 10.0,
205
- "grad_norm": 0.22642117738723755,
206
- "learning_rate": 2.7091379149682683e-07,
207
- "loss": 0.5741,
208
- "step": 90
209
- },
210
- {
211
- "epoch": 10.0,
212
- "eval_loss": 0.6652756929397583,
213
- "eval_runtime": 3.3709,
214
- "eval_samples_per_second": 4.45,
215
- "eval_steps_per_second": 0.593,
216
- "step": 90
217
- },
218
- {
219
- "epoch": 10.588235294117647,
220
- "grad_norm": 0.2666153013706207,
221
- "learning_rate": 6.819348298638839e-08,
222
- "loss": 0.6734,
223
- "step": 95
224
- },
225
- {
226
- "epoch": 11.0,
227
- "eval_loss": 0.6646606922149658,
228
- "eval_runtime": 3.3717,
229
- "eval_samples_per_second": 4.449,
230
- "eval_steps_per_second": 0.593,
231
- "step": 99
232
- }
233
- ],
234
- "logging_steps": 5,
235
- "max_steps": 100,
236
  "num_input_tokens_seen": 0,
237
- "num_train_epochs": 13,
238
  "save_steps": 500,
239
  "stateful_callbacks": {
240
  "TrainerControl": {
@@ -243,13 +20,13 @@
243
  "should_evaluate": false,
244
  "should_log": false,
245
  "should_save": true,
246
- "should_training_stop": false
247
  },
248
  "attributes": {}
249
  }
250
  },
251
- "total_flos": 5.180655973758566e+16,
252
- "train_batch_size": 2,
253
  "trial_name": null,
254
  "trial_params": null
255
  }
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9481481481481482,
 
5
  "eval_steps": 500,
6
+ "global_step": 66,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
+ "log_history": [],
11
+ "logging_steps": 100,
12
+ "max_steps": 66,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "num_input_tokens_seen": 0,
14
+ "num_train_epochs": 2,
15
  "save_steps": 500,
16
  "stateful_callbacks": {
17
  "TrainerControl": {
 
20
  "should_evaluate": false,
21
  "should_log": false,
22
  "should_save": true,
23
+ "should_training_stop": true
24
  },
25
  "attributes": {}
26
  }
27
  },
28
+ "total_flos": 1.2071086917156864e+16,
29
+ "train_batch_size": 1,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02dca0ceed349b196a13d4f3de83bdc1d637c3f4a599aaf1cc66d5744a87d6c3
3
- size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd9f76f1c27b8546e1ede6a4353769298e7cfd4ca0d040b456617a994550ac6
3
+ size 5304