robertou2 commited on
Commit
27addaf
·
verified ·
1 Parent(s): f28818e

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -12,21 +12,21 @@
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
- "lora_alpha": 256,
16
  "lora_bias": false,
17
  "lora_dropout": 0.0001,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
22
- "r": 128,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "o_proj",
28
- "gate_up_proj",
29
- "qkv_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
+ "lora_alpha": 64,
16
  "lora_bias": false,
17
  "lora_dropout": 0.0001,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
22
+ "r": 32,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "o_proj",
27
+ "qkv_proj",
28
+ "down_proj",
29
+ "gate_up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d64777516b5921ef8d744b5ebd508a3756f6ad1854cd4f846edc778151c52cea
3
- size 805341552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:722e0d11c18df2484130fccf458fb994d57d4dee4423f6ff233ab20595ac5492
3
+ size 201361312
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b026fc64ffcd926472bc17624cbfe986d3a3db1c0cb76db198da37defead598a
3
- size 1610828538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef9fd6ce3475d48ab32d6a7526e8f236b02b149d6e84fd712103775f348a974
3
+ size 402868986
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3e64dac40ba173bacca43da1c2f8d06bd604cd010c45a36fa0bb45c3bacdc23
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde5f077a5393538b5aed42dc077f2c26a3ecb3009a6cd8323a0963c172eeafe
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fe414f5049a6513e25a7549fe53863c7bf47a5a8dee1e296c2165e39af671b8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16bc59ca2ad7c9d866e071897b87e0c2309c5def808c0078c92b1caa75df182
3
  size 1064
trainer_state.json CHANGED
@@ -1,72 +1,234 @@
1
  {
2
- "best_global_step": 27,
3
- "best_metric": 0.5784786939620972,
4
- "best_model_checkpoint": "//outputs/task7_microsoft/Phi-3.5-mini-instruct/checkpoint-27",
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 27,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.5882352941176471,
14
- "grad_norm": 0.38448742032051086,
15
- "learning_rate": 0.0001,
16
- "loss": 0.7574,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_loss": 0.6299339532852173,
22
- "eval_runtime": 3.415,
23
- "eval_samples_per_second": 4.392,
24
- "eval_steps_per_second": 0.586,
25
  "step": 9
26
  },
27
  {
28
  "epoch": 1.1176470588235294,
29
- "grad_norm": 0.339433491230011,
30
- "learning_rate": 9.931806517013612e-05,
31
- "loss": 0.6323,
32
  "step": 10
33
  },
34
  {
35
  "epoch": 1.7058823529411766,
36
- "grad_norm": 0.32222846150398254,
37
- "learning_rate": 9.729086208503174e-05,
38
- "loss": 0.5438,
39
  "step": 15
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_loss": 0.5842064619064331,
44
- "eval_runtime": 3.4146,
45
- "eval_samples_per_second": 4.393,
46
- "eval_steps_per_second": 0.586,
47
  "step": 18
48
  },
49
  {
50
  "epoch": 2.235294117647059,
51
- "grad_norm": 0.26292550563812256,
52
- "learning_rate": 9.397368756032445e-05,
53
- "loss": 0.4495,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 2.8235294117647056,
58
- "grad_norm": 0.28129199147224426,
59
- "learning_rate": 8.945702546981969e-05,
60
- "loss": 0.3702,
61
  "step": 25
62
  },
63
  {
64
  "epoch": 3.0,
65
- "eval_loss": 0.5784786939620972,
66
- "eval_runtime": 3.4121,
67
- "eval_samples_per_second": 4.396,
68
- "eval_steps_per_second": 0.586,
69
  "step": 27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
  "logging_steps": 5,
@@ -86,7 +248,7 @@
86
  "attributes": {}
87
  }
88
  },
89
- "total_flos": 1.4777106488322048e+16,
90
  "train_batch_size": 2,
91
  "trial_name": null,
92
  "trial_params": null
 
1
  {
2
+ "best_global_step": 99,
3
+ "best_metric": 0.6646606922149658,
4
+ "best_model_checkpoint": "//outputs/task7_microsoft/Phi-3.5-mini-instruct/checkpoint-99",
5
+ "epoch": 11.0,
6
  "eval_steps": 500,
7
+ "global_step": 99,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.5882352941176471,
14
+ "grad_norm": 0.5897260308265686,
15
+ "learning_rate": 1e-05,
16
+ "loss": 0.8043,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_loss": 0.7620137929916382,
22
+ "eval_runtime": 3.3723,
23
+ "eval_samples_per_second": 4.448,
24
+ "eval_steps_per_second": 0.593,
25
  "step": 9
26
  },
27
  {
28
  "epoch": 1.1176470588235294,
29
+ "grad_norm": 0.5304206609725952,
30
+ "learning_rate": 9.931806517013612e-06,
31
+ "loss": 0.7621,
32
  "step": 10
33
  },
34
  {
35
  "epoch": 1.7058823529411766,
36
+ "grad_norm": 0.3766116499900818,
37
+ "learning_rate": 9.729086208503174e-06,
38
+ "loss": 0.7587,
39
  "step": 15
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_loss": 0.7274001240730286,
44
+ "eval_runtime": 3.3712,
45
+ "eval_samples_per_second": 4.449,
46
+ "eval_steps_per_second": 0.593,
47
  "step": 18
48
  },
49
  {
50
  "epoch": 2.235294117647059,
51
+ "grad_norm": 0.2997392416000366,
52
+ "learning_rate": 9.397368756032445e-06,
53
+ "loss": 0.7073,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 2.8235294117647056,
58
+ "grad_norm": 0.2789791226387024,
59
+ "learning_rate": 8.94570254698197e-06,
60
+ "loss": 0.6531,
61
  "step": 25
62
  },
63
  {
64
  "epoch": 3.0,
65
+ "eval_loss": 0.7077590823173523,
66
+ "eval_runtime": 3.3715,
67
+ "eval_samples_per_second": 4.449,
68
+ "eval_steps_per_second": 0.593,
69
  "step": 27
70
+ },
71
+ {
72
+ "epoch": 3.3529411764705883,
73
+ "grad_norm": 0.28947126865386963,
74
+ "learning_rate": 8.386407858128707e-06,
75
+ "loss": 0.7029,
76
+ "step": 30
77
+ },
78
+ {
79
+ "epoch": 3.9411764705882355,
80
+ "grad_norm": 0.2775668799877167,
81
+ "learning_rate": 7.734740790612137e-06,
82
+ "loss": 0.6033,
83
+ "step": 35
84
+ },
85
+ {
86
+ "epoch": 4.0,
87
+ "eval_loss": 0.6937279105186462,
88
+ "eval_runtime": 3.3722,
89
+ "eval_samples_per_second": 4.448,
90
+ "eval_steps_per_second": 0.593,
91
+ "step": 36
92
+ },
93
+ {
94
+ "epoch": 4.470588235294118,
95
+ "grad_norm": 0.24069756269454956,
96
+ "learning_rate": 7.008477123264849e-06,
97
+ "loss": 0.6549,
98
+ "step": 40
99
+ },
100
+ {
101
+ "epoch": 5.0,
102
+ "grad_norm": 0.31259897351264954,
103
+ "learning_rate": 6.227427435703997e-06,
104
+ "loss": 0.694,
105
+ "step": 45
106
+ },
107
+ {
108
+ "epoch": 5.0,
109
+ "eval_loss": 0.6824610829353333,
110
+ "eval_runtime": 3.3719,
111
+ "eval_samples_per_second": 4.449,
112
+ "eval_steps_per_second": 0.593,
113
+ "step": 45
114
+ },
115
+ {
116
+ "epoch": 5.588235294117647,
117
+ "grad_norm": 0.25407281517982483,
118
+ "learning_rate": 5.412896727361663e-06,
119
+ "loss": 0.6865,
120
+ "step": 50
121
+ },
122
+ {
123
+ "epoch": 6.0,
124
+ "eval_loss": 0.6763660311698914,
125
+ "eval_runtime": 3.3717,
126
+ "eval_samples_per_second": 4.449,
127
+ "eval_steps_per_second": 0.593,
128
+ "step": 54
129
+ },
130
+ {
131
+ "epoch": 6.117647058823529,
132
+ "grad_norm": 0.2767919600009918,
133
+ "learning_rate": 4.587103272638339e-06,
134
+ "loss": 0.6081,
135
+ "step": 55
136
+ },
137
+ {
138
+ "epoch": 6.705882352941177,
139
+ "grad_norm": 0.24282197654247284,
140
+ "learning_rate": 3.7725725642960047e-06,
141
+ "loss": 0.6577,
142
+ "step": 60
143
+ },
144
+ {
145
+ "epoch": 7.0,
146
+ "eval_loss": 0.6713435649871826,
147
+ "eval_runtime": 3.3715,
148
+ "eval_samples_per_second": 4.449,
149
+ "eval_steps_per_second": 0.593,
150
+ "step": 63
151
+ },
152
+ {
153
+ "epoch": 7.235294117647059,
154
+ "grad_norm": 0.1744387447834015,
155
+ "learning_rate": 2.991522876735154e-06,
156
+ "loss": 0.5941,
157
+ "step": 65
158
+ },
159
+ {
160
+ "epoch": 7.823529411764706,
161
+ "grad_norm": 0.20212271809577942,
162
+ "learning_rate": 2.265259209387867e-06,
163
+ "loss": 0.6509,
164
+ "step": 70
165
+ },
166
+ {
167
+ "epoch": 8.0,
168
+ "eval_loss": 0.6677358150482178,
169
+ "eval_runtime": 3.3723,
170
+ "eval_samples_per_second": 4.448,
171
+ "eval_steps_per_second": 0.593,
172
+ "step": 72
173
+ },
174
+ {
175
+ "epoch": 8.352941176470589,
176
+ "grad_norm": 0.16120634973049164,
177
+ "learning_rate": 1.6135921418712959e-06,
178
+ "loss": 0.5923,
179
+ "step": 75
180
+ },
181
+ {
182
+ "epoch": 8.941176470588236,
183
+ "grad_norm": 0.2318679690361023,
184
+ "learning_rate": 1.0542974530180327e-06,
185
+ "loss": 0.6438,
186
+ "step": 80
187
+ },
188
+ {
189
+ "epoch": 9.0,
190
+ "eval_loss": 0.6655252575874329,
191
+ "eval_runtime": 3.3713,
192
+ "eval_samples_per_second": 4.449,
193
+ "eval_steps_per_second": 0.593,
194
+ "step": 81
195
+ },
196
+ {
197
+ "epoch": 9.470588235294118,
198
+ "grad_norm": 0.19574101269245148,
199
+ "learning_rate": 6.026312439675553e-07,
200
+ "loss": 0.6359,
201
+ "step": 85
202
+ },
203
+ {
204
+ "epoch": 10.0,
205
+ "grad_norm": 0.22642117738723755,
206
+ "learning_rate": 2.7091379149682683e-07,
207
+ "loss": 0.5741,
208
+ "step": 90
209
+ },
210
+ {
211
+ "epoch": 10.0,
212
+ "eval_loss": 0.6652756929397583,
213
+ "eval_runtime": 3.3709,
214
+ "eval_samples_per_second": 4.45,
215
+ "eval_steps_per_second": 0.593,
216
+ "step": 90
217
+ },
218
+ {
219
+ "epoch": 10.588235294117647,
220
+ "grad_norm": 0.2666153013706207,
221
+ "learning_rate": 6.819348298638839e-08,
222
+ "loss": 0.6734,
223
+ "step": 95
224
+ },
225
+ {
226
+ "epoch": 11.0,
227
+ "eval_loss": 0.6646606922149658,
228
+ "eval_runtime": 3.3717,
229
+ "eval_samples_per_second": 4.449,
230
+ "eval_steps_per_second": 0.593,
231
+ "step": 99
232
  }
233
  ],
234
  "logging_steps": 5,
 
248
  "attributes": {}
249
  }
250
  },
251
+ "total_flos": 5.180655973758566e+16,
252
  "train_batch_size": 2,
253
  "trial_name": null,
254
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d8619b2675a3aadf91623d77096e075400d78b3a6660267fb11439bce8f9586
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02dca0ceed349b196a13d4f3de83bdc1d637c3f4a599aaf1cc66d5744a87d6c3
3
  size 5624