adammandic87 committed (verified)
Commit d6897e9
1 Parent(s): 362fa39

Training in progress, step 38, checkpoint
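The files below are the standard artifacts the Hugging Face transformers Trainer writes when it saves a mid-training checkpoint: adapter weights, optimizer and LR-scheduler state, RNG state, and trainer_state.json. A minimal sketch of inspecting where training would resume, assuming the repository has been cloned locally with LFS (the local path is a placeholder, not part of this commit):

# Hedged sketch: read the checkpoint's trainer_state.json to see where training stands.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])      # 38 in this commit
print(state["epoch"])            # 0.5049833887043189
print(state["log_history"][-1])  # most recent logged entry (the step-38 eval)

Resuming would then typically go through Trainer.train(resume_from_checkpoint="last-checkpoint"), which restores the optimizer, scheduler, and RNG state saved here.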

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f3409b0f5efa7f40450782feb6a123591f249c238e1879c652d6d40f3459319
+oid sha256:40978b6045d435860bdc653f2c1ad8723d28aa9333e29555d66768df70c763bb
 size 50899792
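Each binary file in this commit is stored as a Git LFS pointer (version, oid sha256, size). A minimal sketch of checking a downloaded blob against the new pointer values above; the local file path is a placeholder:

# Hedged sketch: verify a downloaded LFS object against its pointer.
# oid/size are the new values from the diff above; the path assumes a local clone.
import hashlib
from pathlib import Path

expected_oid = "40978b6045d435860bdc653f2c1ad8723d28aa9333e29555d66768df70c763bb"
expected_size = 50899792

path = Path("last-checkpoint/adapter_model.safetensors")
digest = hashlib.sha256(path.read_bytes()).hexdigest()

assert path.stat().st_size == expected_size, "size mismatch"
assert digest == expected_oid, "sha256 mismatch"
print("pointer matches blob")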
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3c18fce2e1841a394b8e913293aaac27b29b40846fa539e2acc0c900fb39e1c
+oid sha256:d6d6739e3b80f0c91dc11e94b1399835f20bb5b0a93df5bd3138f9f3a59d2d13
 size 26231300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f918e894c61d79473825b52a272cf41e854c27a6d9183f7c13da913c2b3b6227
+oid sha256:d606d31d447120bc1b4de5890ffaff6e62d8521d8976078b55323f24cb5690d3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbe7944e3134660b6c8767b8065ebe88cdbbd95d926d0c63b94c69623d39c56b
+oid sha256:3c4f0183aec085119f27cd46c60ab3f231930ae66c7ca01d0adff96b44d5e0e2
 size 1064
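optimizer.pt, scheduler.pt, and rng_state.pth are torch-serialized objects saved alongside the adapter weights. A minimal sketch of peeking inside them locally, assuming the checkpoint has been pulled with LFS (the exact contents are the usual Trainer artifacts, not verified against this specific commit):

# Hedged sketch: inspect the non-weight checkpoint artifacts.
# weights_only=False because these files contain arbitrary pickled Python objects.
import torch

opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(list(opt_state.keys()) if isinstance(opt_state, dict) else type(opt_state))
print(type(sched_state))
print(type(rng_state))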
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25249169435215946,
+  "epoch": 0.5049833887043189,
   "eval_steps": 19,
-  "global_step": 19,
+  "global_step": 38,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -156,6 +156,147 @@
       "eval_samples_per_second": 33.751,
       "eval_steps_per_second": 16.875,
       "step": 19
+    },
+    {
+      "epoch": 0.26578073089701,
+      "grad_norm": 0.659614086151123,
+      "learning_rate": 0.00018888354486549237,
+      "loss": 0.7246,
+      "step": 20
+    },
+    {
+      "epoch": 0.27906976744186046,
+      "grad_norm": 0.4992068409919739,
+      "learning_rate": 0.00018660254037844388,
+      "loss": 0.5093,
+      "step": 21
+    },
+    {
+      "epoch": 0.292358803986711,
+      "grad_norm": 0.5019382238388062,
+      "learning_rate": 0.00018412535328311814,
+      "loss": 0.6098,
+      "step": 22
+    },
+    {
+      "epoch": 0.30564784053156147,
+      "grad_norm": 0.6148894429206848,
+      "learning_rate": 0.00018145759520503358,
+      "loss": 0.7219,
+      "step": 23
+    },
+    {
+      "epoch": 0.31893687707641194,
+      "grad_norm": 0.5921617150306702,
+      "learning_rate": 0.00017860530947427875,
+      "loss": 0.8486,
+      "step": 24
+    },
+    {
+      "epoch": 0.33222591362126247,
+      "grad_norm": 0.4521740674972534,
+      "learning_rate": 0.00017557495743542585,
+      "loss": 0.4504,
+      "step": 25
+    },
+    {
+      "epoch": 0.34551495016611294,
+      "grad_norm": 0.5427228808403015,
+      "learning_rate": 0.00017237340381050703,
+      "loss": 0.5575,
+      "step": 26
+    },
+    {
+      "epoch": 0.3588039867109635,
+      "grad_norm": 0.466899037361145,
+      "learning_rate": 0.00016900790114821122,
+      "loss": 0.7179,
+      "step": 27
+    },
+    {
+      "epoch": 0.37209302325581395,
+      "grad_norm": 0.4334196448326111,
+      "learning_rate": 0.00016548607339452853,
+      "loss": 0.6399,
+      "step": 28
+    },
+    {
+      "epoch": 0.3853820598006645,
+      "grad_norm": 0.39604687690734863,
+      "learning_rate": 0.00016181589862206052,
+      "loss": 0.4078,
+      "step": 29
+    },
+    {
+      "epoch": 0.39867109634551495,
+      "grad_norm": 0.4122071862220764,
+      "learning_rate": 0.00015800569095711982,
+      "loss": 0.4967,
+      "step": 30
+    },
+    {
+      "epoch": 0.4119601328903654,
+      "grad_norm": 0.473812997341156,
+      "learning_rate": 0.00015406408174555976,
+      "loss": 0.7088,
+      "step": 31
+    },
+    {
+      "epoch": 0.42524916943521596,
+      "grad_norm": 0.5842433571815491,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.7418,
+      "step": 32
+    },
+    {
+      "epoch": 0.43853820598006643,
+      "grad_norm": 0.5841939449310303,
+      "learning_rate": 0.00014582265217274104,
+      "loss": 0.761,
+      "step": 33
+    },
+    {
+      "epoch": 0.45182724252491696,
+      "grad_norm": 0.3879057765007019,
+      "learning_rate": 0.00014154150130018866,
+      "loss": 0.4902,
+      "step": 34
+    },
+    {
+      "epoch": 0.46511627906976744,
+      "grad_norm": 0.6922730803489685,
+      "learning_rate": 0.00013716624556603274,
+      "loss": 0.7617,
+      "step": 35
+    },
+    {
+      "epoch": 0.47840531561461797,
+      "grad_norm": 0.5831931233406067,
+      "learning_rate": 0.00013270679633174218,
+      "loss": 0.8443,
+      "step": 36
+    },
+    {
+      "epoch": 0.49169435215946844,
+      "grad_norm": 0.5058356523513794,
+      "learning_rate": 0.00012817325568414297,
+      "loss": 0.6578,
+      "step": 37
+    },
+    {
+      "epoch": 0.5049833887043189,
+      "grad_norm": 0.3720795512199402,
+      "learning_rate": 0.00012357589355094275,
+      "loss": 0.5019,
+      "step": 38
+    },
+    {
+      "epoch": 0.5049833887043189,
+      "eval_loss": 0.690031886100769,
+      "eval_runtime": 0.9404,
+      "eval_samples_per_second": 34.029,
+      "eval_steps_per_second": 17.015,
+      "step": 38
     }
   ],
   "logging_steps": 1,
@@ -175,7 +316,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1599094896721920.0,
+  "total_flos": 3157187360194560.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null