alicegoesdown committed on
Commit
6ad03a3
·
verified ·
1 Parent(s): 61e2c1b

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:846434ca50ba2a07c1aa914ac92335cf2e993e47f0a4fb98035e3767df707187
3
  size 144748392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb06faffef34c41555fea1d5089780e433d69d42990a3d68280d81f3534dd8d8
3
  size 144748392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76bd4bbf6bad97827cd2d1c96918889601fe5549f9ddbaa27e3833ca7ecf5226
3
- size 289690498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3349182d6b3c5f7af8836559bfe4beb9f252d5cdb2f1fc30236f4044b83635cf
3
+ size 289690562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bcf421949ef5e2cf610a12a564035f1eafe2c2459ad36aec2693cef4a5645fc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aedac496cb43fdf15db405755e31a29c804a8eb29ca1acc7f2849e933d85bfa3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2a61a0595dd1862605bddb150b0c4ebd6b684b46d33bd5e4926bf5e77255160
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8d2900de30e6031eb67496f65d84b5428252e5a56573254c12f627baa587a8
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.0569764375686646,
3
- "best_model_checkpoint": "./output/checkpoint-300",
4
- "epoch": 0.019120458891013385,
5
  "eval_steps": 150,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -233,6 +233,119 @@
233
  "eval_samples_per_second": 12.034,
234
  "eval_steps_per_second": 12.034,
235
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  }
237
  ],
238
  "logging_steps": 10,
@@ -252,7 +365,7 @@
252
  "attributes": {}
253
  }
254
  },
255
- "total_flos": 8380319636520960.0,
256
  "train_batch_size": 32,
257
  "trial_name": null,
258
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.032205581665039,
3
+ "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 0.028680688336520075,
5
  "eval_steps": 150,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
233
  "eval_samples_per_second": 12.034,
234
  "eval_steps_per_second": 12.034,
235
  "step": 300
236
+ },
237
+ {
238
+ "epoch": 0.019757807520713832,
239
+ "grad_norm": 0.7070486545562744,
240
+ "learning_rate": 0.00012443436011049593,
241
+ "loss": 1.0582,
242
+ "step": 310
243
+ },
244
+ {
245
+ "epoch": 0.020395156150414276,
246
+ "grad_norm": 0.6703718304634094,
247
+ "learning_rate": 0.0001243792984780751,
248
+ "loss": 1.017,
249
+ "step": 320
250
+ },
251
+ {
252
+ "epoch": 0.021032504780114723,
253
+ "grad_norm": 0.7044450044631958,
254
+ "learning_rate": 0.00012432169323005853,
255
+ "loss": 1.0458,
256
+ "step": 330
257
+ },
258
+ {
259
+ "epoch": 0.021669853409815167,
260
+ "grad_norm": 0.6337553858757019,
261
+ "learning_rate": 0.00012426154673437223,
262
+ "loss": 1.0401,
263
+ "step": 340
264
+ },
265
+ {
266
+ "epoch": 0.022307202039515615,
267
+ "grad_norm": 0.6895059943199158,
268
+ "learning_rate": 0.00012419886146340314,
269
+ "loss": 1.0332,
270
+ "step": 350
271
+ },
272
+ {
273
+ "epoch": 0.022944550669216062,
274
+ "grad_norm": 0.6908580660820007,
275
+ "learning_rate": 0.0001241336399938972,
276
+ "loss": 1.0295,
277
+ "step": 360
278
+ },
279
+ {
280
+ "epoch": 0.023581899298916506,
281
+ "grad_norm": 0.6715300679206848,
282
+ "learning_rate": 0.00012406588500685355,
283
+ "loss": 1.0342,
284
+ "step": 370
285
+ },
286
+ {
287
+ "epoch": 0.024219247928616953,
288
+ "grad_norm": 0.6342328190803528,
289
+ "learning_rate": 0.00012399559928741435,
290
+ "loss": 1.0214,
291
+ "step": 380
292
+ },
293
+ {
294
+ "epoch": 0.0248565965583174,
295
+ "grad_norm": 0.6497303247451782,
296
+ "learning_rate": 0.00012392278572475023,
297
+ "loss": 1.0498,
298
+ "step": 390
299
+ },
300
+ {
301
+ "epoch": 0.025493945188017845,
302
+ "grad_norm": 0.6558589935302734,
303
+ "learning_rate": 0.0001238474473119416,
304
+ "loss": 1.0091,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 0.026131293817718292,
309
+ "grad_norm": 0.6536452770233154,
310
+ "learning_rate": 0.00012376958714585545,
311
+ "loss": 1.051,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 0.02676864244741874,
316
+ "grad_norm": 0.734528660774231,
317
+ "learning_rate": 0.0001236892084270183,
318
+ "loss": 1.0173,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 0.027405991077119184,
323
+ "grad_norm": 0.6470915675163269,
324
+ "learning_rate": 0.00012360631445948448,
325
+ "loss": 1.0331,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 0.02804333970681963,
330
+ "grad_norm": 0.6855731010437012,
331
+ "learning_rate": 0.00012352090865070026,
332
+ "loss": 1.0086,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 0.028680688336520075,
337
+ "grad_norm": 0.6433871388435364,
338
+ "learning_rate": 0.00012343299451136397,
339
+ "loss": 1.0321,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 0.028680688336520075,
344
+ "eval_loss": 1.032205581665039,
345
+ "eval_runtime": 41.0788,
346
+ "eval_samples_per_second": 12.172,
347
+ "eval_steps_per_second": 12.172,
348
+ "step": 450
349
  }
350
  ],
351
  "logging_steps": 10,
 
365
  "attributes": {}
366
  }
367
  },
368
+ "total_flos": 1.255704084086784e+16,
369
  "train_batch_size": 32,
370
  "trial_name": null,
371
  "trial_params": null