apriasmoro commited on
Commit
f1a5340
·
verified ·
1 Parent(s): 8fa6863

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06b103d26d07c4ed42fbee8ace2fe6573cc33fa4640fc5ac8cdb9f08c6422c37
3
  size 349243752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2634d7d437e55d120f9cbfc1a0a647ba5e43707198ec229840f3cbd4c2c6010d
3
  size 349243752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c971da1af950d6b26f168fac122add84f133c26a319b4d172666d21bb888d4e9
3
  size 177909253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77d6a6963848ad083ec9f75340262560feabd272bc333bcca7686444ebcc3703
3
  size 177909253
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68059c8080c3be6efde532e424e65c8e3d05b6e6e4a6bc5308804f229fa094d9
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0ee16dc80f0a35b0d265d0738a01e60e4a6bc16d147140edaf591ee6a31fd0
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cfb0202afd5938a45bdfdc38b8ac97c84ae18383bac3b871da4549cec70e22e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326d14f5288c1d48dfb46fecae7e18839fcfd032bd3fac00e22ed4ca25616087
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4975124378109453,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -358,6 +358,76 @@
358
  "learning_rate": 1.196412859476037e-05,
359
  "loss": 1.4087,
360
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
  ],
363
  "logging_steps": 10,
@@ -377,7 +447,7 @@
377
  "attributes": {}
378
  }
379
  },
380
- "total_flos": 3.76010719956566e+17,
381
  "train_batch_size": 24,
382
  "trial_name": null,
383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5970149253731343,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
358
  "learning_rate": 1.196412859476037e-05,
359
  "loss": 1.4087,
360
  "step": 500
361
+ },
362
+ {
363
+ "epoch": 0.5074626865671642,
364
+ "grad_norm": 0.15541517734527588,
365
+ "learning_rate": 1.1961691644685907e-05,
366
+ "loss": 1.3638,
367
+ "step": 510
368
+ },
369
+ {
370
+ "epoch": 0.5174129353233831,
371
+ "grad_norm": 0.14922229945659637,
372
+ "learning_rate": 1.195917487915494e-05,
373
+ "loss": 1.4186,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 0.527363184079602,
378
+ "grad_norm": 0.1509064882993698,
379
+ "learning_rate": 1.1956578331862066e-05,
380
+ "loss": 1.3244,
381
+ "step": 530
382
+ },
383
+ {
384
+ "epoch": 0.5373134328358209,
385
+ "grad_norm": 0.15153075754642487,
386
+ "learning_rate": 1.1953902037570002e-05,
387
+ "loss": 1.3692,
388
+ "step": 540
389
+ },
390
+ {
391
+ "epoch": 0.5472636815920398,
392
+ "grad_norm": 0.18627804517745972,
393
+ "learning_rate": 1.1951146032109126e-05,
394
+ "loss": 1.3707,
395
+ "step": 550
396
+ },
397
+ {
398
+ "epoch": 0.5572139303482587,
399
+ "grad_norm": 0.1417003720998764,
400
+ "learning_rate": 1.1948310352376988e-05,
401
+ "loss": 1.3976,
402
+ "step": 560
403
+ },
404
+ {
405
+ "epoch": 0.5671641791044776,
406
+ "grad_norm": 0.14316676557064056,
407
+ "learning_rate": 1.1945395036337829e-05,
408
+ "loss": 1.4397,
409
+ "step": 570
410
+ },
411
+ {
412
+ "epoch": 0.5771144278606966,
413
+ "grad_norm": 0.11908440291881561,
414
+ "learning_rate": 1.1942400123022057e-05,
415
+ "loss": 1.3614,
416
+ "step": 580
417
+ },
418
+ {
419
+ "epoch": 0.5870646766169154,
420
+ "grad_norm": 0.15239761769771576,
421
+ "learning_rate": 1.1939325652525737e-05,
422
+ "loss": 1.4346,
423
+ "step": 590
424
+ },
425
+ {
426
+ "epoch": 0.5970149253731343,
427
+ "grad_norm": 0.14442642033100128,
428
+ "learning_rate": 1.193617166601005e-05,
429
+ "loss": 1.3687,
430
+ "step": 600
431
  }
432
  ],
433
  "logging_steps": 10,
 
447
  "attributes": {}
448
  }
449
  },
450
+ "total_flos": 4.5084597833170944e+17,
451
  "train_batch_size": 24,
452
  "trial_name": null,
453
  "trial_params": null