bhuvanmdev commited on
Commit
c5608a6
·
verified ·
1 Parent(s): a4e6abe

Training in progress, step 1980, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:736ce521e7280bd2c0975bc603cc94348d6eccc9990efd864828bf986063810d
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e0f85bcaec40105033574eeb2ea2d00ffa4f19c4827469d435c8d983135ba9
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5d973aa7ba038a74b7db64dcfe60c4781913554719fcda63eaae556dc3e290b
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb367bf17d757f09f15d71ca1434661c0b521cf6dcc22f86c9a6a04cb5338c65
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f02d6e86658ff6b5c2087c9efdb8f98bc5474610047eb47a7fd80ecf387cb826
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2472623869c21ab6aa274a60ff7028939f11ff92530a722bec38d4e2fae101e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07fafb27edca5562e76184af344027e6c5895e3f1fc53905ab247cd738263f94
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12668937a663fe1d51e58a4a78fe6002afedcdaa7ad142ebb2d8568de4c073cc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7012522361359571,
5
  "eval_steps": 500,
6
- "global_step": 1960,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1575,14 +1575,30 @@
1575
  "loss": 0.3683,
1576
  "num_input_tokens_seen": 1326200,
1577
  "step": 1960
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1578
  }
1579
  ],
1580
  "logging_steps": 10,
1581
  "max_steps": 2795,
1582
- "num_input_tokens_seen": 1326200,
1583
  "num_train_epochs": 1,
1584
  "save_steps": 20,
1585
- "total_flos": 2.98215546679296e+16,
1586
  "train_batch_size": 1,
1587
  "trial_name": null,
1588
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7084078711985689,
5
  "eval_steps": 500,
6
+ "global_step": 1980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1575
  "loss": 0.3683,
1576
  "num_input_tokens_seen": 1326200,
1577
  "step": 1960
1578
+ },
1579
+ {
1580
+ "epoch": 0.7048300536672629,
1581
+ "grad_norm": 0.3898317217826843,
1582
+ "learning_rate": 5.9033989266547405e-05,
1583
+ "loss": 0.4181,
1584
+ "num_input_tokens_seen": 1333316,
1585
+ "step": 1970
1586
+ },
1587
+ {
1588
+ "epoch": 0.7084078711985689,
1589
+ "grad_norm": 0.2924859821796417,
1590
+ "learning_rate": 5.831842576028623e-05,
1591
+ "loss": 0.3828,
1592
+ "num_input_tokens_seen": 1339225,
1593
+ "step": 1980
1594
  }
1595
  ],
1596
  "logging_steps": 10,
1597
  "max_steps": 2795,
1598
+ "num_input_tokens_seen": 1339225,
1599
  "num_train_epochs": 1,
1600
  "save_steps": 20,
1601
+ "total_flos": 3.01144409215488e+16,
1602
  "train_batch_size": 1,
1603
  "trial_name": null,
1604
  "trial_params": null