bhuvanmdev commited on
Commit
f85a02d
·
verified ·
1 Parent(s): a81c32a

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09e0f85bcaec40105033574eeb2ea2d00ffa4f19c4827469d435c8d983135ba9
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f86dd9371b781155ba643a548b4de3e51acb102f7bc37f5826e1d50947b2f0
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb367bf17d757f09f15d71ca1434661c0b521cf6dcc22f86c9a6a04cb5338c65
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e3e81d18fadc24063e623e9d4cc0d12eb575ca5424a5a8f3ebf2648240568b
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2472623869c21ab6aa274a60ff7028939f11ff92530a722bec38d4e2fae101e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2191a9ab0ab41e50985488533d64dae12c867360e85f064297a87e2f978e6536
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12668937a663fe1d51e58a4a78fe6002afedcdaa7ad142ebb2d8568de4c073cc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16253ca7555b24e595f70e545999ce4ef3d7193309d16f3cd6b13443ca20da3e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7084078711985689,
5
  "eval_steps": 500,
6
- "global_step": 1980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1591,14 +1591,30 @@
1591
  "loss": 0.3828,
1592
  "num_input_tokens_seen": 1339225,
1593
  "step": 1980
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
  }
1595
  ],
1596
  "logging_steps": 10,
1597
  "max_steps": 2795,
1598
- "num_input_tokens_seen": 1339225,
1599
  "num_train_epochs": 1,
1600
  "save_steps": 20,
1601
- "total_flos": 3.01144409215488e+16,
1602
  "train_batch_size": 1,
1603
  "trial_name": null,
1604
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7155635062611807,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1591
  "loss": 0.3828,
1592
  "num_input_tokens_seen": 1339225,
1593
  "step": 1980
1594
+ },
1595
+ {
1596
+ "epoch": 0.7119856887298748,
1597
+ "grad_norm": 0.3090393543243408,
1598
+ "learning_rate": 5.7602862254025045e-05,
1599
+ "loss": 0.3932,
1600
+ "num_input_tokens_seen": 1346104,
1601
+ "step": 1990
1602
+ },
1603
+ {
1604
+ "epoch": 0.7155635062611807,
1605
+ "grad_norm": 0.4194253981113434,
1606
+ "learning_rate": 5.6887298747763865e-05,
1607
+ "loss": 0.4041,
1608
+ "num_input_tokens_seen": 1352944,
1609
+ "step": 2000
1610
  }
1611
  ],
1612
  "logging_steps": 10,
1613
  "max_steps": 2795,
1614
+ "num_input_tokens_seen": 1352944,
1615
  "num_train_epochs": 1,
1616
  "save_steps": 20,
1617
+ "total_flos": 3.042293278438195e+16,
1618
  "train_batch_size": 1,
1619
  "trial_name": null,
1620
  "trial_params": null