sandernotenbaert committed on
Commit
16d608c
·
verified ·
1 Parent(s): d5f6ad3

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f95c23974ed9109b85d1dad46b75fc294d57305fa7210035c0b8f9d6d02471c8
3
  size 83393928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:553847da08f4d99da7d19a6b3de5c0bfaddfb192c11023e43e98976448986428
3
  size 83393928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ca5659e648d8d2948c2a5236ec358ca29f5a01082e4ad9696138a21a331db0
3
  size 166834490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:815f38c8915ea63b4c636ccf0bb20219fa79e6d05a12c0e52cd97f9f17abe0a8
3
  size 166834490
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fab47d19446cd54935782c0ac0c7342531b306e31812fb709d49481db05d461
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e784b60e22cd0340bb430f54e442a2113831330be18f972ee8b8be0beda9874d
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff6c50a5de085f119dc1e92af1ad0ce825f5b55602192739818f51a65a78e8d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b297641bdf7639a42fd0ab19dc974295f49e08948c929302cee801e3646dd894
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0094758886014536,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -51,6 +51,49 @@
51
  "eval_samples_per_second": 7.608,
52
  "eval_steps_per_second": 3.804,
53
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
  ],
56
  "logging_steps": 100,
@@ -70,7 +113,7 @@
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 512075305648128.0,
74
  "train_batch_size": 2,
75
  "trial_name": null,
76
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0189517772029072,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
51
  "eval_samples_per_second": 7.608,
52
  "eval_steps_per_second": 3.804,
53
  "step": 500
54
+ },
55
+ {
56
+ "epoch": 0.011371066321744321,
57
+ "grad_norm": 1.2209495306015015,
58
+ "learning_rate": 4.981079988376354e-05,
59
+ "loss": 1.0478,
60
+ "step": 600
61
+ },
62
+ {
63
+ "epoch": 0.013266244042035042,
64
+ "grad_norm": 1.0413841009140015,
65
+ "learning_rate": 4.9779213887730734e-05,
66
+ "loss": 1.0083,
67
+ "step": 700
68
+ },
69
+ {
70
+ "epoch": 0.015161421762325763,
71
+ "grad_norm": 1.0854493379592896,
72
+ "learning_rate": 4.974762789169794e-05,
73
+ "loss": 0.9881,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 0.017056599482616484,
78
+ "grad_norm": 0.9769160747528076,
79
+ "learning_rate": 4.9716041895665136e-05,
80
+ "loss": 0.9694,
81
+ "step": 900
82
+ },
83
+ {
84
+ "epoch": 0.0189517772029072,
85
+ "grad_norm": 1.0413130521774292,
86
+ "learning_rate": 4.9684455899632343e-05,
87
+ "loss": 0.9495,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 0.0189517772029072,
92
+ "eval_loss": 0.9161216616630554,
93
+ "eval_runtime": 8628.0851,
94
+ "eval_samples_per_second": 10.872,
95
+ "eval_steps_per_second": 5.436,
96
+ "step": 1000
97
  }
98
  ],
99
  "logging_steps": 100,
 
113
  "attributes": {}
114
  }
115
  },
116
+ "total_flos": 1026720523419648.0,
117
  "train_batch_size": 2,
118
  "trial_name": null,
119
  "trial_params": null