SystemAdmin123 committed on
Commit
015871b
·
verified ·
1 Parent(s): 6524679

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5dd95401fc3b99c31feb93554bc64e099ff8726b9ae2b76642755b56a78788
3
  size 4972163696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4830c7dfae77af1baf72bf90cd6854788975df823f1b1ec49037d3331f4b713
3
  size 4972163696
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de9da9da4489bbbed52369b08304f129f60af591f846bd1c657c6a76a786dab
3
  size 2669366920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922d604842e24363921eaa52716294005a05f1ccdec8b6716513c45ad8592ea7
3
  size 2669366920
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2508c5face8bf2392945c63d45c7f260daaaf486f9e1c94da0dd42bd621a7a58
3
  size 7762295162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9debffe532ffb09a3d501e8e6ea700bc44ef490a68be3084838733c4fb61d2
3
  size 7762295162
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd0b45e6fd019f23318ffd2d46ef8cf6d2a160038f49f06fc17960b67863906f
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b8d06323d7782eba280757f4398dbd280b072f40187b91d87fe4f49d7e6828
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e71e2096e47d5825ef2ce323a1cf303b37363e73043bd284a3ba35f73c6da6e
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe37d9f1cbca235c339a9316b5b88784b3b45f0c10ef4f8070517c1de6878f5
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:220a99772b3d8f35e3e108e580fbc19089c4a43d7f11750324b648fbd4e2c7d7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f07ce69a2abeb161a8250a6ab5003f92f86de92b90dabd232da31b8c613fe6b7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4150943396226414,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 38.266,
145
  "eval_steps_per_second": 4.793,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -163,7 +206,7 @@
163
  "attributes": {}
164
  }
165
  },
166
- "total_flos": 5.484351812376986e+16,
167
  "train_batch_size": 4,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8867924528301887,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 38.266,
145
  "eval_steps_per_second": 4.793,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 1.509433962264151,
150
+ "grad_norm": 1.234375,
151
+ "learning_rate": 0.0001970941817426052,
152
+ "loss": 1.1987,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 1.6037735849056602,
157
+ "grad_norm": 1.1328125,
158
+ "learning_rate": 0.00019645406355025565,
159
+ "loss": 1.1701,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 1.6981132075471699,
164
+ "grad_norm": 1.1171875,
165
+ "learning_rate": 0.0001957515340994193,
166
+ "loss": 1.1686,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 1.7924528301886793,
171
+ "grad_norm": 1.1796875,
172
+ "learning_rate": 0.00019498704796656018,
173
+ "loss": 1.1835,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 1.8867924528301887,
178
+ "grad_norm": 1.171875,
179
+ "learning_rate": 0.00019416109981763526,
180
+ "loss": 1.4218,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 1.8867924528301887,
185
+ "eval_loss": 1.8310290575027466,
186
+ "eval_runtime": 38.9304,
187
+ "eval_samples_per_second": 38.556,
188
+ "eval_steps_per_second": 4.829,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
206
  "attributes": {}
207
  }
208
  },
209
+ "total_flos": 7.313993787611546e+16,
210
  "train_batch_size": 4,
211
  "trial_name": null,
212
  "trial_params": null