Ubuntu committed on
Commit
cbdbedb
·
1 Parent(s): 97554c7
Files changed (2) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +99 -3
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2c13d1f6aca9d6c804c0bc9298ed055733e600a46b1bdca00020324eba63c6a
3
  size 24673403925
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015647113cba9eeeeb86480d56b6cad9c3aa139fa1fcc711038a96c643d2f922
3
  size 24673403925
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0411962160512664,
5
- "global_step": 540,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -228,11 +228,107 @@
228
  "eval_samples_per_second": 3.268,
229
  "eval_steps_per_second": 0.418,
230
  "step": 540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  }
232
  ],
233
  "max_steps": 13108,
234
  "num_train_epochs": 1,
235
- "total_flos": 7509129560064.0,
236
  "trial_name": null,
237
  "trial_params": null
238
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0595056454073848,
5
+ "global_step": 780,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
228
  "eval_samples_per_second": 3.268,
229
  "eval_steps_per_second": 0.418,
230
  "step": 540
231
+ },
232
+ {
233
+ "epoch": 0.04,
234
+ "eval_loss": 2.2439846992492676,
235
+ "eval_runtime": 95.3723,
236
+ "eval_samples_per_second": 3.282,
237
+ "eval_steps_per_second": 0.419,
238
+ "step": 560
239
+ },
240
+ {
241
+ "epoch": 0.04,
242
+ "eval_loss": 2.22643780708313,
243
+ "eval_runtime": 95.2142,
244
+ "eval_samples_per_second": 3.287,
245
+ "eval_steps_per_second": 0.42,
246
+ "step": 580
247
+ },
248
+ {
249
+ "epoch": 0.05,
250
+ "eval_loss": 2.2366464138031006,
251
+ "eval_runtime": 94.7461,
252
+ "eval_samples_per_second": 3.304,
253
+ "eval_steps_per_second": 0.422,
254
+ "step": 600
255
+ },
256
+ {
257
+ "epoch": 0.05,
258
+ "eval_loss": 2.2283596992492676,
259
+ "eval_runtime": 94.7042,
260
+ "eval_samples_per_second": 3.305,
261
+ "eval_steps_per_second": 0.422,
262
+ "step": 620
263
+ },
264
+ {
265
+ "epoch": 0.05,
266
+ "eval_loss": 2.228659152984619,
267
+ "eval_runtime": 95.4845,
268
+ "eval_samples_per_second": 3.278,
269
+ "eval_steps_per_second": 0.419,
270
+ "step": 640
271
+ },
272
+ {
273
+ "epoch": 0.05,
274
+ "eval_loss": 2.2173023223876953,
275
+ "eval_runtime": 96.1967,
276
+ "eval_samples_per_second": 3.254,
277
+ "eval_steps_per_second": 0.416,
278
+ "step": 660
279
+ },
280
+ {
281
+ "epoch": 0.05,
282
+ "eval_loss": 2.209789276123047,
283
+ "eval_runtime": 95.9623,
284
+ "eval_samples_per_second": 3.262,
285
+ "eval_steps_per_second": 0.417,
286
+ "step": 680
287
+ },
288
+ {
289
+ "epoch": 0.05,
290
+ "eval_loss": 2.2206969261169434,
291
+ "eval_runtime": 96.1394,
292
+ "eval_samples_per_second": 3.256,
293
+ "eval_steps_per_second": 0.416,
294
+ "step": 700
295
+ },
296
+ {
297
+ "epoch": 0.05,
298
+ "eval_loss": 2.2181010246276855,
299
+ "eval_runtime": 95.7693,
300
+ "eval_samples_per_second": 3.268,
301
+ "eval_steps_per_second": 0.418,
302
+ "step": 720
303
+ },
304
+ {
305
+ "epoch": 0.06,
306
+ "eval_loss": 2.202101707458496,
307
+ "eval_runtime": 94.766,
308
+ "eval_samples_per_second": 3.303,
309
+ "eval_steps_per_second": 0.422,
310
+ "step": 740
311
+ },
312
+ {
313
+ "epoch": 0.06,
314
+ "eval_loss": 2.196211099624634,
315
+ "eval_runtime": 96.1177,
316
+ "eval_samples_per_second": 3.256,
317
+ "eval_steps_per_second": 0.416,
318
+ "step": 760
319
+ },
320
+ {
321
+ "epoch": 0.06,
322
+ "eval_loss": 2.19002103805542,
323
+ "eval_runtime": 95.6171,
324
+ "eval_samples_per_second": 3.273,
325
+ "eval_steps_per_second": 0.418,
326
+ "step": 780
327
  }
328
  ],
329
  "max_steps": 13108,
330
  "num_train_epochs": 1,
331
+ "total_flos": 11051923931136.0,
332
  "trial_name": null,
333
  "trial_params": null
334
  }