CocoRoF committed on
Commit
7c2b743
·
verified ·
1 Parent(s): fb2acab

Training in progress, step 15000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a650c90512a42dd4683e8a973d7b2a127d961133cf781e96a2f0bedbf19c86ff
3
  size 962707376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2c6c1cadce7ce52b13a926bfbafa64e446b86e5b4b7ec3eba9dfd5cad410f6
3
  size 962707376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9968735e9e258ea0ca015e23b0b8b0c42c3599f8a7328f83e1fd380c91894e9b
3
  size 61870586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b855713affff09aec742fce2d37ecba061f0936159fdaf7e21c3bd0641619255
3
  size 61870586
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70a8bad0a644766abc3efe2e19d0f0855eb6b4e5b56ecabe95af68e9bc0f2d75
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e255daa5ad5f11069df3a999246b0ccdc09247bd43432e7fdcd3a70ab5d06348
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab7cf4dee40cabd63b41ea11b2d5474174351dcc28ca3f8824100288377df3d5
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fba2973ecba0f00ec98a89ba1c3d4441c0d63876f4e6205a3677f60062c512c
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bcb873a9c3fc793985c939b6c1983b22259cb938f5837ad9557e8686e8fb37e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995a7178a2eebf115a7587c3e6c91ed487a1801369e20d9a59c790e542ab8b74
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7d663db8d609f40e7f1b8517b937f6ab700284f01dc91d7fd7acdf1e743e492
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acf023b229f8625b4ebc78e4ac75d24c6c079231c7dd2b05ff59df4dcb92a3f
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2eeebb1ff1345658711a7b947405ee1bda725b6384a44aad58e2e369e255d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3a306a2182134e3345739eaf8e920ee1bbaf2cd67b334a1c9dd75d75464588
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.457188218832016,
3
- "best_model_checkpoint": "/workspace/plateer_classifier_v0.1_result/checkpoint-10000",
4
- "epoch": 0.05856078096657497,
5
  "eval_steps": 5000,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -305,6 +305,155 @@
305
  "eval_samples_per_second": 210.492,
306
  "eval_steps_per_second": 6.578,
307
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  }
309
  ],
310
  "logging_steps": 250,
@@ -324,7 +473,7 @@
324
  "attributes": {}
325
  }
326
  },
327
- "total_flos": 2.591444624862085e+18,
328
  "train_batch_size": 8,
329
  "trial_name": null,
330
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.4201970100402832,
3
+ "best_model_checkpoint": "/workspace/plateer_classifier_v0.1_result/checkpoint-15000",
4
+ "epoch": 0.08784117144986245,
5
  "eval_steps": 5000,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
305
  "eval_samples_per_second": 210.492,
306
  "eval_steps_per_second": 6.578,
307
  "step": 10000
308
+ },
309
+ {
310
+ "epoch": 0.06002480049073934,
311
+ "grad_norm": 13.643828392028809,
312
+ "learning_rate": 0.0001997014219778306,
313
+ "loss": 0.456,
314
+ "step": 10250
315
+ },
316
+ {
317
+ "epoch": 0.06148882001490372,
318
+ "grad_norm": 13.211404800415039,
319
+ "learning_rate": 0.00019939040320473745,
320
+ "loss": 0.4666,
321
+ "step": 10500
322
+ },
323
+ {
324
+ "epoch": 0.06295283953906809,
325
+ "grad_norm": 11.1001615524292,
326
+ "learning_rate": 0.00019907938443164432,
327
+ "loss": 0.4495,
328
+ "step": 10750
329
+ },
330
+ {
331
+ "epoch": 0.06441685906323247,
332
+ "grad_norm": 8.222249984741211,
333
+ "learning_rate": 0.00019876836565855117,
334
+ "loss": 0.4483,
335
+ "step": 11000
336
+ },
337
+ {
338
+ "epoch": 0.06588087858739684,
339
+ "grad_norm": 13.589752197265625,
340
+ "learning_rate": 0.0001984585909605504,
341
+ "loss": 0.4438,
342
+ "step": 11250
343
+ },
344
+ {
345
+ "epoch": 0.06734489811156122,
346
+ "grad_norm": 9.988068580627441,
347
+ "learning_rate": 0.00019814757218745724,
348
+ "loss": 0.447,
349
+ "step": 11500
350
+ },
351
+ {
352
+ "epoch": 0.0688089176357256,
353
+ "grad_norm": 8.311960220336914,
354
+ "learning_rate": 0.0001978365534143641,
355
+ "loss": 0.4476,
356
+ "step": 11750
357
+ },
358
+ {
359
+ "epoch": 0.07027293715988997,
360
+ "grad_norm": 8.099685668945312,
361
+ "learning_rate": 0.00019752553464127094,
362
+ "loss": 0.4477,
363
+ "step": 12000
364
+ },
365
+ {
366
+ "epoch": 0.07173695668405435,
367
+ "grad_norm": 8.23130989074707,
368
+ "learning_rate": 0.00019721451586817782,
369
+ "loss": 0.4385,
370
+ "step": 12250
371
+ },
372
+ {
373
+ "epoch": 0.07320097620821871,
374
+ "grad_norm": 10.875362396240234,
375
+ "learning_rate": 0.00019690349709508467,
376
+ "loss": 0.4345,
377
+ "step": 12500
378
+ },
379
+ {
380
+ "epoch": 0.07466499573238308,
381
+ "grad_norm": 9.479572296142578,
382
+ "learning_rate": 0.00019659247832199152,
383
+ "loss": 0.4345,
384
+ "step": 12750
385
+ },
386
+ {
387
+ "epoch": 0.07612901525654746,
388
+ "grad_norm": 11.883151054382324,
389
+ "learning_rate": 0.0001962814595488984,
390
+ "loss": 0.4241,
391
+ "step": 13000
392
+ },
393
+ {
394
+ "epoch": 0.07759303478071183,
395
+ "grad_norm": 8.15208911895752,
396
+ "learning_rate": 0.00019597044077580524,
397
+ "loss": 0.4335,
398
+ "step": 13250
399
+ },
400
+ {
401
+ "epoch": 0.07905705430487621,
402
+ "grad_norm": 9.323240280151367,
403
+ "learning_rate": 0.0001956594220027121,
404
+ "loss": 0.4396,
405
+ "step": 13500
406
+ },
407
+ {
408
+ "epoch": 0.08052107382904058,
409
+ "grad_norm": 7.250824928283691,
410
+ "learning_rate": 0.00019534840322961897,
411
+ "loss": 0.4376,
412
+ "step": 13750
413
+ },
414
+ {
415
+ "epoch": 0.08198509335320496,
416
+ "grad_norm": 12.220071792602539,
417
+ "learning_rate": 0.0001950373844565258,
418
+ "loss": 0.4323,
419
+ "step": 14000
420
+ },
421
+ {
422
+ "epoch": 0.08344911287736934,
423
+ "grad_norm": 8.460916519165039,
424
+ "learning_rate": 0.00019472636568343266,
425
+ "loss": 0.4271,
426
+ "step": 14250
427
+ },
428
+ {
429
+ "epoch": 0.08491313240153371,
430
+ "grad_norm": 6.110500812530518,
431
+ "learning_rate": 0.0001944153469103395,
432
+ "loss": 0.4253,
433
+ "step": 14500
434
+ },
435
+ {
436
+ "epoch": 0.08637715192569809,
437
+ "grad_norm": 10.618386268615723,
438
+ "learning_rate": 0.00019410432813724636,
439
+ "loss": 0.427,
440
+ "step": 14750
441
+ },
442
+ {
443
+ "epoch": 0.08784117144986245,
444
+ "grad_norm": 9.827556610107422,
445
+ "learning_rate": 0.00019379330936415324,
446
+ "loss": 0.4254,
447
+ "step": 15000
448
+ },
449
+ {
450
+ "epoch": 0.08784117144986245,
451
+ "eval_accuracy": 0.877075711565186,
452
+ "eval_loss": 0.4201970100402832,
453
+ "eval_runtime": 11537.2443,
454
+ "eval_samples_per_second": 210.503,
455
+ "eval_steps_per_second": 6.578,
456
+ "step": 15000
457
  }
458
  ],
459
  "logging_steps": 250,
 
473
  "attributes": {}
474
  }
475
  },
476
+ "total_flos": 3.8871669372931277e+18,
477
  "train_batch_size": 8,
478
  "trial_name": null,
479
  "trial_params": null