PLB commited on
Commit
dfbd5f2
·
verified ·
1 Parent(s): 6de7d76

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +88 -4
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.246376811594203,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2457,6 +2457,90 @@
2457
  "learning_rate": 6.298747245393699e-07,
2458
  "loss": 0.0155,
2459
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2460
  }
2461
  ],
2462
  "logging_steps": 10,
@@ -2471,12 +2555,12 @@
2471
  "should_evaluate": false,
2472
  "should_log": false,
2473
  "should_save": true,
2474
- "should_training_stop": false
2475
  },
2476
  "attributes": {}
2477
  }
2478
  },
2479
- "total_flos": 1.269351656215514e+17,
2480
  "train_batch_size": 16,
2481
  "trial_name": null,
2482
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.50103519668737,
5
  "eval_steps": 500,
6
+ "global_step": 3623,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2457
  "learning_rate": 6.298747245393699e-07,
2458
  "loss": 0.0155,
2459
  "step": 3500
2460
+ },
2461
+ {
2462
+ "epoch": 7.267080745341615,
2463
+ "grad_norm": 0.15611431002616882,
2464
+ "learning_rate": 5.317066030354534e-07,
2465
+ "loss": 0.0213,
2466
+ "step": 3510
2467
+ },
2468
+ {
2469
+ "epoch": 7.287784679089027,
2470
+ "grad_norm": 0.24354061484336853,
2471
+ "learning_rate": 4.418295795629246e-07,
2472
+ "loss": 0.02,
2473
+ "step": 3520
2474
+ },
2475
+ {
2476
+ "epoch": 7.308488612836439,
2477
+ "grad_norm": 0.2028241902589798,
2478
+ "learning_rate": 3.6025114574734785e-07,
2479
+ "loss": 0.0124,
2480
+ "step": 3530
2481
+ },
2482
+ {
2483
+ "epoch": 7.329192546583851,
2484
+ "grad_norm": 0.2744869887828827,
2485
+ "learning_rate": 2.8697810149219686e-07,
2486
+ "loss": 0.0226,
2487
+ "step": 3540
2488
+ },
2489
+ {
2490
+ "epoch": 7.349896480331263,
2491
+ "grad_norm": 0.24214069545269012,
2492
+ "learning_rate": 2.2201655441199676e-07,
2493
+ "loss": 0.0158,
2494
+ "step": 3550
2495
+ },
2496
+ {
2497
+ "epoch": 7.370600414078675,
2498
+ "grad_norm": 0.1712854951620102,
2499
+ "learning_rate": 1.653719193232206e-07,
2500
+ "loss": 0.0161,
2501
+ "step": 3560
2502
+ },
2503
+ {
2504
+ "epoch": 7.391304347826087,
2505
+ "grad_norm": 0.22905586659908295,
2506
+ "learning_rate": 1.1704891779298344e-07,
2507
+ "loss": 0.0248,
2508
+ "step": 3570
2509
+ },
2510
+ {
2511
+ "epoch": 7.412008281573499,
2512
+ "grad_norm": 0.22215716540813446,
2513
+ "learning_rate": 7.705157774545724e-08,
2514
+ "loss": 0.0202,
2515
+ "step": 3580
2516
+ },
2517
+ {
2518
+ "epoch": 7.432712215320911,
2519
+ "grad_norm": 0.41476190090179443,
2520
+ "learning_rate": 4.538323312610615e-08,
2521
+ "loss": 0.0217,
2522
+ "step": 3590
2523
+ },
2524
+ {
2525
+ "epoch": 7.453416149068323,
2526
+ "grad_norm": 0.13812510669231415,
2527
+ "learning_rate": 2.2046523623853178e-08,
2528
+ "loss": 0.0169,
2529
+ "step": 3600
2530
+ },
2531
+ {
2532
+ "epoch": 7.474120082815735,
2533
+ "grad_norm": 0.2017739713191986,
2534
+ "learning_rate": 7.043394450956254e-09,
2535
+ "loss": 0.0178,
2536
+ "step": 3610
2537
+ },
2538
+ {
2539
+ "epoch": 7.494824016563147,
2540
+ "grad_norm": 0.2699184715747833,
2541
+ "learning_rate": 3.7509618097120167e-10,
2542
+ "loss": 0.0182,
2543
+ "step": 3620
2544
  }
2545
  ],
2546
  "logging_steps": 10,
 
2555
  "should_evaluate": false,
2556
  "should_log": false,
2557
  "should_save": true,
2558
+ "should_training_stop": true
2559
  },
2560
  "attributes": {}
2561
  }
2562
  },
2563
+ "total_flos": 1.3139658769115318e+17,
2564
  "train_batch_size": 16,
2565
  "trial_name": null,
2566
  "trial_params": null