Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d13f91581e6c0113535614fd83d5478b864440320f3a1ede11fd41a90055f8d
 size 628216

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d679d225ca5e07bc96074f009eb38d0586a5368763bbb78f1c8bfae0e14a8bb
 size 628216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:642475beb62f24b9c5d2beb8f3e4ba24ba1ef669c5507ef1ed018b2ccee6ed59
 size 1273162

 version https://git-lfs.github.com/spec/v1
+oid sha256:11e09ac261eeddadc9997bc627fd1ed1e786842110bc5ca577418ab8940db125
 size 1273162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5457bb750bdf63b3180d04d62516b10f9c9e2cad261ef8019b6fd6a2526be36
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:59e46fccc4be1a8d61042b97c95ce2311a6460b315d5f85e0f154f1defc5f482
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79f5b4ba066d00b62be68b9f9ec9376def1ffea9a45f98d620f3198033186a6a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c9fd70aa4cf68daad2242bc04a8a03f22adc681e42e4ebf5294902cea9d0a87
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.766061782836914,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.010803802938634399,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 286.089,
       "eval_steps_per_second": 143.063,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 35008932741120.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.751439094543457,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.021607605877268798,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 286.089,
       "eval_steps_per_second": 143.063,
       "step": 50
+    },
+    {
+      "epoch": 0.011019878997407088,
+      "grad_norm": 0.31955963373184204,
+      "learning_rate": 9.628188298907782e-05,
+      "loss": 10.7833,
+      "step": 51
+    },
+    {
+      "epoch": 0.011235955056179775,
+      "grad_norm": 0.2600715458393097,
+      "learning_rate": 9.592434928729616e-05,
+      "loss": 10.7863,
+      "step": 52
+    },
+    {
+      "epoch": 0.011452031114952464,
+      "grad_norm": 0.25826898217201233,
+      "learning_rate": 9.555113246230442e-05,
+      "loss": 10.7734,
+      "step": 53
+    },
+    {
+      "epoch": 0.011668107173725151,
+      "grad_norm": 0.33920153975486755,
+      "learning_rate": 9.516235996730645e-05,
+      "loss": 10.7821,
+      "step": 54
+    },
+    {
+      "epoch": 0.01188418323249784,
+      "grad_norm": 0.2760711908340454,
+      "learning_rate": 9.475816456775313e-05,
+      "loss": 10.7771,
+      "step": 55
+    },
+    {
+      "epoch": 0.012100259291270527,
+      "grad_norm": 0.2651015818119049,
+      "learning_rate": 9.43386842960031e-05,
+      "loss": 10.7812,
+      "step": 56
+    },
+    {
+      "epoch": 0.012316335350043216,
+      "grad_norm": 0.3452089726924896,
+      "learning_rate": 9.39040624041849e-05,
+      "loss": 10.7577,
+      "step": 57
+    },
+    {
+      "epoch": 0.012532411408815903,
+      "grad_norm": 0.24794746935367584,
+      "learning_rate": 9.345444731527642e-05,
+      "loss": 10.7618,
+      "step": 58
+    },
+    {
+      "epoch": 0.01274848746758859,
+      "grad_norm": 0.26710182428359985,
+      "learning_rate": 9.298999257241863e-05,
+      "loss": 10.7622,
+      "step": 59
+    },
+    {
+      "epoch": 0.01296456352636128,
+      "grad_norm": 0.2514508068561554,
+      "learning_rate": 9.251085678648072e-05,
+      "loss": 10.7781,
+      "step": 60
+    },
+    {
+      "epoch": 0.013180639585133967,
+      "grad_norm": 0.2650161385536194,
+      "learning_rate": 9.201720358189464e-05,
+      "loss": 10.7779,
+      "step": 61
+    },
+    {
+      "epoch": 0.013396715643906655,
+      "grad_norm": 0.2767448127269745,
+      "learning_rate": 9.150920154077754e-05,
+      "loss": 10.7757,
+      "step": 62
+    },
+    {
+      "epoch": 0.013612791702679343,
+      "grad_norm": 0.29482796788215637,
+      "learning_rate": 9.098702414536107e-05,
+      "loss": 10.771,
+      "step": 63
+    },
+    {
+      "epoch": 0.013828867761452032,
+      "grad_norm": 0.274749755859375,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 10.7587,
+      "step": 64
+    },
+    {
+      "epoch": 0.014044943820224719,
+      "grad_norm": 0.21438376605510712,
+      "learning_rate": 8.9900861364012e-05,
+      "loss": 10.773,
+      "step": 65
+    },
+    {
+      "epoch": 0.014261019878997408,
+      "grad_norm": 0.2738189995288849,
+      "learning_rate": 8.933724690167417e-05,
+      "loss": 10.7707,
+      "step": 66
+    },
+    {
+      "epoch": 0.014477095937770095,
+      "grad_norm": 0.2180059552192688,
+      "learning_rate": 8.876019880555649e-05,
+      "loss": 10.7649,
+      "step": 67
+    },
+    {
+      "epoch": 0.014693171996542784,
+      "grad_norm": 0.2755647301673889,
+      "learning_rate": 8.816991413705516e-05,
+      "loss": 10.7556,
+      "step": 68
+    },
+    {
+      "epoch": 0.014909248055315471,
+      "grad_norm": 0.2628243863582611,
+      "learning_rate": 8.756659447784368e-05,
+      "loss": 10.7485,
+      "step": 69
+    },
+    {
+      "epoch": 0.01512532411408816,
+      "grad_norm": 0.27653780579566956,
+      "learning_rate": 8.695044586103296e-05,
+      "loss": 10.7742,
+      "step": 70
+    },
+    {
+      "epoch": 0.015341400172860847,
+      "grad_norm": 0.292253315448761,
+      "learning_rate": 8.632167870081121e-05,
+      "loss": 10.7542,
+      "step": 71
+    },
+    {
+      "epoch": 0.015557476231633534,
+      "grad_norm": 0.21869610249996185,
+      "learning_rate": 8.568050772058762e-05,
+      "loss": 10.7724,
+      "step": 72
+    },
+    {
+      "epoch": 0.015773552290406223,
+      "grad_norm": 0.20132863521575928,
+      "learning_rate": 8.502715187966455e-05,
+      "loss": 10.7621,
+      "step": 73
+    },
+    {
+      "epoch": 0.01598962834917891,
+      "grad_norm": 0.2435745745897293,
+      "learning_rate": 8.436183429846313e-05,
+      "loss": 10.7661,
+      "step": 74
+    },
+    {
+      "epoch": 0.016205704407951597,
+      "grad_norm": 0.23909196257591248,
+      "learning_rate": 8.368478218232787e-05,
+      "loss": 10.7694,
+      "step": 75
+    },
+    {
+      "epoch": 0.016421780466724288,
+      "grad_norm": 0.22482334077358246,
+      "learning_rate": 8.299622674393614e-05,
+      "loss": 10.7642,
+      "step": 76
+    },
+    {
+      "epoch": 0.016637856525496975,
+      "grad_norm": 0.2314310371875763,
+      "learning_rate": 8.229640312433937e-05,
+      "loss": 10.7584,
+      "step": 77
+    },
+    {
+      "epoch": 0.016853932584269662,
+      "grad_norm": 0.23524075746536255,
+      "learning_rate": 8.158555031266254e-05,
+      "loss": 10.7632,
+      "step": 78
+    },
+    {
+      "epoch": 0.01707000864304235,
+      "grad_norm": 0.20763476192951202,
+      "learning_rate": 8.086391106448965e-05,
+      "loss": 10.7533,
+      "step": 79
+    },
+    {
+      "epoch": 0.01728608470181504,
+      "grad_norm": 0.1953965127468109,
+      "learning_rate": 8.013173181896283e-05,
+      "loss": 10.7633,
+      "step": 80
+    },
+    {
+      "epoch": 0.017502160760587727,
+      "grad_norm": 0.1644563525915146,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 10.764,
+      "step": 81
+    },
+    {
+      "epoch": 0.017718236819360415,
+      "grad_norm": 0.2148687094449997,
+      "learning_rate": 7.863675700402526e-05,
+      "loss": 10.7566,
+      "step": 82
+    },
+    {
+      "epoch": 0.017934312878133102,
+      "grad_norm": 0.2654571831226349,
+      "learning_rate": 7.787447196714427e-05,
+      "loss": 10.7444,
+      "step": 83
+    },
+    {
+      "epoch": 0.018150388936905792,
+      "grad_norm": 0.231702521443367,
+      "learning_rate": 7.710266782362247e-05,
+      "loss": 10.753,
+      "step": 84
+    },
+    {
+      "epoch": 0.01836646499567848,
+      "grad_norm": 0.26304373145103455,
+      "learning_rate": 7.63216081438678e-05,
+      "loss": 10.7559,
+      "step": 85
+    },
+    {
+      "epoch": 0.018582541054451167,
+      "grad_norm": 0.19173169136047363,
+      "learning_rate": 7.553155965904535e-05,
+      "loss": 10.7675,
+      "step": 86
+    },
+    {
+      "epoch": 0.018798617113223854,
+      "grad_norm": 0.2556131184101105,
+      "learning_rate": 7.473279216998895e-05,
+      "loss": 10.7589,
+      "step": 87
+    },
+    {
+      "epoch": 0.01901469317199654,
+      "grad_norm": 0.22893981635570526,
+      "learning_rate": 7.392557845506432e-05,
+      "loss": 10.7599,
+      "step": 88
+    },
+    {
+      "epoch": 0.019230769230769232,
+      "grad_norm": 0.21988166868686676,
+      "learning_rate": 7.311019417701566e-05,
+      "loss": 10.7564,
+      "step": 89
+    },
+    {
+      "epoch": 0.01944684528954192,
+      "grad_norm": 0.24298734962940216,
+      "learning_rate": 7.228691778882693e-05,
+      "loss": 10.7532,
+      "step": 90
+    },
+    {
+      "epoch": 0.019662921348314606,
+      "grad_norm": 0.21422868967056274,
+      "learning_rate": 7.145603043863045e-05,
+      "loss": 10.7588,
+      "step": 91
+    },
+    {
+      "epoch": 0.019878997407087293,
+      "grad_norm": 0.18029530346393585,
+      "learning_rate": 7.061781587369519e-05,
+      "loss": 10.7508,
+      "step": 92
+    },
+    {
+      "epoch": 0.020095073465859984,
+      "grad_norm": 0.29323095083236694,
+      "learning_rate": 6.977256034352712e-05,
+      "loss": 10.7534,
+      "step": 93
+    },
+    {
+      "epoch": 0.02031114952463267,
+      "grad_norm": 0.24313682317733765,
+      "learning_rate": 6.892055250211552e-05,
+      "loss": 10.753,
+      "step": 94
+    },
+    {
+      "epoch": 0.02052722558340536,
+      "grad_norm": 0.30919376015663147,
+      "learning_rate": 6.806208330935766e-05,
+      "loss": 10.7502,
+      "step": 95
+    },
+    {
+      "epoch": 0.020743301642178046,
+      "grad_norm": 0.3091332018375397,
+      "learning_rate": 6.719744593169641e-05,
+      "loss": 10.7492,
+      "step": 96
+    },
+    {
+      "epoch": 0.020959377700950736,
+      "grad_norm": 0.31597936153411865,
+      "learning_rate": 6.632693564200416e-05,
+      "loss": 10.7353,
+      "step": 97
+    },
+    {
+      "epoch": 0.021175453759723423,
+      "grad_norm": 0.41740843653678894,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 10.753,
+      "step": 98
+    },
+    {
+      "epoch": 0.02139152981849611,
+      "grad_norm": 0.3725014328956604,
+      "learning_rate": 6.456948734446624e-05,
+      "loss": 10.72,
+      "step": 99
+    },
+    {
+      "epoch": 0.021607605877268798,
+      "grad_norm": 0.6240119934082031,
+      "learning_rate": 6.368314950360415e-05,
+      "loss": 10.7379,
+      "step": 100
+    },
+    {
+      "epoch": 0.021607605877268798,
+      "eval_loss": 10.751439094543457,
+      "eval_runtime": 27.1534,
+      "eval_samples_per_second": 287.073,
+      "eval_steps_per_second": 143.555,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 69847090200576.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null