Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffcaf9bc3145c5f51cb827fe6859d3c4c28419733af1aecd334d65d8b0c2d4b9
 size 59933632

 version https://git-lfs.github.com/spec/v1
+oid sha256:9860b73015b15607627aceb40b08147e8f53966aa2cdaf579d26c56d260c4253
 size 59933632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:498eb87000436e496596a10f6fe93c9f5b3301bdcb229ded9ea5e28d27f56e33
 size 31822948

 version https://git-lfs.github.com/spec/v1
+oid sha256:66a968e36f5602a15c95b5682482753dacf50dd4f7eabf0937b3718f438ec804
 size 31822948

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4404254d7022d0a9bac0c2eab18f30dd904bd65c5271c85db04be3fa96d6e5c8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:63e5a52ccff9e6b431cd0b66738dcf15797c1e46a2c0785103a072af324fa007
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9e02dc10b7239989ab9b4418ee704e53fad611ad6b77ad633028bb8eb5238dd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fc7800513a1b4dd006c457152c700dd768bb49ee4ed8e4d9665a4e42095b054
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0031681662653656063,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 27.347,
       "eval_steps_per_second": 13.673,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -392,7 +750,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6838926195032064.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0063363325307312125,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.347,
       "eval_steps_per_second": 13.673,
       "step": 50
+    },
+    {
+      "epoch": 0.0032315295906729185,
+      "grad_norm": 0.8580302596092224,
+      "learning_rate": 0.00017788772787621126,
+      "loss": 0.3992,
+      "step": 51
+    },
+    {
+      "epoch": 0.0032948929159802307,
+      "grad_norm": 0.7331790328025818,
+      "learning_rate": 0.00017684011108568592,
+      "loss": 0.5749,
+      "step": 52
+    },
+    {
+      "epoch": 0.003358256241287543,
+      "grad_norm": 1.4232603311538696,
+      "learning_rate": 0.0001757714869760335,
+      "loss": 0.633,
+      "step": 53
+    },
+    {
+      "epoch": 0.003421619566594855,
+      "grad_norm": 0.6469828486442566,
+      "learning_rate": 0.0001746821476984154,
+      "loss": 0.4805,
+      "step": 54
+    },
+    {
+      "epoch": 0.003484982891902167,
+      "grad_norm": 0.5833075046539307,
+      "learning_rate": 0.00017357239106731317,
+      "loss": 0.303,
+      "step": 55
+    },
+    {
+      "epoch": 0.003548346217209479,
+      "grad_norm": 0.5430967807769775,
+      "learning_rate": 0.00017244252047910892,
+      "loss": 0.3606,
+      "step": 56
+    },
+    {
+      "epoch": 0.0036117095425167913,
+      "grad_norm": 0.7270248532295227,
+      "learning_rate": 0.00017129284482913972,
+      "loss": 0.519,
+      "step": 57
+    },
+    {
+      "epoch": 0.0036750728678241035,
+      "grad_norm": 0.497071236371994,
+      "learning_rate": 0.00017012367842724887,
+      "loss": 0.3192,
+      "step": 58
+    },
+    {
+      "epoch": 0.0037384361931314157,
+      "grad_norm": 0.5522649884223938,
+      "learning_rate": 0.0001689353409118566,
+      "loss": 0.4403,
+      "step": 59
+    },
+    {
+      "epoch": 0.0038017995184387275,
+      "grad_norm": 1.4259494543075562,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 0.4538,
+      "step": 60
+    },
+    {
+      "epoch": 0.0038651628437460397,
+      "grad_norm": 0.5942732095718384,
+      "learning_rate": 0.0001665024572113848,
+      "loss": 0.3762,
+      "step": 61
+    },
+    {
+      "epoch": 0.003928526169053352,
+      "grad_norm": 0.5439912676811218,
+      "learning_rate": 0.00016525857615241687,
+      "loss": 0.4122,
+      "step": 62
+    },
+    {
+      "epoch": 0.003991889494360664,
+      "grad_norm": 0.4867478311061859,
+      "learning_rate": 0.00016399685405033167,
+      "loss": 0.2925,
+      "step": 63
+    },
+    {
+      "epoch": 0.004055252819667976,
+      "grad_norm": 0.8213767409324646,
+      "learning_rate": 0.0001627176358473537,
+      "loss": 0.4217,
+      "step": 64
+    },
+    {
+      "epoch": 0.004118616144975288,
+      "grad_norm": 1.1848950386047363,
+      "learning_rate": 0.0001614212712689668,
+      "loss": 0.3365,
+      "step": 65
+    },
+    {
+      "epoch": 0.004181979470282601,
+      "grad_norm": 0.7401292324066162,
+      "learning_rate": 0.00016010811472830252,
+      "loss": 0.7397,
+      "step": 66
+    },
+    {
+      "epoch": 0.0042453427955899126,
+      "grad_norm": 0.910925030708313,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.431,
+      "step": 67
+    },
+    {
+      "epoch": 0.004308706120897224,
+      "grad_norm": 0.8558295965194702,
+      "learning_rate": 0.00015743286626829437,
+      "loss": 0.3999,
+      "step": 68
+    },
+    {
+      "epoch": 0.004372069446204537,
+      "grad_norm": 0.5612680315971375,
+      "learning_rate": 0.0001560715057351673,
+      "loss": 0.4328,
+      "step": 69
+    },
+    {
+      "epoch": 0.004435432771511849,
+      "grad_norm": 0.38900530338287354,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 0.4551,
+      "step": 70
+    },
+    {
+      "epoch": 0.004498796096819161,
+      "grad_norm": 0.5994769334793091,
+      "learning_rate": 0.0001533031728727994,
+      "loss": 0.4326,
+      "step": 71
+    },
+    {
+      "epoch": 0.004562159422126473,
+      "grad_norm": 0.5441716909408569,
+      "learning_rate": 0.00015189695737812152,
+      "loss": 0.7896,
+      "step": 72
+    },
+    {
+      "epoch": 0.004625522747433785,
+      "grad_norm": 1.3651723861694336,
+      "learning_rate": 0.0001504765537734844,
+      "loss": 0.3994,
+      "step": 73
+    },
+    {
+      "epoch": 0.004688886072741098,
+      "grad_norm": 1.0307939052581787,
+      "learning_rate": 0.00014904235038305083,
+      "loss": 0.4365,
+      "step": 74
+    },
+    {
+      "epoch": 0.004752249398048409,
+      "grad_norm": 0.5589604377746582,
+      "learning_rate": 0.00014759473930370736,
+      "loss": 0.4381,
+      "step": 75
+    },
+    {
+      "epoch": 0.004815612723355722,
+      "grad_norm": 0.6483279466629028,
+      "learning_rate": 0.0001461341162978688,
+      "loss": 1.2379,
+      "step": 76
+    },
+    {
+      "epoch": 0.004878976048663034,
+      "grad_norm": 0.688217282295227,
+      "learning_rate": 0.00014466088068528068,
+      "loss": 0.3769,
+      "step": 77
+    },
+    {
+      "epoch": 0.004942339373970346,
+      "grad_norm": 0.6985648274421692,
+      "learning_rate": 0.00014317543523384928,
+      "loss": 0.2771,
+      "step": 78
+    },
+    {
+      "epoch": 0.005005702699277658,
+      "grad_norm": 1.2414263486862183,
+      "learning_rate": 0.00014167818604952906,
+      "loss": 0.3534,
+      "step": 79
+    },
+    {
+      "epoch": 0.00506906602458497,
+      "grad_norm": 0.6788727045059204,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 0.5284,
+      "step": 80
+    },
+    {
+      "epoch": 0.005132429349892283,
+      "grad_norm": 0.7818289995193481,
+      "learning_rate": 0.00013864991692924523,
+      "loss": 0.2941,
+      "step": 81
+    },
+    {
+      "epoch": 0.0051957926751995944,
+      "grad_norm": 0.6787352561950684,
+      "learning_rate": 0.00013711972489182208,
+      "loss": 0.3318,
+      "step": 82
+    },
+    {
+      "epoch": 0.005259156000506906,
+      "grad_norm": 0.971752941608429,
+      "learning_rate": 0.00013557938469225167,
+      "loss": 0.4559,
+      "step": 83
+    },
+    {
+      "epoch": 0.005322519325814219,
+      "grad_norm": 0.5854300260543823,
+      "learning_rate": 0.00013402931744416433,
+      "loss": 0.3601,
+      "step": 84
+    },
+    {
+      "epoch": 0.005385882651121531,
+      "grad_norm": 0.7647449374198914,
+      "learning_rate": 0.00013246994692046836,
+      "loss": 0.435,
+      "step": 85
+    },
+    {
+      "epoch": 0.005449245976428843,
+      "grad_norm": 0.5981462001800537,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 0.4311,
+      "step": 86
+    },
+    {
+      "epoch": 0.005512609301736155,
+      "grad_norm": 0.4233483374118805,
+      "learning_rate": 0.0001293250037384465,
+      "loss": 0.415,
+      "step": 87
+    },
+    {
+      "epoch": 0.005575972627043467,
+      "grad_norm": 0.8820663094520569,
+      "learning_rate": 0.00012774029087618446,
+      "loss": 0.3958,
+      "step": 88
+    },
+    {
+      "epoch": 0.0056393359523507795,
+      "grad_norm": 0.6327323317527771,
+      "learning_rate": 0.00012614799409538198,
+      "loss": 0.6534,
+      "step": 89
+    },
+    {
+      "epoch": 0.005702699277658091,
+      "grad_norm": 0.5511671304702759,
+      "learning_rate": 0.00012454854871407994,
+      "loss": 0.3668,
+      "step": 90
+    },
+    {
+      "epoch": 0.005766062602965404,
+      "grad_norm": 0.7432234883308411,
+      "learning_rate": 0.00012294239200467516,
+      "loss": 0.3317,
+      "step": 91
+    },
+    {
+      "epoch": 0.005829425928272716,
+      "grad_norm": 0.6933262348175049,
+      "learning_rate": 0.0001213299630743747,
+      "loss": 0.3444,
+      "step": 92
+    },
+    {
+      "epoch": 0.0058927892535800275,
+      "grad_norm": 0.7745187878608704,
+      "learning_rate": 0.00011971170274514802,
+      "loss": 0.1975,
+      "step": 93
+    },
+    {
+      "epoch": 0.00595615257888734,
+      "grad_norm": 0.6152411699295044,
+      "learning_rate": 0.000118088053433211,
+      "loss": 0.5463,
+      "step": 94
+    },
+    {
+      "epoch": 0.006019515904194652,
+      "grad_norm": 1.3992512226104736,
+      "learning_rate": 0.00011645945902807341,
+      "loss": 0.4622,
+      "step": 95
+    },
+    {
+      "epoch": 0.0060828792295019645,
+      "grad_norm": 0.8983330726623535,
+      "learning_rate": 0.0001148263647711842,
+      "loss": 0.466,
+      "step": 96
+    },
+    {
+      "epoch": 0.006146242554809276,
+      "grad_norm": 0.5503897666931152,
+      "learning_rate": 0.00011318921713420691,
+      "loss": 0.37,
+      "step": 97
+    },
+    {
+      "epoch": 0.006209605880116588,
+      "grad_norm": 0.5463910102844238,
+      "learning_rate": 0.00011154846369695863,
+      "loss": 0.3198,
+      "step": 98
+    },
+    {
+      "epoch": 0.006272969205423901,
+      "grad_norm": 0.4767880141735077,
+      "learning_rate": 0.0001099045530250463,
+      "loss": 0.3526,
+      "step": 99
+    },
+    {
+      "epoch": 0.0063363325307312125,
+      "grad_norm": 1.2058966159820557,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 0.4968,
+      "step": 100
+    },
+    {
+      "epoch": 0.0063363325307312125,
+      "eval_loss": 0.3803197145462036,
+      "eval_runtime": 243.4104,
+      "eval_samples_per_second": 27.304,
+      "eval_steps_per_second": 13.652,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.3677852390064128e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null