Training in progress, step 1500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +305 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:076a9e08fa23f4c8bfb5c9f047bea205e3658d1a28942b0b2595597bf2431ec3
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:84836aadd5b7668801f43f94256af1462d4f6fcd0638b1078edbab3752655c83
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec55549db4cf183d94da9ff06bd0988d76c99bcbbb7944c2410ffc0f979d1188
 size 671473763

 version https://git-lfs.github.com/spec/v1
+oid sha256:110c7016c149e8b740c8790f09dcce9198bbae6ee7cc39f1fd5d7f46e2210a01
 size 671473763

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6b8943477e33b4ca1220d2abfc5f6bc3df7264ba11576be9d2c6af84fce0ae2
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:00df7897a1fc6c054036bcebad4f3efec8761e2e58635a805d2ef197e09b731c
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a717c54c61f00318563a2243900cad87ed16f178d7acf7675538ea64c8f7c0e3
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:675c447775e07c9091c55f2dffbbf5646a2b0f067a2b30d2abcb37bd027db68f
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.036713527405118476,
   "eval_steps": 500,
-  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1205,6 +1205,307 @@
       "learning_rate": 1.1998599831119912e-05,
       "loss": 1.491,
       "step": 1197
     }
   ],
   "logging_steps": 7,
@@ -1219,12 +1520,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.952125425975296e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0458919092563981,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.1998599831119912e-05,
       "loss": 1.491,
       "step": 1197
+    },
+    {
+      "epoch": 0.03683590582980221,
+      "grad_norm": 1.5545629262924194,
+      "learning_rate": 1.1474337861210543e-05,
+      "loss": 1.4981,
+      "step": 1204
+    },
+    {
+      "epoch": 0.03705006807299873,
+      "grad_norm": 2.0076568126678467,
+      "learning_rate": 1.096029866616704e-05,
+      "loss": 1.5171,
+      "step": 1211
+    },
+    {
+      "epoch": 0.03726423031619525,
+      "grad_norm": 1.0814955234527588,
+      "learning_rate": 1.0456618646161954e-05,
+      "loss": 1.3583,
+      "step": 1218
+    },
+    {
+      "epoch": 0.03747839255939178,
+      "grad_norm": 1.3351236581802368,
+      "learning_rate": 9.963431452563332e-06,
+      "loss": 1.4998,
+      "step": 1225
+    },
+    {
+      "epoch": 0.037692554802588304,
+      "grad_norm": 1.345676064491272,
+      "learning_rate": 9.480867952470284e-06,
+      "loss": 1.4631,
+      "step": 1232
+    },
+    {
+      "epoch": 0.03790671704578483,
+      "grad_norm": 1.1110200881958008,
+      "learning_rate": 9.00905619398757e-06,
+      "loss": 1.4986,
+      "step": 1239
+    },
+    {
+      "epoch": 0.03812087928898135,
+      "grad_norm": 1.1065698862075806,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.4631,
+      "step": 1246
+    },
+    {
+      "epoch": 0.03833504153217788,
+      "grad_norm": 1.2057925462722778,
+      "learning_rate": 8.098185796191631e-06,
+      "loss": 1.439,
+      "step": 1253
+    },
+    {
+      "epoch": 0.0385492037753744,
+      "grad_norm": 1.4041844606399536,
+      "learning_rate": 7.659368856111926e-06,
+      "loss": 1.4684,
+      "step": 1260
+    },
+    {
+      "epoch": 0.03876336601857092,
+      "grad_norm": 1.3971853256225586,
+      "learning_rate": 7.2317869919746705e-06,
+      "loss": 1.439,
+      "step": 1267
+    },
+    {
+      "epoch": 0.03897752826176745,
+      "grad_norm": 1.2180246114730835,
+      "learning_rate": 6.815553662521185e-06,
+      "loss": 1.5764,
+      "step": 1274
+    },
+    {
+      "epoch": 0.039191690504963975,
+      "grad_norm": 1.822922706604004,
+      "learning_rate": 6.410779315161886e-06,
+      "loss": 1.4794,
+      "step": 1281
+    },
+    {
+      "epoch": 0.0394058527481605,
+      "grad_norm": 1.2133909463882446,
+      "learning_rate": 6.017571356669183e-06,
+      "loss": 1.4784,
+      "step": 1288
+    },
+    {
+      "epoch": 0.039620014991357026,
+      "grad_norm": 1.4232499599456787,
+      "learning_rate": 5.636034124677042e-06,
+      "loss": 1.405,
+      "step": 1295
+    },
+    {
+      "epoch": 0.03983417723455355,
+      "grad_norm": 1.8573203086853027,
+      "learning_rate": 5.266268859995083e-06,
+      "loss": 1.4153,
+      "step": 1302
+    },
+    {
+      "epoch": 0.04004833947775007,
+      "grad_norm": 1.6481467485427856,
+      "learning_rate": 4.908373679744316e-06,
+      "loss": 1.3081,
+      "step": 1309
+    },
+    {
+      "epoch": 0.0402625017209466,
+      "grad_norm": 2.072063446044922,
+      "learning_rate": 4.562443551321788e-06,
+      "loss": 1.2842,
+      "step": 1316
+    },
+    {
+      "epoch": 0.04047666396414312,
+      "grad_norm": 1.2372018098831177,
+      "learning_rate": 4.228570267201049e-06,
+      "loss": 1.5924,
+      "step": 1323
+    },
+    {
+      "epoch": 0.040690826207339645,
+      "grad_norm": 1.206811785697937,
+      "learning_rate": 3.90684242057498e-06,
+      "loss": 1.4247,
+      "step": 1330
+    },
+    {
+      "epoch": 0.04090498845053617,
+      "grad_norm": 1.6587467193603516,
+      "learning_rate": 3.5973453818476556e-06,
+      "loss": 1.4644,
+      "step": 1337
+    },
+    {
+      "epoch": 0.041119150693732696,
+      "grad_norm": 1.1888831853866577,
+      "learning_rate": 3.3001612759813393e-06,
+      "loss": 1.5327,
+      "step": 1344
+    },
+    {
+      "epoch": 0.04133331293692922,
+      "grad_norm": 1.0991014242172241,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.4681,
+      "step": 1351
+    },
+    {
+      "epoch": 0.04154747518012574,
+      "grad_norm": 1.5934925079345703,
+      "learning_rate": 2.743044005587425e-06,
+      "loss": 1.3098,
+      "step": 1358
+    },
+    {
+      "epoch": 0.04176163742332227,
+      "grad_norm": 1.4214099645614624,
+      "learning_rate": 2.4832586719889416e-06,
+      "loss": 1.4065,
+      "step": 1365
+    },
+    {
+      "epoch": 0.04197579966651879,
+      "grad_norm": 1.2822719812393188,
+      "learning_rate": 2.2360818938828187e-06,
+      "loss": 1.4728,
+      "step": 1372
+    },
+    {
+      "epoch": 0.042189961909715315,
+      "grad_norm": 1.119520664215088,
+      "learning_rate": 2.0015792595656226e-06,
+      "loss": 1.4284,
+      "step": 1379
+    },
+    {
+      "epoch": 0.042404124152911844,
+      "grad_norm": 1.3280854225158691,
+      "learning_rate": 1.7798129942530551e-06,
+      "loss": 1.4559,
+      "step": 1386
+    },
+    {
+      "epoch": 0.04261828639610837,
+      "grad_norm": 1.1883761882781982,
+      "learning_rate": 1.5708419435684462e-06,
+      "loss": 1.3757,
+      "step": 1393
+    },
+    {
+      "epoch": 0.04283244863930489,
+      "grad_norm": 1.3203167915344238,
+      "learning_rate": 1.374721557928116e-06,
+      "loss": 1.6273,
+      "step": 1400
+    },
+    {
+      "epoch": 0.04304661088250142,
+      "grad_norm": 1.2010129690170288,
+      "learning_rate": 1.191503877827621e-06,
+      "loss": 1.4944,
+      "step": 1407
+    },
+    {
+      "epoch": 0.04326077312569794,
+      "grad_norm": 1.6128370761871338,
+      "learning_rate": 1.0212375200327973e-06,
+      "loss": 1.6092,
+      "step": 1414
+    },
+    {
+      "epoch": 0.04347493536889446,
+      "grad_norm": 1.0786170959472656,
+      "learning_rate": 8.639676646793382e-07,
+      "loss": 1.4227,
+      "step": 1421
+    },
+    {
+      "epoch": 0.043689097612090985,
+      "grad_norm": 1.0348191261291504,
+      "learning_rate": 7.197360432842359e-07,
+      "loss": 1.5162,
+      "step": 1428
+    },
+    {
+      "epoch": 0.043903259855287514,
+      "grad_norm": 1.0884389877319336,
+      "learning_rate": 5.885809276723608e-07,
+      "loss": 1.5015,
+      "step": 1435
+    },
+    {
+      "epoch": 0.04411742209848404,
+      "grad_norm": 1.5776199102401733,
+      "learning_rate": 4.705371198210129e-07,
+      "loss": 1.5789,
+      "step": 1442
+    },
+    {
+      "epoch": 0.04433158434168056,
+      "grad_norm": 1.4270485639572144,
+      "learning_rate": 3.65635942625242e-07,
+      "loss": 1.5011,
+      "step": 1449
+    },
+    {
+      "epoch": 0.04454574658487709,
+      "grad_norm": 1.2836881875991821,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.4921,
+      "step": 1456
+    },
+    {
+      "epoch": 0.04475990882807361,
+      "grad_norm": 1.3851501941680908,
+      "learning_rate": 1.953693274256374e-07,
+      "loss": 1.5536,
+      "step": 1463
+    },
+    {
+      "epoch": 0.04497407107127013,
+      "grad_norm": 1.9082521200180054,
+      "learning_rate": 1.3004906962578721e-07,
+      "loss": 1.3908,
+      "step": 1470
+    },
+    {
+      "epoch": 0.04518823331446666,
+      "grad_norm": 1.3173164129257202,
+      "learning_rate": 7.796179090094891e-08,
+      "loss": 1.4139,
+      "step": 1477
+    },
+    {
+      "epoch": 0.045402395557663185,
+      "grad_norm": 1.4804590940475464,
+      "learning_rate": 3.9121312597573125e-08,
+      "loss": 1.4134,
+      "step": 1484
+    },
+    {
+      "epoch": 0.04561655780085971,
+      "grad_norm": 1.5944395065307617,
+      "learning_rate": 1.3537941026914303e-08,
+      "loss": 1.301,
+      "step": 1491
+    },
+    {
+      "epoch": 0.045830720044056236,
+      "grad_norm": 1.4432575702667236,
+      "learning_rate": 1.2184647302626583e-09,
+      "loss": 1.5252,
+      "step": 1498
     }
   ],
   "logging_steps": 7,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.119015678246912e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null