Training in progress, step 400, checkpoint
Browse files- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +151 -3
    	
        last-checkpoint/model-00001-of-00002.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 4949934200
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ef38e23f75049612d4593984a7a3ca4d8bb15ca4c375864be026d287d2b3887c
         | 
| 3 | 
             
            size 4949934200
         | 
    	
        last-checkpoint/model-00002-of-00002.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 1110862568
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:60e07fbe280b472c7130a237eb85fec83e68ee6443f3e43bba1ecad5a003ea84
         | 
| 3 | 
             
            size 1110862568
         | 
    	
        last-checkpoint/optimizer.pt
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:c3bd6b82fe6f7cb79e53c940f5432d5d84b10deb3c0f16e37914c245e9f465d4
         | 
| 3 | 
            +
            size 6159835552
         | 
    	
        last-checkpoint/rng_state_0.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:01bb9290ce3d59c6ff6ad761e9fc828c58d7a70fee34981771ade9f75e7a558c
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_1.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9e8b452ae4994b1cf755a70d101c2857ab3a01f90d161421b4c6b76a66a30614
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_2.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f818275221580f93945cc9ffeac693f8727beebedb70d39d160511e90bb32454
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_3.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7fb27e90b9efb2bc1ba3aca816751650d05e50ae0229533972456bd829f8572c
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/scheduler.pt
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 1064
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:0e4c78e85c4ba926d25150d4aaddeaf5728dcb066f4afc01202e3e56f29a5487
         | 
| 3 | 
             
            size 1064
         | 
    	
        last-checkpoint/trainer_state.json
    CHANGED
    
    | @@ -1,9 +1,9 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "best_metric": null,
         | 
| 3 | 
             
              "best_model_checkpoint": null,
         | 
| 4 | 
            -
              "epoch":  | 
| 5 | 
             
              "eval_steps": 200,
         | 
| 6 | 
            -
              "global_step":  | 
| 7 | 
             
              "is_hyper_param_search": false,
         | 
| 8 | 
             
              "is_local_process_zero": true,
         | 
| 9 | 
             
              "is_world_process_zero": true,
         | 
| @@ -163,6 +163,154 @@ | |
| 163 | 
             
                  "eval_samples_per_second": 114.579,
         | 
| 164 | 
             
                  "eval_steps_per_second": 5.801,
         | 
| 165 | 
             
                  "step": 200
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 166 | 
             
                }
         | 
| 167 | 
             
              ],
         | 
| 168 | 
             
              "logging_steps": 10,
         | 
| @@ -182,7 +330,7 @@ | |
| 182 | 
             
                  "attributes": {}
         | 
| 183 | 
             
                }
         | 
| 184 | 
             
              },
         | 
| 185 | 
            -
              "total_flos":  | 
| 186 | 
             
              "train_batch_size": 5,
         | 
| 187 | 
             
              "trial_name": null,
         | 
| 188 | 
             
              "trial_params": null
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "best_metric": null,
         | 
| 3 | 
             
              "best_model_checkpoint": null,
         | 
| 4 | 
            +
              "epoch": 9.090909090909092,
         | 
| 5 | 
             
              "eval_steps": 200,
         | 
| 6 | 
            +
              "global_step": 400,
         | 
| 7 | 
             
              "is_hyper_param_search": false,
         | 
| 8 | 
             
              "is_local_process_zero": true,
         | 
| 9 | 
             
              "is_world_process_zero": true,
         | 
|  | |
| 163 | 
             
                  "eval_samples_per_second": 114.579,
         | 
| 164 | 
             
                  "eval_steps_per_second": 5.801,
         | 
| 165 | 
             
                  "step": 200
         | 
| 166 | 
            +
                },
         | 
| 167 | 
            +
                {
         | 
| 168 | 
            +
                  "epoch": 4.7727272727272725,
         | 
| 169 | 
            +
                  "grad_norm": 1.21875,
         | 
| 170 | 
            +
                  "learning_rate": 0.00015469481581224272,
         | 
| 171 | 
            +
                  "loss": 0.3798,
         | 
| 172 | 
            +
                  "step": 210
         | 
| 173 | 
            +
                },
         | 
| 174 | 
            +
                {
         | 
| 175 | 
            +
                  "epoch": 5.0,
         | 
| 176 | 
            +
                  "grad_norm": 1.40625,
         | 
| 177 | 
            +
                  "learning_rate": 0.00015000000000000001,
         | 
| 178 | 
            +
                  "loss": 0.3946,
         | 
| 179 | 
            +
                  "step": 220
         | 
| 180 | 
            +
                },
         | 
| 181 | 
            +
                {
         | 
| 182 | 
            +
                  "epoch": 5.2272727272727275,
         | 
| 183 | 
            +
                  "grad_norm": 0.92578125,
         | 
| 184 | 
            +
                  "learning_rate": 0.00014515333583108896,
         | 
| 185 | 
            +
                  "loss": 0.185,
         | 
| 186 | 
            +
                  "step": 230
         | 
| 187 | 
            +
                },
         | 
| 188 | 
            +
                {
         | 
| 189 | 
            +
                  "epoch": 5.454545454545454,
         | 
| 190 | 
            +
                  "grad_norm": 0.953125,
         | 
| 191 | 
            +
                  "learning_rate": 0.00014016954246529696,
         | 
| 192 | 
            +
                  "loss": 0.2086,
         | 
| 193 | 
            +
                  "step": 240
         | 
| 194 | 
            +
                },
         | 
| 195 | 
            +
                {
         | 
| 196 | 
            +
                  "epoch": 5.681818181818182,
         | 
| 197 | 
            +
                  "grad_norm": 0.9609375,
         | 
| 198 | 
            +
                  "learning_rate": 0.00013506375551927547,
         | 
| 199 | 
            +
                  "loss": 0.2037,
         | 
| 200 | 
            +
                  "step": 250
         | 
| 201 | 
            +
                },
         | 
| 202 | 
            +
                {
         | 
| 203 | 
            +
                  "epoch": 5.909090909090909,
         | 
| 204 | 
            +
                  "grad_norm": 0.94921875,
         | 
| 205 | 
            +
                  "learning_rate": 0.00012985148110016947,
         | 
| 206 | 
            +
                  "loss": 0.2147,
         | 
| 207 | 
            +
                  "step": 260
         | 
| 208 | 
            +
                },
         | 
| 209 | 
            +
                {
         | 
| 210 | 
            +
                  "epoch": 6.136363636363637,
         | 
| 211 | 
            +
                  "grad_norm": 0.67578125,
         | 
| 212 | 
            +
                  "learning_rate": 0.00012454854871407994,
         | 
| 213 | 
            +
                  "loss": 0.1516,
         | 
| 214 | 
            +
                  "step": 270
         | 
| 215 | 
            +
                },
         | 
| 216 | 
            +
                {
         | 
| 217 | 
            +
                  "epoch": 6.363636363636363,
         | 
| 218 | 
            +
                  "grad_norm": 0.64453125,
         | 
| 219 | 
            +
                  "learning_rate": 0.00011917106319237386,
         | 
| 220 | 
            +
                  "loss": 0.1077,
         | 
| 221 | 
            +
                  "step": 280
         | 
| 222 | 
            +
                },
         | 
| 223 | 
            +
                {
         | 
| 224 | 
            +
                  "epoch": 6.590909090909091,
         | 
| 225 | 
            +
                  "grad_norm": 0.640625,
         | 
| 226 | 
            +
                  "learning_rate": 0.00011373535578184082,
         | 
| 227 | 
            +
                  "loss": 0.1024,
         | 
| 228 | 
            +
                  "step": 290
         | 
| 229 | 
            +
                },
         | 
| 230 | 
            +
                {
         | 
| 231 | 
            +
                  "epoch": 6.818181818181818,
         | 
| 232 | 
            +
                  "grad_norm": 0.62109375,
         | 
| 233 | 
            +
                  "learning_rate": 0.00010825793454723325,
         | 
| 234 | 
            +
                  "loss": 0.0919,
         | 
| 235 | 
            +
                  "step": 300
         | 
| 236 | 
            +
                },
         | 
| 237 | 
            +
                {
         | 
| 238 | 
            +
                  "epoch": 7.045454545454546,
         | 
| 239 | 
            +
                  "grad_norm": 0.392578125,
         | 
| 240 | 
            +
                  "learning_rate": 0.00010275543423681621,
         | 
| 241 | 
            +
                  "loss": 0.075,
         | 
| 242 | 
            +
                  "step": 310
         | 
| 243 | 
            +
                },
         | 
| 244 | 
            +
                {
         | 
| 245 | 
            +
                  "epoch": 7.2727272727272725,
         | 
| 246 | 
            +
                  "grad_norm": 0.3515625,
         | 
| 247 | 
            +
                  "learning_rate": 9.724456576318381e-05,
         | 
| 248 | 
            +
                  "loss": 0.0312,
         | 
| 249 | 
            +
                  "step": 320
         | 
| 250 | 
            +
                },
         | 
| 251 | 
            +
                {
         | 
| 252 | 
            +
                  "epoch": 7.5,
         | 
| 253 | 
            +
                  "grad_norm": 0.3203125,
         | 
| 254 | 
            +
                  "learning_rate": 9.174206545276677e-05,
         | 
| 255 | 
            +
                  "loss": 0.0276,
         | 
| 256 | 
            +
                  "step": 330
         | 
| 257 | 
            +
                },
         | 
| 258 | 
            +
                {
         | 
| 259 | 
            +
                  "epoch": 7.7272727272727275,
         | 
| 260 | 
            +
                  "grad_norm": 0.333984375,
         | 
| 261 | 
            +
                  "learning_rate": 8.626464421815919e-05,
         | 
| 262 | 
            +
                  "loss": 0.0259,
         | 
| 263 | 
            +
                  "step": 340
         | 
| 264 | 
            +
                },
         | 
| 265 | 
            +
                {
         | 
| 266 | 
            +
                  "epoch": 7.954545454545455,
         | 
| 267 | 
            +
                  "grad_norm": 0.30859375,
         | 
| 268 | 
            +
                  "learning_rate": 8.082893680762619e-05,
         | 
| 269 | 
            +
                  "loss": 0.0246,
         | 
| 270 | 
            +
                  "step": 350
         | 
| 271 | 
            +
                },
         | 
| 272 | 
            +
                {
         | 
| 273 | 
            +
                  "epoch": 8.181818181818182,
         | 
| 274 | 
            +
                  "grad_norm": 0.22265625,
         | 
| 275 | 
            +
                  "learning_rate": 7.54514512859201e-05,
         | 
| 276 | 
            +
                  "loss": 0.0135,
         | 
| 277 | 
            +
                  "step": 360
         | 
| 278 | 
            +
                },
         | 
| 279 | 
            +
                {
         | 
| 280 | 
            +
                  "epoch": 8.409090909090908,
         | 
| 281 | 
            +
                  "grad_norm": 0.2294921875,
         | 
| 282 | 
            +
                  "learning_rate": 7.014851889983057e-05,
         | 
| 283 | 
            +
                  "loss": 0.0107,
         | 
| 284 | 
            +
                  "step": 370
         | 
| 285 | 
            +
                },
         | 
| 286 | 
            +
                {
         | 
| 287 | 
            +
                  "epoch": 8.636363636363637,
         | 
| 288 | 
            +
                  "grad_norm": 0.1884765625,
         | 
| 289 | 
            +
                  "learning_rate": 6.493624448072457e-05,
         | 
| 290 | 
            +
                  "loss": 0.0105,
         | 
| 291 | 
            +
                  "step": 380
         | 
| 292 | 
            +
                },
         | 
| 293 | 
            +
                {
         | 
| 294 | 
            +
                  "epoch": 8.863636363636363,
         | 
| 295 | 
            +
                  "grad_norm": 0.267578125,
         | 
| 296 | 
            +
                  "learning_rate": 5.983045753470308e-05,
         | 
| 297 | 
            +
                  "loss": 0.0098,
         | 
| 298 | 
            +
                  "step": 390
         | 
| 299 | 
            +
                },
         | 
| 300 | 
            +
                {
         | 
| 301 | 
            +
                  "epoch": 9.090909090909092,
         | 
| 302 | 
            +
                  "grad_norm": 0.14453125,
         | 
| 303 | 
            +
                  "learning_rate": 5.484666416891109e-05,
         | 
| 304 | 
            +
                  "loss": 0.009,
         | 
| 305 | 
            +
                  "step": 400
         | 
| 306 | 
            +
                },
         | 
| 307 | 
            +
                {
         | 
| 308 | 
            +
                  "epoch": 9.090909090909092,
         | 
| 309 | 
            +
                  "eval_loss": 4.0009965896606445,
         | 
| 310 | 
            +
                  "eval_runtime": 13.0419,
         | 
| 311 | 
            +
                  "eval_samples_per_second": 115.091,
         | 
| 312 | 
            +
                  "eval_steps_per_second": 5.827,
         | 
| 313 | 
            +
                  "step": 400
         | 
| 314 | 
             
                }
         | 
| 315 | 
             
              ],
         | 
| 316 | 
             
              "logging_steps": 10,
         | 
|  | |
| 330 | 
             
                  "attributes": {}
         | 
| 331 | 
             
                }
         | 
| 332 | 
             
              },
         | 
| 333 | 
            +
              "total_flos": 2.827003985259397e+17,
         | 
| 334 | 
             
              "train_batch_size": 5,
         | 
| 335 | 
             
              "trial_name": null,
         | 
| 336 | 
             
              "trial_params": null
         |