Training in progress, step 60, checkpoint
Browse files- last-checkpoint/model-00001-of-00003.safetensors +1 -1
- last-checkpoint/model-00002-of-00003.safetensors +1 -1
- last-checkpoint/model-00003-of-00003.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +25 -3
    	
        last-checkpoint/model-00001-of-00003.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 4938985352
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:135fec0f91a2997432c1116dfdd206a20f678c4534db200650554f4212bede69
         | 
| 3 | 
             
            size 4938985352
         | 
    	
        last-checkpoint/model-00002-of-00003.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 4947390880
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aadcc0bd81f6261f98319b6e668eda6fdd4f3a7b4d94d1a35f2ead5bd6ae99da
         | 
| 3 | 
             
            size 4947390880
         | 
    	
        last-checkpoint/model-00003-of-00003.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 3590488816
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aaa5f07ba2ab062b773f191935c86440580e62c7f058c55acd342ed4ea30a609
         | 
| 3 | 
             
            size 3590488816
         | 
    	
        last-checkpoint/optimizer.pt
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 13687759344
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:a567984d453c9250806dff4d56a1da85e5dae420b8e5a3ea6d834d3f13c736e7
         | 
| 3 | 
             
            size 13687759344
         | 
    	
        last-checkpoint/rng_state_0.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:0038fb6f21ca3e81c3e1dfdbad93100e39e8e615b5fad9db02349c28cf41449e
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_1.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7f45ce61591188f6d757b7069c1b1c209c1709c8592fa9d63f48f22d8a29eddf
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_2.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ca9915dadbc42bf9335c9ddddb4bfdf5317e9867b7d547371a8e1a633499c3d5
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/rng_state_3.pth
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 15024
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:31f7c5ddb0b28ad2aed8e01a65edd7968d02ddf7e6ec474ed5c5da54b3a7e8a6
         | 
| 3 | 
             
            size 15024
         | 
    	
        last-checkpoint/scheduler.pt
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 1064
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:e3671bd7fce476af6c5d68b2346a8d63f3c9e317781066d2c00973b04663cf26
         | 
| 3 | 
             
            size 1064
         | 
    	
        last-checkpoint/trainer_state.json
    CHANGED
    
    | @@ -1,9 +1,9 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "best_metric": null,
         | 
| 3 | 
             
              "best_model_checkpoint": null,
         | 
| 4 | 
            -
              "epoch": 0. | 
| 5 | 
             
              "eval_steps": 20,
         | 
| 6 | 
            -
              "global_step":  | 
| 7 | 
             
              "is_hyper_param_search": false,
         | 
| 8 | 
             
              "is_local_process_zero": true,
         | 
| 9 | 
             
              "is_world_process_zero": true,
         | 
| @@ -59,6 +59,28 @@ | |
| 59 | 
             
                  "eval_samples_per_second": 35.043,
         | 
| 60 | 
             
                  "eval_steps_per_second": 4.389,
         | 
| 61 | 
             
                  "step": 40
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 62 | 
             
                }
         | 
| 63 | 
             
              ],
         | 
| 64 | 
             
              "logging_steps": 10,
         | 
| @@ -78,7 +100,7 @@ | |
| 78 | 
             
                  "attributes": {}
         | 
| 79 | 
             
                }
         | 
| 80 | 
             
              },
         | 
| 81 | 
            -
              "total_flos":  | 
| 82 | 
             
              "train_batch_size": 2,
         | 
| 83 | 
             
              "trial_name": null,
         | 
| 84 | 
             
              "trial_params": null
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
              "best_metric": null,
         | 
| 3 | 
             
              "best_model_checkpoint": null,
         | 
| 4 | 
            +
              "epoch": 0.5555555555555556,
         | 
| 5 | 
             
              "eval_steps": 20,
         | 
| 6 | 
            +
              "global_step": 60,
         | 
| 7 | 
             
              "is_hyper_param_search": false,
         | 
| 8 | 
             
              "is_local_process_zero": true,
         | 
| 9 | 
             
              "is_world_process_zero": true,
         | 
|  | |
| 59 | 
             
                  "eval_samples_per_second": 35.043,
         | 
| 60 | 
             
                  "eval_steps_per_second": 4.389,
         | 
| 61 | 
             
                  "step": 40
         | 
| 62 | 
            +
                },
         | 
| 63 | 
            +
                {
         | 
| 64 | 
            +
                  "epoch": 0.46296296296296297,
         | 
| 65 | 
            +
                  "grad_norm": 1.3203125,
         | 
| 66 | 
            +
                  "learning_rate": 0.00019939306773179497,
         | 
| 67 | 
            +
                  "loss": 1.8177,
         | 
| 68 | 
            +
                  "step": 50
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                {
         | 
| 71 | 
            +
                  "epoch": 0.5555555555555556,
         | 
| 72 | 
            +
                  "grad_norm": 77.0,
         | 
| 73 | 
            +
                  "learning_rate": 0.00019863613034027224,
         | 
| 74 | 
            +
                  "loss": 1.9073,
         | 
| 75 | 
            +
                  "step": 60
         | 
| 76 | 
            +
                },
         | 
| 77 | 
            +
                {
         | 
| 78 | 
            +
                  "epoch": 0.5555555555555556,
         | 
| 79 | 
            +
                  "eval_loss": 1.8073501586914062,
         | 
| 80 | 
            +
                  "eval_runtime": 45.0134,
         | 
| 81 | 
            +
                  "eval_samples_per_second": 33.346,
         | 
| 82 | 
            +
                  "eval_steps_per_second": 4.177,
         | 
| 83 | 
            +
                  "step": 60
         | 
| 84 | 
             
                }
         | 
| 85 | 
             
              ],
         | 
| 86 | 
             
              "logging_steps": 10,
         | 
|  | |
| 100 | 
             
                  "attributes": {}
         | 
| 101 | 
             
                }
         | 
| 102 | 
             
              },
         | 
| 103 | 
            +
              "total_flos": 3.897170002024858e+16,
         | 
| 104 | 
             
              "train_batch_size": 2,
         | 
| 105 | 
             
              "trial_name": null,
         | 
| 106 | 
             
              "trial_params": null
         |