neuralwonderland committed
Commit f5200d3 · verified · 1 Parent(s): 265de0b

Training in progress, step 2850, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:966b1eba27b60a69de9941a4adc40d47f6930990f3711e0c6b943dcd7e8be6f8
+oid sha256:507b0045580eaf7396aaa0f06eed3c9395cf84e452522665d7d9c90deb17eb9b
 size 17447528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f50057a5284d267fe090961c400974b32cf269a0ae5da66fea9da29dacdba32d
+oid sha256:e23f7491d13f76c7387fce24e6f4ca9afba3426a99cb9bcef450bb8c23de9fc8
 size 34959674
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c64140e0046965aea8413f1656f4090612556428328ec675266a89312bb1ca8f
+oid sha256:163146a068117fb98f41c23ddbb66802e20f4e6a275e3f9a2f37a754bb33ba25
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:631e61b76e4f847dac83ed69bd0682bc8277236b03d3ca41b916b7838850a58e
+oid sha256:675b3a9e5fe7372c3c43243a8218917ed6d7710114a3dd64818b1e7e3d4370e0
 size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.000862631481140852,
   "best_model_checkpoint": "./output/checkpoint-1950",
-  "epoch": 0.7529280535415505,
+  "epoch": 0.7947573898494144,
   "eval_steps": 150,
-  "global_step": 2700,
+  "global_step": 2850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2041,6 +2041,119 @@
       "eval_samples_per_second": 13.942,
       "eval_steps_per_second": 13.942,
       "step": 2700
+    },
+    {
+      "epoch": 0.7557166759620747,
+      "grad_norm": 0.011520965956151485,
+      "learning_rate": 5.609982526366054e-05,
+      "loss": 0.0006,
+      "step": 2710
+    },
+    {
+      "epoch": 0.758505298382599,
+      "grad_norm": 0.023217786103487015,
+      "learning_rate": 5.570135273266683e-05,
+      "loss": 0.0008,
+      "step": 2720
+    },
+    {
+      "epoch": 0.7612939208031233,
+      "grad_norm": 0.09491502493619919,
+      "learning_rate": 5.53031596674338e-05,
+      "loss": 0.0002,
+      "step": 2730
+    },
+    {
+      "epoch": 0.7640825432236475,
+      "grad_norm": 0.13169516623020172,
+      "learning_rate": 5.490526243611873e-05,
+      "loss": 0.0003,
+      "step": 2740
+    },
+    {
+      "epoch": 0.7668711656441718,
+      "grad_norm": 0.0027904631569981575,
+      "learning_rate": 5.450767739471837e-05,
+      "loss": 0.0007,
+      "step": 2750
+    },
+    {
+      "epoch": 0.769659788064696,
+      "grad_norm": 0.006744542624801397,
+      "learning_rate": 5.411042088639655e-05,
+      "loss": 0.0007,
+      "step": 2760
+    },
+    {
+      "epoch": 0.7724484104852203,
+      "grad_norm": 0.043837904930114746,
+      "learning_rate": 5.371350924081234e-05,
+      "loss": 0.0007,
+      "step": 2770
+    },
+    {
+      "epoch": 0.7752370329057445,
+      "grad_norm": 0.0588638074696064,
+      "learning_rate": 5.331695877344888e-05,
+      "loss": 0.0002,
+      "step": 2780
+    },
+    {
+      "epoch": 0.7780256553262688,
+      "grad_norm": 0.22443899512290955,
+      "learning_rate": 5.292078578494275e-05,
+      "loss": 0.0005,
+      "step": 2790
+    },
+    {
+      "epoch": 0.7808142777467931,
+      "grad_norm": 0.037049710750579834,
+      "learning_rate": 5.2525006560413816e-05,
+      "loss": 0.0003,
+      "step": 2800
+    },
+    {
+      "epoch": 0.7836029001673174,
+      "grad_norm": 0.018167397007346153,
+      "learning_rate": 5.212963736879578e-05,
+      "loss": 0.0014,
+      "step": 2810
+    },
+    {
+      "epoch": 0.7863915225878416,
+      "grad_norm": 0.0031329223420470953,
+      "learning_rate": 5.173469446216757e-05,
+      "loss": 0.0004,
+      "step": 2820
+    },
+    {
+      "epoch": 0.7891801450083659,
+      "grad_norm": 0.0297186728566885,
+      "learning_rate": 5.134019407508521e-05,
+      "loss": 0.0006,
+      "step": 2830
+    },
+    {
+      "epoch": 0.7919687674288901,
+      "grad_norm": 0.21967540681362152,
+      "learning_rate": 5.0946152423914456e-05,
+      "loss": 0.0002,
+      "step": 2840
+    },
+    {
+      "epoch": 0.7947573898494144,
+      "grad_norm": 0.013743867166340351,
+      "learning_rate": 5.0552585706164246e-05,
+      "loss": 0.0011,
+      "step": 2850
+    },
+    {
+      "epoch": 0.7947573898494144,
+      "eval_loss": 0.0009596548043191433,
+      "eval_runtime": 34.7856,
+      "eval_samples_per_second": 14.374,
+      "eval_steps_per_second": 14.374,
+      "step": 2850
     }
   ],
   "logging_steps": 10,
@@ -2060,7 +2173,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5811377525633843e+17,
+  "total_flos": 2.7249325604477338e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null