alicegoesdown committed
Commit bbd51c0 · verified · 1 Parent(s): 0bfe786

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa4586cd39b6095e3e3350c4b4fd4a423feddd57d5b89309521c271dd1edfcea
+oid sha256:846434ca50ba2a07c1aa914ac92335cf2e993e47f0a4fb98035e3767df707187
 size 144748392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:940d506977625c427e4f4d8efd30db209d869dd599b81aaccc1fdfb648b4baf9
-size 289690562
+oid sha256:76bd4bbf6bad97827cd2d1c96918889601fe5549f9ddbaa27e3833ca7ecf5226
+size 289690498
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c134142163f210ef636989093f8a3cf420b7af01245544ebe6b11eca5dd0cfe
+oid sha256:3bcf421949ef5e2cf610a12a564035f1eafe2c2459ad36aec2693cef4a5645fc
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d4a0c3221fa3abf4dadac0cf9d8e01267c0b24a8953a99482185a67eeee3950
+oid sha256:e2a61a0595dd1862605bddb150b0c4ebd6b684b46d33bd5e4926bf5e77255160
 size 1256
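
The four files above are stored with Git LFS, so the commit only changes their pointers (the oid sha256 and size fields), not inline binary content. To confirm that a locally downloaded checkpoint file matches the pointer recorded in this commit, a minimal sketch along these lines can be used; the path, hash, and size come from the diff above, while the verify_lfs_pointer helper itself is an illustrative assumption, not part of this repo.

import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest() == expected_oid

# Values copied from the new pointer for last-checkpoint/adapter_model.safetensors
ok = verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "846434ca50ba2a07c1aa914ac92335cf2e993e47f0a4fb98035e3767df707187",
    144748392,
)
print("adapter_model.safetensors matches pointer:", ok)
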
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.113584280014038,
-  "best_model_checkpoint": "./output/checkpoint-150",
-  "epoch": 0.009560229445506692,
+  "best_metric": 1.0569764375686646,
+  "best_model_checkpoint": "./output/checkpoint-300",
+  "epoch": 0.019120458891013385,
   "eval_steps": 150,
-  "global_step": 150,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -120,6 +120,119 @@
       "eval_samples_per_second": 12.171,
       "eval_steps_per_second": 12.171,
       "step": 150
+    },
+    {
+      "epoch": 0.010197578075207138,
+      "grad_norm": 0.8378329873085022,
+      "learning_rate": 0.00012495376120044173,
+      "loss": 1.0766,
+      "step": 160
+    },
+    {
+      "epoch": 0.010834926704907584,
+      "grad_norm": 0.7584741711616516,
+      "learning_rate": 0.00012493706665883217,
+      "loss": 1.1184,
+      "step": 170
+    },
+    {
+      "epoch": 0.011472275334608031,
+      "grad_norm": 0.7753424048423767,
+      "learning_rate": 0.00012491780557396154,
+      "loss": 1.089,
+      "step": 180
+    },
+    {
+      "epoch": 0.012109623964308477,
+      "grad_norm": 0.801698625087738,
+      "learning_rate": 0.00012489597873757756,
+      "loss": 1.0882,
+      "step": 190
+    },
+    {
+      "epoch": 0.012746972594008922,
+      "grad_norm": 0.7822267413139343,
+      "learning_rate": 0.00012487158704689602,
+      "loss": 1.0684,
+      "step": 200
+    },
+    {
+      "epoch": 0.01338432122370937,
+      "grad_norm": 0.689703106880188,
+      "learning_rate": 0.0001248446315045638,
+      "loss": 1.0564,
+      "step": 210
+    },
+    {
+      "epoch": 0.014021669853409816,
+      "grad_norm": 0.768937349319458,
+      "learning_rate": 0.00012481511321861763,
+      "loss": 1.0662,
+      "step": 220
+    },
+    {
+      "epoch": 0.014659018483110261,
+      "grad_norm": 0.6786907315254211,
+      "learning_rate": 0.00012478303340243864,
+      "loss": 1.067,
+      "step": 230
+    },
+    {
+      "epoch": 0.015296367112810707,
+      "grad_norm": 0.7319411039352417,
+      "learning_rate": 0.00012474839337470246,
+      "loss": 1.0751,
+      "step": 240
+    },
+    {
+      "epoch": 0.015933715742511154,
+      "grad_norm": 0.707256555557251,
+      "learning_rate": 0.0001247111945593249,
+      "loss": 1.0672,
+      "step": 250
+    },
+    {
+      "epoch": 0.0165710643722116,
+      "grad_norm": 0.7429525256156921,
+      "learning_rate": 0.00012467143848540359,
+      "loss": 1.086,
+      "step": 260
+    },
+    {
+      "epoch": 0.017208413001912046,
+      "grad_norm": 0.7446891069412231,
+      "learning_rate": 0.000124629126787155,
+      "loss": 1.0906,
+      "step": 270
+    },
+    {
+      "epoch": 0.017845761631612493,
+      "grad_norm": 0.7032232284545898,
+      "learning_rate": 0.00012458426120384738,
+      "loss": 1.0761,
+      "step": 280
+    },
+    {
+      "epoch": 0.018483110261312937,
+      "grad_norm": 0.7499470114707947,
+      "learning_rate": 0.00012453684357972906,
+      "loss": 1.0472,
+      "step": 290
+    },
+    {
+      "epoch": 0.019120458891013385,
+      "grad_norm": 0.7211316227912903,
+      "learning_rate": 0.00012448687586395289,
+      "loss": 1.0621,
+      "step": 300
+    },
+    {
+      "epoch": 0.019120458891013385,
+      "eval_loss": 1.0569764375686646,
+      "eval_runtime": 41.5487,
+      "eval_samples_per_second": 12.034,
+      "eval_steps_per_second": 12.034,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -139,7 +252,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4199804000010240.0,
+  "total_flos": 8380319636520960.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null