afull05 commited on
Commit
9660326
·
verified ·
1 Parent(s): 9896a30

Training in progress, step 334, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034b66bbefd5819fa88e17e2dea7b0e8ab5c14ab22537660f3048a25e6a7c617
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc3f907aaa715b71a61eebdecb1333d7c5f0a6d30903b65d7ddcf916d23be30
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac864be4e5f5a5eba9357b2e1306aebb2b578c74f0e112dcb1aaa4df98178a8
3
- size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b231a11b5058c73716d24e13fb98a1a8474c625633b1d6c44138c13b4edd7d
3
+ size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d49d3439d604b444970a71de5cb79f27d2d4be72cae91cf222b427976fd29865
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cce5595f22fff3c51a6d507b41d2596013d8383dba38b7309a6e6c86cbe8c90c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60f64d21f05d7df4421d09373be231cf5e5d1a10934be119a18d2b78545876ee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adcadd27beefdfbf5840092bf08d57b92f1d1b18154a8342ab8cd911b37488da
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005213923047803401,
5
  "eval_steps": 334,
6
- "global_step": 167,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -127,6 +127,133 @@
127
  "learning_rate": 0.00018888354486549237,
128
  "loss": 1.145,
129
  "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ],
132
  "logging_steps": 10,
@@ -146,7 +273,7 @@
146
  "attributes": {}
147
  }
148
  },
149
- "total_flos": 1.2765796160136806e+17,
150
  "train_batch_size": 1,
151
  "trial_name": null,
152
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.010427846095606801,
5
  "eval_steps": 334,
6
+ "global_step": 334,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
127
  "learning_rate": 0.00018888354486549237,
128
  "loss": 1.145,
129
  "step": 160
130
+ },
131
+ {
132
+ "epoch": 0.005307586336087294,
133
+ "grad_norm": 1.1813420057296753,
134
+ "learning_rate": 0.00018738493770697852,
135
+ "loss": 0.5603,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 0.005619797297033605,
140
+ "grad_norm": 2.35960054397583,
141
+ "learning_rate": 0.00018579834132349772,
142
+ "loss": 0.8248,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 0.005932008257979917,
147
+ "grad_norm": 2.443915605545044,
148
+ "learning_rate": 0.00018412535328311814,
149
+ "loss": 1.1141,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 0.006244219218926228,
154
+ "grad_norm": 5.689703941345215,
155
+ "learning_rate": 0.0001823676581429833,
156
+ "loss": 1.2009,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.00655643017987254,
161
+ "grad_norm": 1.4314906597137451,
162
+ "learning_rate": 0.00018052702575310588,
163
+ "loss": 1.1061,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.006868641140818851,
168
+ "grad_norm": 0.6448104977607727,
169
+ "learning_rate": 0.00017860530947427875,
170
+ "loss": 0.4016,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.007180852101765163,
175
+ "grad_norm": 2.0396196842193604,
176
+ "learning_rate": 0.0001766044443118978,
177
+ "loss": 0.8709,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.007493063062711474,
182
+ "grad_norm": 2.5875227451324463,
183
+ "learning_rate": 0.0001745264449675755,
184
+ "loss": 1.1121,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.007805274023657786,
189
+ "grad_norm": 3.9609525203704834,
190
+ "learning_rate": 0.00017237340381050703,
191
+ "loss": 1.251,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.008117484984604097,
196
+ "grad_norm": 1.2032607793807983,
197
+ "learning_rate": 0.00017014748877063214,
198
+ "loss": 1.1823,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.008429695945550408,
203
+ "grad_norm": 1.186848521232605,
204
+ "learning_rate": 0.00016785094115571322,
205
+ "loss": 0.6219,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.00874190690649672,
210
+ "grad_norm": 1.85453462600708,
211
+ "learning_rate": 0.00016548607339452853,
212
+ "loss": 0.5809,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.009054117867443031,
217
+ "grad_norm": 2.0443332195281982,
218
+ "learning_rate": 0.00016305526670845226,
219
+ "loss": 1.2146,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.009366328828389343,
224
+ "grad_norm": 7.1448516845703125,
225
+ "learning_rate": 0.00016056096871376667,
226
+ "loss": 1.2524,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.009678539789335654,
231
+ "grad_norm": 1.334848165512085,
232
+ "learning_rate": 0.00015800569095711982,
233
+ "loss": 1.1966,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.009990750750281965,
238
+ "grad_norm": 0.4558267295360565,
239
+ "learning_rate": 0.00015539200638661104,
240
+ "loss": 0.589,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.010302961711228277,
245
+ "grad_norm": 1.8344190120697021,
246
+ "learning_rate": 0.00015272254676105025,
247
+ "loss": 0.5806,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.010427846095606801,
252
+ "eval_loss": 0.9566133618354797,
253
+ "eval_runtime": 13592.0734,
254
+ "eval_samples_per_second": 2.095,
255
+ "eval_steps_per_second": 2.095,
256
+ "step": 334
257
  }
258
  ],
259
  "logging_steps": 10,
 
273
  "attributes": {}
274
  }
275
  },
276
+ "total_flos": 2.544673843146916e+17,
277
  "train_batch_size": 1,
278
  "trial_name": null,
279
  "trial_params": null