afull05 commited on
Commit
74faa33
·
verified ·
1 Parent(s): e396ff7

Training in progress, step 501, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cc3f907aaa715b71a61eebdecb1333d7c5f0a6d30903b65d7ddcf916d23be30
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a90302ba872fc5e9fbd81b8001b1ec47581ce27eb6b7de96d47864ae6baea6
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b231a11b5058c73716d24e13fb98a1a8474c625633b1d6c44138c13b4edd7d
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0af6fc6a885bd4f3dc9a38a3940aef3c5fcad0903424e373a457694dc79ba3
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cce5595f22fff3c51a6d507b41d2596013d8383dba38b7309a6e6c86cbe8c90c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ef870d414744998f4151ccf29729885033e4a65886e92505678cc2e7a90569
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adcadd27beefdfbf5840092bf08d57b92f1d1b18154a8342ab8cd911b37488da
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e594abdaba229076f66031ff8b5f12f7dac58b2668bc9aeb641b9d5709850a7f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.010427846095606801,
5
  "eval_steps": 334,
6
- "global_step": 334,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -254,6 +254,125 @@
254
  "eval_samples_per_second": 2.095,
255
  "eval_steps_per_second": 2.095,
256
  "step": 334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  }
258
  ],
259
  "logging_steps": 10,
@@ -273,7 +392,7 @@
273
  "attributes": {}
274
  }
275
  },
276
- "total_flos": 2.544673843146916e+17,
277
  "train_batch_size": 1,
278
  "trial_name": null,
279
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.015641769143410204,
5
  "eval_steps": 334,
6
+ "global_step": 501,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
254
  "eval_samples_per_second": 2.095,
255
  "eval_steps_per_second": 2.095,
256
  "step": 334
257
+ },
258
+ {
259
+ "epoch": 0.010615172672174588,
260
+ "grad_norm": 1.9632649421691895,
261
+ "learning_rate": 0.00015000000000000001,
262
+ "loss": 1.0551,
263
+ "step": 340
264
+ },
265
+ {
266
+ "epoch": 0.0109273836331209,
267
+ "grad_norm": 4.136826992034912,
268
+ "learning_rate": 0.0001472271074772683,
269
+ "loss": 1.0784,
270
+ "step": 350
271
+ },
272
+ {
273
+ "epoch": 0.01123959459406721,
274
+ "grad_norm": 1.1779104471206665,
275
+ "learning_rate": 0.00014440666126057744,
276
+ "loss": 1.1613,
277
+ "step": 360
278
+ },
279
+ {
280
+ "epoch": 0.011551805555013523,
281
+ "grad_norm": 0.8325644731521606,
282
+ "learning_rate": 0.00014154150130018866,
283
+ "loss": 0.5119,
284
+ "step": 370
285
+ },
286
+ {
287
+ "epoch": 0.011864016515959834,
288
+ "grad_norm": 1.6711801290512085,
289
+ "learning_rate": 0.00013863451256931287,
290
+ "loss": 0.6156,
291
+ "step": 380
292
+ },
293
+ {
294
+ "epoch": 0.012176227476906146,
295
+ "grad_norm": 2.293975353240967,
296
+ "learning_rate": 0.00013568862215918717,
297
+ "loss": 1.0706,
298
+ "step": 390
299
+ },
300
+ {
301
+ "epoch": 0.012488438437852457,
302
+ "grad_norm": 2.2785656452178955,
303
+ "learning_rate": 0.00013270679633174218,
304
+ "loss": 1.2872,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 0.012800649398798769,
309
+ "grad_norm": 1.2502048015594482,
310
+ "learning_rate": 0.0001296920375328275,
311
+ "loss": 1.0768,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 0.01311286035974508,
316
+ "grad_norm": 0.7812928557395935,
317
+ "learning_rate": 0.00012664738136900348,
318
+ "loss": 0.5199,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 0.01342507132069139,
323
+ "grad_norm": 2.0176918506622314,
324
+ "learning_rate": 0.00012357589355094275,
325
+ "loss": 0.8125,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 0.013737282281637703,
330
+ "grad_norm": 2.014697313308716,
331
+ "learning_rate": 0.00012048066680651908,
332
+ "loss": 1.0261,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 0.014049493242584013,
337
+ "grad_norm": 3.0161404609680176,
338
+ "learning_rate": 0.00011736481776669306,
339
+ "loss": 1.1352,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 0.014361704203530326,
344
+ "grad_norm": 1.1186920404434204,
345
+ "learning_rate": 0.00011423148382732853,
346
+ "loss": 1.1374,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 0.014673915164476636,
351
+ "grad_norm": 0.9820886850357056,
352
+ "learning_rate": 0.00011108381999010111,
353
+ "loss": 0.5135,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 0.014986126125422949,
358
+ "grad_norm": 2.8473262786865234,
359
+ "learning_rate": 0.00010792499568567884,
360
+ "loss": 0.8812,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 0.01529833708636926,
365
+ "grad_norm": 2.1481053829193115,
366
+ "learning_rate": 0.00010475819158237425,
367
+ "loss": 1.0178,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 0.015610548047315572,
372
+ "grad_norm": 1.20015287399292,
373
+ "learning_rate": 0.00010158659638348081,
374
+ "loss": 1.0468,
375
+ "step": 500
376
  }
377
  ],
378
  "logging_steps": 10,
 
392
  "attributes": {}
393
  }
394
  },
395
+ "total_flos": 3.824081922120745e+17,
396
  "train_batch_size": 1,
397
  "trial_name": null,
398
  "trial_params": null