sandernotenbaert commited on
Commit
f5b8717
·
verified ·
1 Parent(s): 8ab9a93

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f472a71e58c1e05f38e3ac96ecd5e8545ca967b82df98601d845c258a3482a23
3
  size 30214176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34d3555cfca007ef636809b8f3241beff2d95435ec2f1596bb03a9d3f865cc63
3
  size 30214176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dafdaff06aed498a0e111444051ede7608a7962108f89d16cbbd37fdac81cfa
3
  size 291962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25011134f263f44b5a284e8818a6302e5420c80915fb0baed6f67eb7a593568a
3
  size 291962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9b7da814158dbbb8d60189b3b5255f312edc3c87062d26ecc4a3197477d7d1f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8bd8829e73b825a940f709da54f29899bba70342040af60abace1f481dd4757
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b15d8832650f7e6c0cd77d0d5d945bee578308b9cd39297e92f1063b58b0f3c8
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3efab4a26e653f12b072c8c84cd98873adfff5605ef352f4a784bafec4fd37e6
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04fe46db27f239a414db1d5d90722d80220853d3e644018ca60e784cd72b6710
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0054bae4a1765c1151c5d499ec353895197f5e92e18df41e313afd3470bb8693
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 500,
3
  "best_metric": 1.6513175964355469,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-500",
5
- "epoch": 0.4452483651036594,
6
  "eval_steps": 500,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -164,6 +164,84 @@
164
  "eval_samples_per_second": 392.083,
165
  "eval_steps_per_second": 49.026,
166
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
  ],
169
  "logging_steps": 50,
@@ -178,7 +256,7 @@
178
  "early_stopping_threshold": 0.0
179
  },
180
  "attributes": {
181
- "early_stopping_patience_counter": 1
182
  }
183
  },
184
  "TrainerControl": {
@@ -192,7 +270,7 @@
192
  "attributes": {}
193
  }
194
  },
195
- "total_flos": 4620262884249600.0,
196
  "train_batch_size": 4,
197
  "trial_name": null,
198
  "trial_params": null
 
2
  "best_global_step": 500,
3
  "best_metric": 1.6513175964355469,
4
  "best_model_checkpoint": "./results/hierarchical_music_t5_small_finetune/checkpoint-500",
5
+ "epoch": 0.6678725476554891,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
164
  "eval_samples_per_second": 392.083,
165
  "eval_steps_per_second": 49.026,
166
  "step": 1000
167
+ },
168
+ {
169
+ "epoch": 0.46751078335884233,
170
+ "grad_norm": 0.9494450092315674,
171
+ "learning_rate": 5e-05,
172
+ "loss": 1.6604,
173
+ "step": 1050
174
+ },
175
+ {
176
+ "epoch": 0.4897732016140253,
177
+ "grad_norm": 0.9924134612083435,
178
+ "learning_rate": 5e-05,
179
+ "loss": 1.6539,
180
+ "step": 1100
181
+ },
182
+ {
183
+ "epoch": 0.5120356198692083,
184
+ "grad_norm": 1.0620170831680298,
185
+ "learning_rate": 5e-05,
186
+ "loss": 1.6552,
187
+ "step": 1150
188
+ },
189
+ {
190
+ "epoch": 0.5342980381243913,
191
+ "grad_norm": 1.1163603067398071,
192
+ "learning_rate": 5e-05,
193
+ "loss": 1.6452,
194
+ "step": 1200
195
+ },
196
+ {
197
+ "epoch": 0.5565604563795742,
198
+ "grad_norm": 1.025298833847046,
199
+ "learning_rate": 5e-05,
200
+ "loss": 1.6468,
201
+ "step": 1250
202
+ },
203
+ {
204
+ "epoch": 0.5788228746347572,
205
+ "grad_norm": 0.9661399722099304,
206
+ "learning_rate": 5e-05,
207
+ "loss": 1.6377,
208
+ "step": 1300
209
+ },
210
+ {
211
+ "epoch": 0.6010852928899402,
212
+ "grad_norm": 0.9570266008377075,
213
+ "learning_rate": 5e-05,
214
+ "loss": 1.6525,
215
+ "step": 1350
216
+ },
217
+ {
218
+ "epoch": 0.6233477111451231,
219
+ "grad_norm": 0.9325594902038574,
220
+ "learning_rate": 5e-05,
221
+ "loss": 1.6443,
222
+ "step": 1400
223
+ },
224
+ {
225
+ "epoch": 0.6456101294003062,
226
+ "grad_norm": 1.071475625038147,
227
+ "learning_rate": 5e-05,
228
+ "loss": 1.6418,
229
+ "step": 1450
230
+ },
231
+ {
232
+ "epoch": 0.6678725476554891,
233
+ "grad_norm": 0.9684040546417236,
234
+ "learning_rate": 5e-05,
235
+ "loss": 1.6396,
236
+ "step": 1500
237
+ },
238
+ {
239
+ "epoch": 0.6678725476554891,
240
+ "eval_loss": 1.6649832725524902,
241
+ "eval_runtime": 40.9434,
242
+ "eval_samples_per_second": 390.075,
243
+ "eval_steps_per_second": 48.775,
244
+ "step": 1500
245
  }
246
  ],
247
  "logging_steps": 50,
 
256
  "early_stopping_threshold": 0.0
257
  },
258
  "attributes": {
259
+ "early_stopping_patience_counter": 2
260
  }
261
  },
262
  "TrainerControl": {
 
270
  "attributes": {}
271
  }
272
  },
273
+ "total_flos": 8275168727519232.0,
274
  "train_batch_size": 4,
275
  "trial_name": null,
276
  "trial_params": null