ChiefTheLord commited on
Commit
805516f
verified
1 Parent(s): 51e906c

Delete checkpoints

Browse files
checkpoints/checkpoint-408/adapter.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:983952edf714374c6e674f56ed4de55c61049452f9c3d7048f5e92ceb4b77c7f
3
- size 6439640
 
 
 
 
checkpoints/checkpoint-408/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d142f611fe97236ef3df07856eadbc9b2d0593f8e4d9b592fce4c870449eba8
3
- size 3304962
 
 
 
 
checkpoints/checkpoint-408/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ed0adb262ae5a32b0426ae5f447cd38d4498b7741803b88609e5036322fb8d3
3
- size 14244
 
 
 
 
checkpoints/checkpoint-408/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:308aef47a7e13f7067a6469ac9a30684921e5d2059f036308d7ee97689efbaeb
3
- size 1064
 
 
 
 
checkpoints/checkpoint-408/trainer_state.json DELETED
@@ -1,360 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
- "eval_steps": 500,
6
- "global_step": 408,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.3137254901960784,
13
- "grad_norm": 1.1791675090789795,
14
- "learning_rate": 1.7073170731707317e-07,
15
- "loss": 9.2388,
16
- "step": 16
17
- },
18
- {
19
- "epoch": 0.6274509803921569,
20
- "grad_norm": 1.0611275434494019,
21
- "learning_rate": 3.6585365853658536e-07,
22
- "loss": 9.2418,
23
- "step": 32
24
- },
25
- {
26
- "epoch": 0.9411764705882353,
27
- "grad_norm": 1.4214043617248535,
28
- "learning_rate": 5.609756097560975e-07,
29
- "loss": 9.2444,
30
- "step": 48
31
- },
32
- {
33
- "epoch": 1.0,
34
- "eval_bleu": 0.2649535678833101,
35
- "eval_cap_loss": 1.862084381720599,
36
- "eval_con_loss": 2.059098891183442,
37
- "eval_loss": 5.049239996601553,
38
- "step": 51
39
- },
40
- {
41
- "epoch": 1.0,
42
- "eval_bleu": 0.2649535678833101,
43
- "eval_cap_loss": 1.862084381720599,
44
- "eval_con_loss": 2.059098891183442,
45
- "eval_loss": 5.049239996601553,
46
- "eval_runtime": 30.9562,
47
- "eval_samples_per_second": 26.134,
48
- "eval_steps_per_second": 3.295,
49
- "step": 51
50
- },
51
- {
52
- "epoch": 1.2549019607843137,
53
- "grad_norm": 1.0868176221847534,
54
- "learning_rate": 7.560975609756097e-07,
55
- "loss": 9.1113,
56
- "step": 64
57
- },
58
- {
59
- "epoch": 1.5686274509803921,
60
- "grad_norm": 1.048159122467041,
61
- "learning_rate": 9.390243902439024e-07,
62
- "loss": 9.2393,
63
- "step": 80
64
- },
65
- {
66
- "epoch": 1.8823529411764706,
67
- "grad_norm": 1.2898032665252686,
68
- "learning_rate": 9.97193382647516e-07,
69
- "loss": 9.2355,
70
- "step": 96
71
- },
72
- {
73
- "epoch": 2.0,
74
- "eval_bleu": 0.26492614431633255,
75
- "eval_cap_loss": 1.8576759625883663,
76
- "eval_con_loss": 2.059098112816904,
77
- "eval_loss": 5.047034211018506,
78
- "step": 102
79
- },
80
- {
81
- "epoch": 2.0,
82
- "eval_bleu": 0.26492614431633255,
83
- "eval_cap_loss": 1.8576759625883663,
84
- "eval_con_loss": 2.059098112816904,
85
- "eval_loss": 5.047034211018506,
86
- "eval_runtime": 30.947,
87
- "eval_samples_per_second": 26.141,
88
- "eval_steps_per_second": 3.296,
89
- "step": 102
90
- },
91
- {
92
- "epoch": 2.196078431372549,
93
- "grad_norm": 1.2055140733718872,
94
- "learning_rate": 9.831701387893532e-07,
95
- "loss": 9.1146,
96
- "step": 112
97
- },
98
- {
99
- "epoch": 2.5098039215686274,
100
- "grad_norm": 5.2426838874816895,
101
- "learning_rate": 9.576826881282595e-07,
102
- "loss": 9.2355,
103
- "step": 128
104
- },
105
- {
106
- "epoch": 2.8235294117647056,
107
- "grad_norm": 1.1463598012924194,
108
- "learning_rate": 9.213357729316076e-07,
109
- "loss": 9.2393,
110
- "step": 144
111
- },
112
- {
113
- "epoch": 3.0,
114
- "eval_bleu": 0.265008566328907,
115
- "eval_cap_loss": 1.8531216908903683,
116
- "eval_con_loss": 2.059098047368667,
117
- "eval_loss": 5.044756932585847,
118
- "step": 153
119
- },
120
- {
121
- "epoch": 3.0,
122
- "eval_bleu": 0.265008566328907,
123
- "eval_cap_loss": 1.8531216908903683,
124
- "eval_con_loss": 2.059098047368667,
125
- "eval_loss": 5.044756932585847,
126
- "eval_runtime": 31.3221,
127
- "eval_samples_per_second": 25.828,
128
- "eval_steps_per_second": 3.256,
129
- "step": 153
130
- },
131
- {
132
- "epoch": 3.1372549019607843,
133
- "grad_norm": 1.1249020099639893,
134
- "learning_rate": 8.749917986317928e-07,
135
- "loss": 9.1006,
136
- "step": 160
137
- },
138
- {
139
- "epoch": 3.450980392156863,
140
- "grad_norm": 1.0396620035171509,
141
- "learning_rate": 8.19750371480919e-07,
142
- "loss": 9.2389,
143
- "step": 176
144
- },
145
- {
146
- "epoch": 3.764705882352941,
147
- "grad_norm": 1.092300534248352,
148
- "learning_rate": 7.569222081274395e-07,
149
- "loss": 9.2325,
150
- "step": 192
151
- },
152
- {
153
- "epoch": 4.0,
154
- "eval_bleu": 0.26470999206118423,
155
- "eval_cap_loss": 1.8498210708300273,
156
- "eval_con_loss": 2.0590969020245122,
157
- "eval_loss": 5.043104336542242,
158
- "step": 204
159
- },
160
- {
161
- "epoch": 4.0,
162
- "eval_bleu": 0.26470999206118423,
163
- "eval_cap_loss": 1.8498210708300273,
164
- "eval_con_loss": 2.0590969020245122,
165
- "eval_loss": 5.043104336542242,
166
- "eval_runtime": 31.2759,
167
- "eval_samples_per_second": 25.867,
168
- "eval_steps_per_second": 3.261,
169
- "step": 204
170
- },
171
- {
172
- "epoch": 4.078431372549019,
173
- "grad_norm": 1.492817759513855,
174
- "learning_rate": 6.879980361637865e-07,
175
- "loss": 9.1007,
176
- "step": 208
177
- },
178
- {
179
- "epoch": 4.392156862745098,
180
- "grad_norm": 1.1078872680664062,
181
- "learning_rate": 6.146132235435591e-07,
182
- "loss": 9.236,
183
- "step": 224
184
- },
185
- {
186
- "epoch": 4.705882352941177,
187
- "grad_norm": 1.093540906906128,
188
- "learning_rate": 5.385089761082039e-07,
189
- "loss": 9.2259,
190
- "step": 240
191
- },
192
- {
193
- "epoch": 5.0,
194
- "eval_bleu": 0.26489835558929614,
195
- "eval_cap_loss": 1.8473522861798604,
196
- "eval_con_loss": 2.059096609844881,
197
- "eval_loss": 5.04186936394841,
198
- "step": 255
199
- },
200
- {
201
- "epoch": 5.0,
202
- "eval_bleu": 0.26489835558929614,
203
- "eval_cap_loss": 1.8473522861798604,
204
- "eval_con_loss": 2.059096609844881,
205
- "eval_loss": 5.04186936394841,
206
- "eval_runtime": 30.9926,
207
- "eval_samples_per_second": 26.103,
208
- "eval_steps_per_second": 3.291,
209
- "step": 255
210
- },
211
- {
212
- "epoch": 5.019607843137255,
213
- "grad_norm": 1.0436476469039917,
214
- "learning_rate": 4.614910238917963e-07,
215
- "loss": 9.1106,
216
- "step": 256
217
- },
218
- {
219
- "epoch": 5.333333333333333,
220
- "grad_norm": 1.0471470355987549,
221
- "learning_rate": 3.853867764564409e-07,
222
- "loss": 9.229,
223
- "step": 272
224
- },
225
- {
226
- "epoch": 5.647058823529412,
227
- "grad_norm": 1.063376545906067,
228
- "learning_rate": 3.120019638362136e-07,
229
- "loss": 9.2323,
230
- "step": 288
231
- },
232
- {
233
- "epoch": 5.96078431372549,
234
- "grad_norm": 0.9393066763877869,
235
- "learning_rate": 2.430777918725606e-07,
236
- "loss": 9.2343,
237
- "step": 304
238
- },
239
- {
240
- "epoch": 6.0,
241
- "eval_bleu": 0.2647592416405099,
242
- "eval_cap_loss": 1.845566402463352,
243
- "eval_con_loss": 2.0590962709165086,
244
- "eval_loss": 5.040975749492645,
245
- "step": 306
246
- },
247
- {
248
- "epoch": 6.0,
249
- "eval_bleu": 0.2647592416405099,
250
- "eval_cap_loss": 1.845566402463352,
251
- "eval_con_loss": 2.0590962709165086,
252
- "eval_loss": 5.040975749492645,
253
- "eval_runtime": 31.1784,
254
- "eval_samples_per_second": 25.947,
255
- "eval_steps_per_second": 3.271,
256
- "step": 306
257
- },
258
- {
259
- "epoch": 6.2745098039215685,
260
- "grad_norm": 1.2808588743209839,
261
- "learning_rate": 1.8024962851908105e-07,
262
- "loss": 9.0982,
263
- "step": 320
264
- },
265
- {
266
- "epoch": 6.588235294117647,
267
- "grad_norm": 2.2777695655822754,
268
- "learning_rate": 1.2500820136820733e-07,
269
- "loss": 9.2284,
270
- "step": 336
271
- },
272
- {
273
- "epoch": 6.901960784313726,
274
- "grad_norm": 1.1129379272460938,
275
- "learning_rate": 8.127810884536402e-08,
276
- "loss": 9.2365,
277
- "step": 352
278
- },
279
- {
280
- "epoch": 7.0,
281
- "eval_bleu": 0.26468108670121876,
282
- "eval_cap_loss": 1.8450663510490866,
283
- "eval_con_loss": 2.0590960582097373,
284
- "eval_loss": 5.040725319408903,
285
- "step": 357
286
- },
287
- {
288
- "epoch": 7.0,
289
- "eval_bleu": 0.26468108670121876,
290
- "eval_cap_loss": 1.8450663510490866,
291
- "eval_con_loss": 2.0590960582097373,
292
- "eval_loss": 5.040725319408903,
293
- "eval_runtime": 30.8218,
294
- "eval_samples_per_second": 26.248,
295
- "eval_steps_per_second": 3.309,
296
- "step": 357
297
- },
298
- {
299
- "epoch": 7.215686274509804,
300
- "grad_norm": 0.9861984252929688,
301
- "learning_rate": 4.427853541662091e-08,
302
- "loss": 9.097,
303
- "step": 368
304
- },
305
- {
306
- "epoch": 7.529411764705882,
307
- "grad_norm": 1.0506465435028076,
308
- "learning_rate": 1.8091892454998593e-08,
309
- "loss": 9.232,
310
- "step": 384
311
- },
312
- {
313
- "epoch": 7.8431372549019605,
314
- "grad_norm": 1.1292920112609863,
315
- "learning_rate": 3.3395120054343086e-09,
316
- "loss": 9.2326,
317
- "step": 400
318
- },
319
- {
320
- "epoch": 8.0,
321
- "eval_bleu": 0.2648108207781699,
322
- "eval_cap_loss": 1.8449630152945424,
323
- "eval_con_loss": 2.059096009123559,
324
- "eval_loss": 5.04067354926876,
325
- "step": 408
326
- },
327
- {
328
- "epoch": 8.0,
329
- "eval_bleu": 0.2648108207781699,
330
- "eval_cap_loss": 1.8449630152945424,
331
- "eval_con_loss": 2.059096009123559,
332
- "eval_loss": 5.04067354926876,
333
- "eval_runtime": 31.3707,
334
- "eval_samples_per_second": 25.788,
335
- "eval_steps_per_second": 3.251,
336
- "step": 408
337
- }
338
- ],
339
- "logging_steps": 16,
340
- "max_steps": 408,
341
- "num_input_tokens_seen": 0,
342
- "num_train_epochs": 8,
343
- "save_steps": 500,
344
- "stateful_callbacks": {
345
- "TrainerControl": {
346
- "args": {
347
- "should_epoch_stop": false,
348
- "should_evaluate": false,
349
- "should_log": false,
350
- "should_save": true,
351
- "should_training_stop": true
352
- },
353
- "attributes": {}
354
- }
355
- },
356
- "total_flos": 0.0,
357
- "train_batch_size": 64,
358
- "trial_name": null,
359
- "trial_params": null
360
- }