lewtun HF staff commited on
Commit
3a3e6eb
·
1 Parent(s): 63b854b

Model save

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 0.9324695467948914,
4
- "eval_runtime": 331.4133,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 69.732,
7
- "eval_steps_per_second": 0.546,
8
- "train_loss": 0.9717059337040957,
9
- "train_runtime": 11636.4515,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 17.863,
12
  "train_steps_per_second": 0.035
13
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 0.9324741959571838,
4
+ "eval_runtime": 329.8618,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 70.06,
7
+ "eval_steps_per_second": 0.549,
8
+ "train_loss": 0.9717525305116878,
9
+ "train_runtime": 11635.2882,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 17.865,
12
  "train_steps_per_second": 0.035
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 0.9324695467948914,
4
- "eval_runtime": 331.4133,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 69.732,
7
- "eval_steps_per_second": 0.546
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 0.9324741959571838,
4
+ "eval_runtime": 329.8618,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 70.06,
7
+ "eval_steps_per_second": 0.549
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a5a9a3ba34e8a93479e3c32c586d97e5043895240fadfdf110fdaf2282cf0c8
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e559f5bacc048426347855c065fe0ea22051e726980180fd355f08f23d3af7
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80aae64f36ee2cadee0cda2d2fe5ef705505a6280b13e1861f48813af6321f59
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c12a664f67d2f8f80d64f6d37209fc3fc41757f8ad6dc3b40a83fd731d0bf84
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f5f730d4f2cb41c73c4b91d612e2231c8eed1bc47779f6ec3d324a27d76b195
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65ce0fc315aa5f4c8d001e39730681c1111b74d6bc47761d5e943493c9075b8
3
  size 4540516344
runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699495735.ip-26-0-144-35.317141.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c214dfb70cb5cad55741b0942a5a9f02f54b203d012ead4aea864b49e555ef3
3
+ size 13396
runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699507701.ip-26-0-144-35.317141.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38f305950c24021ebd4b4c8ebc937d3370450e9fb7845f6bbd8bdb55aad6dca4
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "train_loss": 0.9717059337040957,
4
- "train_runtime": 11636.4515,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 17.863,
7
  "train_steps_per_second": 0.035
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "train_loss": 0.9717525305116878,
4
+ "train_runtime": 11635.2882,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 17.865,
7
  "train_steps_per_second": 0.035
8
  }
trainer_state.json CHANGED
@@ -23,43 +23,43 @@
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 1.997007728639956e-05,
26
- "loss": 1.1489,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 1.9932715879473385e-05,
32
- "loss": 1.0705,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 1.9880488219356086e-05,
38
- "loss": 1.0486,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.06,
43
  "learning_rate": 1.981347247496222e-05,
44
- "loss": 1.0137,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.07,
49
  "learning_rate": 1.973176894846855e-05,
50
- "loss": 0.9985,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.09,
55
  "learning_rate": 1.963549992519223e-05,
56
- "loss": 0.9962,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 0.1,
61
  "learning_rate": 1.9524809490566878e-05,
62
- "loss": 0.9805,
63
  "step": 40
64
  },
65
  {
@@ -77,7 +77,7 @@
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
- "loss": 0.9725,
81
  "step": 55
82
  },
83
  {
@@ -95,13 +95,13 @@
95
  {
96
  "epoch": 0.17,
97
  "learning_rate": 1.8568571761675893e-05,
98
- "loss": 0.9532,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.18,
103
  "learning_rate": 1.8362743705288127e-05,
104
- "loss": 0.9606,
105
  "step": 75
106
  },
107
  {
@@ -113,7 +113,7 @@
113
  {
114
  "epoch": 0.21,
115
  "learning_rate": 1.791386494010081e-05,
116
- "loss": 0.9642,
117
  "step": 85
118
  },
119
  {
@@ -179,13 +179,13 @@
179
  {
180
  "epoch": 0.34,
181
  "learning_rate": 1.4684084406997903e-05,
182
- "loss": 0.9428,
183
  "step": 140
184
  },
185
  {
186
  "epoch": 0.36,
187
  "learning_rate": 1.4338837391175582e-05,
188
- "loss": 0.9486,
189
  "step": 145
190
  },
191
  {
@@ -215,7 +215,7 @@
215
  {
216
  "epoch": 0.42,
217
  "learning_rate": 1.252585165803135e-05,
218
- "loss": 0.9464,
219
  "step": 170
220
  },
221
  {
@@ -239,7 +239,7 @@
239
  {
240
  "epoch": 0.47,
241
  "learning_rate": 1.1004233054136726e-05,
242
- "loss": 0.9422,
243
  "step": 190
244
  },
245
  {
@@ -269,7 +269,7 @@
269
  {
270
  "epoch": 0.53,
271
  "learning_rate": 9.072784204417995e-06,
272
- "loss": 0.9461,
273
  "step": 215
274
  },
275
  {
@@ -340,19 +340,19 @@
340
  },
341
  {
342
  "epoch": 0.67,
343
- "eval_loss": 0.9323223233222961,
344
- "eval_runtime": 337.1471,
345
- "eval_samples_per_second": 68.546,
346
- "eval_steps_per_second": 0.537,
347
  "step": 272
348
  },
349
  {
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 455322233733120.0,
353
- "train_loss": 0.9717059337040957,
354
- "train_runtime": 11636.4515,
355
- "train_samples_per_second": 17.863,
356
  "train_steps_per_second": 0.035
357
  }
358
  ],
 
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 1.997007728639956e-05,
26
+ "loss": 1.1492,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 1.9932715879473385e-05,
32
+ "loss": 1.0712,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 1.9880488219356086e-05,
38
+ "loss": 1.0494,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.06,
43
  "learning_rate": 1.981347247496222e-05,
44
+ "loss": 1.0139,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.07,
49
  "learning_rate": 1.973176894846855e-05,
50
+ "loss": 0.9987,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.09,
55
  "learning_rate": 1.963549992519223e-05,
56
+ "loss": 0.9963,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 0.1,
61
  "learning_rate": 1.9524809490566878e-05,
62
+ "loss": 0.9806,
63
  "step": 40
64
  },
65
  {
 
77
  {
78
  "epoch": 0.14,
79
  "learning_rate": 1.910797282022027e-05,
80
+ "loss": 0.9726,
81
  "step": 55
82
  },
83
  {
 
95
  {
96
  "epoch": 0.17,
97
  "learning_rate": 1.8568571761675893e-05,
98
+ "loss": 0.9533,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.18,
103
  "learning_rate": 1.8362743705288127e-05,
104
+ "loss": 0.9607,
105
  "step": 75
106
  },
107
  {
 
113
  {
114
  "epoch": 0.21,
115
  "learning_rate": 1.791386494010081e-05,
116
+ "loss": 0.9643,
117
  "step": 85
118
  },
119
  {
 
179
  {
180
  "epoch": 0.34,
181
  "learning_rate": 1.4684084406997903e-05,
182
+ "loss": 0.9427,
183
  "step": 140
184
  },
185
  {
186
  "epoch": 0.36,
187
  "learning_rate": 1.4338837391175582e-05,
188
+ "loss": 0.9485,
189
  "step": 145
190
  },
191
  {
 
215
  {
216
  "epoch": 0.42,
217
  "learning_rate": 1.252585165803135e-05,
218
+ "loss": 0.9463,
219
  "step": 170
220
  },
221
  {
 
239
  {
240
  "epoch": 0.47,
241
  "learning_rate": 1.1004233054136726e-05,
242
+ "loss": 0.9421,
243
  "step": 190
244
  },
245
  {
 
269
  {
270
  "epoch": 0.53,
271
  "learning_rate": 9.072784204417995e-06,
272
+ "loss": 0.9462,
273
  "step": 215
274
  },
275
  {
 
340
  },
341
  {
342
  "epoch": 0.67,
343
+ "eval_loss": 0.932327389717102,
344
+ "eval_runtime": 335.4717,
345
+ "eval_samples_per_second": 68.888,
346
+ "eval_steps_per_second": 0.54,
347
  "step": 272
348
  },
349
  {
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 455322233733120.0,
353
+ "train_loss": 0.9717525305116878,
354
+ "train_runtime": 11635.2882,
355
+ "train_samples_per_second": 17.865,
356
  "train_steps_per_second": 0.035
357
  }
358
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d36e932256b4bf120a53214b3521b397d76398deb28d40b0620d488061c0d60
3
  size 5179
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f71ca7f71743b3a6bdbe97c61df987f92fb4922a63571f5a765dc7d768f13928
3
  size 5179