Model save
Browse files- all_results.json +7 -7
- eval_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699495735.ip-26-0-144-35.317141.0 +3 -0
- runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699507701.ip-26-0-144-35.317141.1 +3 -0
- train_results.json +3 -3
- trainer_state.json +23 -23
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 23110,
|
6 |
-
"eval_samples_per_second":
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
"train_samples": 207865,
|
11 |
-
"train_samples_per_second": 17.
|
12 |
"train_steps_per_second": 0.035
|
13 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"eval_loss": 0.9324741959571838,
|
4 |
+
"eval_runtime": 329.8618,
|
5 |
"eval_samples": 23110,
|
6 |
+
"eval_samples_per_second": 70.06,
|
7 |
+
"eval_steps_per_second": 0.549,
|
8 |
+
"train_loss": 0.9717525305116878,
|
9 |
+
"train_runtime": 11635.2882,
|
10 |
"train_samples": 207865,
|
11 |
+
"train_samples_per_second": 17.865,
|
12 |
"train_steps_per_second": 0.035
|
13 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"eval_loss": 0.
|
4 |
-
"eval_runtime":
|
5 |
"eval_samples": 23110,
|
6 |
-
"eval_samples_per_second":
|
7 |
-
"eval_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"eval_loss": 0.9324741959571838,
|
4 |
+
"eval_runtime": 329.8618,
|
5 |
"eval_samples": 23110,
|
6 |
+
"eval_samples_per_second": 70.06,
|
7 |
+
"eval_steps_per_second": 0.549
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4943162336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20e559f5bacc048426347855c065fe0ea22051e726980180fd355f08f23d3af7
|
3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c12a664f67d2f8f80d64f6d37209fc3fc41757f8ad6dc3b40a83fd731d0bf84
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4540516344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b65ce0fc315aa5f4c8d001e39730681c1111b74d6bc47761d5e943493c9075b8
|
3 |
size 4540516344
|
runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699495735.ip-26-0-144-35.317141.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c214dfb70cb5cad55741b0942a5a9f02f54b203d012ead4aea864b49e555ef3
|
3 |
+
size 13396
|
runs/Nov09_02-07-37_ip-26-0-144-35/events.out.tfevents.1699507701.ip-26-0-144-35.317141.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38f305950c24021ebd4b4c8ebc937d3370450e9fb7845f6bbd8bdb55aad6dca4
|
3 |
+
size 359
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 207865,
|
6 |
-
"train_samples_per_second": 17.
|
7 |
"train_steps_per_second": 0.035
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 0.67,
|
3 |
+
"train_loss": 0.9717525305116878,
|
4 |
+
"train_runtime": 11635.2882,
|
5 |
"train_samples": 207865,
|
6 |
+
"train_samples_per_second": 17.865,
|
7 |
"train_steps_per_second": 0.035
|
8 |
}
|
trainer_state.json
CHANGED
@@ -23,43 +23,43 @@
|
|
23 |
{
|
24 |
"epoch": 0.02,
|
25 |
"learning_rate": 1.997007728639956e-05,
|
26 |
-
"loss": 1.
|
27 |
"step": 10
|
28 |
},
|
29 |
{
|
30 |
"epoch": 0.04,
|
31 |
"learning_rate": 1.9932715879473385e-05,
|
32 |
-
"loss": 1.
|
33 |
"step": 15
|
34 |
},
|
35 |
{
|
36 |
"epoch": 0.05,
|
37 |
"learning_rate": 1.9880488219356086e-05,
|
38 |
-
"loss": 1.
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.06,
|
43 |
"learning_rate": 1.981347247496222e-05,
|
44 |
-
"loss": 1.
|
45 |
"step": 25
|
46 |
},
|
47 |
{
|
48 |
"epoch": 0.07,
|
49 |
"learning_rate": 1.973176894846855e-05,
|
50 |
-
"loss": 0.
|
51 |
"step": 30
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.09,
|
55 |
"learning_rate": 1.963549992519223e-05,
|
56 |
-
"loss": 0.
|
57 |
"step": 35
|
58 |
},
|
59 |
{
|
60 |
"epoch": 0.1,
|
61 |
"learning_rate": 1.9524809490566878e-05,
|
62 |
-
"loss": 0.
|
63 |
"step": 40
|
64 |
},
|
65 |
{
|
@@ -77,7 +77,7 @@
|
|
77 |
{
|
78 |
"epoch": 0.14,
|
79 |
"learning_rate": 1.910797282022027e-05,
|
80 |
-
"loss": 0.
|
81 |
"step": 55
|
82 |
},
|
83 |
{
|
@@ -95,13 +95,13 @@
|
|
95 |
{
|
96 |
"epoch": 0.17,
|
97 |
"learning_rate": 1.8568571761675893e-05,
|
98 |
-
"loss": 0.
|
99 |
"step": 70
|
100 |
},
|
101 |
{
|
102 |
"epoch": 0.18,
|
103 |
"learning_rate": 1.8362743705288127e-05,
|
104 |
-
"loss": 0.
|
105 |
"step": 75
|
106 |
},
|
107 |
{
|
@@ -113,7 +113,7 @@
|
|
113 |
{
|
114 |
"epoch": 0.21,
|
115 |
"learning_rate": 1.791386494010081e-05,
|
116 |
-
"loss": 0.
|
117 |
"step": 85
|
118 |
},
|
119 |
{
|
@@ -179,13 +179,13 @@
|
|
179 |
{
|
180 |
"epoch": 0.34,
|
181 |
"learning_rate": 1.4684084406997903e-05,
|
182 |
-
"loss": 0.
|
183 |
"step": 140
|
184 |
},
|
185 |
{
|
186 |
"epoch": 0.36,
|
187 |
"learning_rate": 1.4338837391175582e-05,
|
188 |
-
"loss": 0.
|
189 |
"step": 145
|
190 |
},
|
191 |
{
|
@@ -215,7 +215,7 @@
|
|
215 |
{
|
216 |
"epoch": 0.42,
|
217 |
"learning_rate": 1.252585165803135e-05,
|
218 |
-
"loss": 0.
|
219 |
"step": 170
|
220 |
},
|
221 |
{
|
@@ -239,7 +239,7 @@
|
|
239 |
{
|
240 |
"epoch": 0.47,
|
241 |
"learning_rate": 1.1004233054136726e-05,
|
242 |
-
"loss": 0.
|
243 |
"step": 190
|
244 |
},
|
245 |
{
|
@@ -269,7 +269,7 @@
|
|
269 |
{
|
270 |
"epoch": 0.53,
|
271 |
"learning_rate": 9.072784204417995e-06,
|
272 |
-
"loss": 0.
|
273 |
"step": 215
|
274 |
},
|
275 |
{
|
@@ -340,19 +340,19 @@
|
|
340 |
},
|
341 |
{
|
342 |
"epoch": 0.67,
|
343 |
-
"eval_loss": 0.
|
344 |
-
"eval_runtime":
|
345 |
-
"eval_samples_per_second": 68.
|
346 |
-
"eval_steps_per_second": 0.
|
347 |
"step": 272
|
348 |
},
|
349 |
{
|
350 |
"epoch": 0.67,
|
351 |
"step": 272,
|
352 |
"total_flos": 455322233733120.0,
|
353 |
-
"train_loss": 0.
|
354 |
-
"train_runtime":
|
355 |
-
"train_samples_per_second": 17.
|
356 |
"train_steps_per_second": 0.035
|
357 |
}
|
358 |
],
|
|
|
23 |
{
|
24 |
"epoch": 0.02,
|
25 |
"learning_rate": 1.997007728639956e-05,
|
26 |
+
"loss": 1.1492,
|
27 |
"step": 10
|
28 |
},
|
29 |
{
|
30 |
"epoch": 0.04,
|
31 |
"learning_rate": 1.9932715879473385e-05,
|
32 |
+
"loss": 1.0712,
|
33 |
"step": 15
|
34 |
},
|
35 |
{
|
36 |
"epoch": 0.05,
|
37 |
"learning_rate": 1.9880488219356086e-05,
|
38 |
+
"loss": 1.0494,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 0.06,
|
43 |
"learning_rate": 1.981347247496222e-05,
|
44 |
+
"loss": 1.0139,
|
45 |
"step": 25
|
46 |
},
|
47 |
{
|
48 |
"epoch": 0.07,
|
49 |
"learning_rate": 1.973176894846855e-05,
|
50 |
+
"loss": 0.9987,
|
51 |
"step": 30
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.09,
|
55 |
"learning_rate": 1.963549992519223e-05,
|
56 |
+
"loss": 0.9963,
|
57 |
"step": 35
|
58 |
},
|
59 |
{
|
60 |
"epoch": 0.1,
|
61 |
"learning_rate": 1.9524809490566878e-05,
|
62 |
+
"loss": 0.9806,
|
63 |
"step": 40
|
64 |
},
|
65 |
{
|
|
|
77 |
{
|
78 |
"epoch": 0.14,
|
79 |
"learning_rate": 1.910797282022027e-05,
|
80 |
+
"loss": 0.9726,
|
81 |
"step": 55
|
82 |
},
|
83 |
{
|
|
|
95 |
{
|
96 |
"epoch": 0.17,
|
97 |
"learning_rate": 1.8568571761675893e-05,
|
98 |
+
"loss": 0.9533,
|
99 |
"step": 70
|
100 |
},
|
101 |
{
|
102 |
"epoch": 0.18,
|
103 |
"learning_rate": 1.8362743705288127e-05,
|
104 |
+
"loss": 0.9607,
|
105 |
"step": 75
|
106 |
},
|
107 |
{
|
|
|
113 |
{
|
114 |
"epoch": 0.21,
|
115 |
"learning_rate": 1.791386494010081e-05,
|
116 |
+
"loss": 0.9643,
|
117 |
"step": 85
|
118 |
},
|
119 |
{
|
|
|
179 |
{
|
180 |
"epoch": 0.34,
|
181 |
"learning_rate": 1.4684084406997903e-05,
|
182 |
+
"loss": 0.9427,
|
183 |
"step": 140
|
184 |
},
|
185 |
{
|
186 |
"epoch": 0.36,
|
187 |
"learning_rate": 1.4338837391175582e-05,
|
188 |
+
"loss": 0.9485,
|
189 |
"step": 145
|
190 |
},
|
191 |
{
|
|
|
215 |
{
|
216 |
"epoch": 0.42,
|
217 |
"learning_rate": 1.252585165803135e-05,
|
218 |
+
"loss": 0.9463,
|
219 |
"step": 170
|
220 |
},
|
221 |
{
|
|
|
239 |
{
|
240 |
"epoch": 0.47,
|
241 |
"learning_rate": 1.1004233054136726e-05,
|
242 |
+
"loss": 0.9421,
|
243 |
"step": 190
|
244 |
},
|
245 |
{
|
|
|
269 |
{
|
270 |
"epoch": 0.53,
|
271 |
"learning_rate": 9.072784204417995e-06,
|
272 |
+
"loss": 0.9462,
|
273 |
"step": 215
|
274 |
},
|
275 |
{
|
|
|
340 |
},
|
341 |
{
|
342 |
"epoch": 0.67,
|
343 |
+
"eval_loss": 0.932327389717102,
|
344 |
+
"eval_runtime": 335.4717,
|
345 |
+
"eval_samples_per_second": 68.888,
|
346 |
+
"eval_steps_per_second": 0.54,
|
347 |
"step": 272
|
348 |
},
|
349 |
{
|
350 |
"epoch": 0.67,
|
351 |
"step": 272,
|
352 |
"total_flos": 455322233733120.0,
|
353 |
+
"train_loss": 0.9717525305116878,
|
354 |
+
"train_runtime": 11635.2882,
|
355 |
+
"train_samples_per_second": 17.865,
|
356 |
"train_steps_per_second": 0.035
|
357 |
}
|
358 |
],
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5179
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f71ca7f71743b3a6bdbe97c61df987f92fb4922a63571f5a765dc7d768f13928
|
3 |
size 5179
|