End of training
Browse files- README.md +6 -0
- all_results.json +23 -0
- eval_results.json +10 -0
- generated_predictions.txt +0 -0
- generation_config.json +0 -1
- predict_results.json +9 -0
- runs/May25_13-37-31_0d573eeffc83/events.out.tfevents.1716737990.0d573eeffc83.1345596.1 +3 -0
- train_results.json +9 -0
- trainer_state.json +0 -0
README.md
CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
|
|
3 |
base_model: google-t5/t5-small
|
4 |
tags:
|
5 |
- generated_from_trainer
|
|
|
|
|
6 |
model-index:
|
7 |
- name: t5-big-scratch-iwslt3008
|
8 |
results: []
|
@@ -14,6 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
|
|
14 |
# t5-big-scratch-iwslt3008
|
15 |
|
16 |
This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
|
|
|
|
|
|
|
|
|
17 |
|
18 |
## Model description
|
19 |
|
|
|
3 |
base_model: google-t5/t5-small
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
+
metrics:
|
7 |
+
- bleu
|
8 |
model-index:
|
9 |
- name: t5-big-scratch-iwslt3008
|
10 |
results: []
|
|
|
16 |
# t5-big-scratch-iwslt3008
|
17 |
|
18 |
This model is a fine-tuned version of [google-t5/t5-small](https://huggingface.co/google-t5/t5-small) on an unknown dataset.
|
19 |
+
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 2.3140
|
21 |
+
- Bleu: 0.2623
|
22 |
+
- Gen Len: 26.3604
|
23 |
|
24 |
## Model description
|
25 |
|
all_results.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 50.0,
|
3 |
+
"eval_bleu": 0.2623,
|
4 |
+
"eval_gen_len": 26.3604,
|
5 |
+
"eval_loss": 2.3139915466308594,
|
6 |
+
"eval_runtime": 10.8629,
|
7 |
+
"eval_samples": 888,
|
8 |
+
"eval_samples_per_second": 81.746,
|
9 |
+
"eval_steps_per_second": 1.289,
|
10 |
+
"predict_bleu": 0.2694,
|
11 |
+
"predict_gen_len": 21.9933,
|
12 |
+
"predict_loss": 2.3295047283172607,
|
13 |
+
"predict_runtime": 86.6486,
|
14 |
+
"predict_samples": 8079,
|
15 |
+
"predict_samples_per_second": 93.239,
|
16 |
+
"predict_steps_per_second": 1.466,
|
17 |
+
"total_flos": 9.94521893679661e+17,
|
18 |
+
"train_loss": 1.8893472661618176,
|
19 |
+
"train_runtime": 93675.3384,
|
20 |
+
"train_samples": 206112,
|
21 |
+
"train_samples_per_second": 110.014,
|
22 |
+
"train_steps_per_second": 3.438
|
23 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 50.0,
|
3 |
+
"eval_bleu": 0.2623,
|
4 |
+
"eval_gen_len": 26.3604,
|
5 |
+
"eval_loss": 2.3139915466308594,
|
6 |
+
"eval_runtime": 10.8629,
|
7 |
+
"eval_samples": 888,
|
8 |
+
"eval_samples_per_second": 81.746,
|
9 |
+
"eval_steps_per_second": 1.289
|
10 |
+
}
|
generated_predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
generation_config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
"pad_token_id": 0,
|
|
|
1 |
{
|
|
|
2 |
"decoder_start_token_id": 0,
|
3 |
"eos_token_id": 1,
|
4 |
"pad_token_id": 0,
|
predict_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_bleu": 0.2694,
|
3 |
+
"predict_gen_len": 21.9933,
|
4 |
+
"predict_loss": 2.3295047283172607,
|
5 |
+
"predict_runtime": 86.6486,
|
6 |
+
"predict_samples": 8079,
|
7 |
+
"predict_samples_per_second": 93.239,
|
8 |
+
"predict_steps_per_second": 1.466
|
9 |
+
}
|
runs/May25_13-37-31_0d573eeffc83/events.out.tfevents.1716737990.0d573eeffc83.1345596.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68b217d1268ed9258889dc5b343ea4612b89400c4d4a0f9719827944f1bd7c6c
|
3 |
+
size 465
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 50.0,
|
3 |
+
"total_flos": 9.94521893679661e+17,
|
4 |
+
"train_loss": 1.8893472661618176,
|
5 |
+
"train_runtime": 93675.3384,
|
6 |
+
"train_samples": 206112,
|
7 |
+
"train_samples_per_second": 110.014,
|
8 |
+
"train_steps_per_second": 3.438
|
9 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|