Training in progress, step 52000
Browse files
- README.md +69 -69
- all_results.json +8 -8
- config.json +4 -4
- final/config.json +7 -1
- final/model.safetensors +2 -2
- final/tokenizer.json +0 -0
- final/tokenizer_config.json +5 -3
- final/training_args.bin +1 -1
- final/vocab.txt +0 -0
- model.safetensors +1 -1
- test_results.json +8 -8
- tokenizer_config.json +1 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: apache-2.0
|
4 |
-
base_model:
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
metrics:
|
@@ -9,22 +9,22 @@ metrics:
|
|
9 |
- recall
|
10 |
- accuracy
|
11 |
model-index:
|
12 |
-
- name:
|
13 |
results: []
|
14 |
---
|
15 |
|
16 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
17 |
should probably proofread and complete it, then remove this comment. -->
|
18 |
|
19 |
-
#
|
20 |
|
21 |
-
This model is a fine-tuned version of [
|
22 |
It achieves the following results on the evaluation set:
|
23 |
-
- Loss: 1.
|
24 |
-
- Precision: 0.
|
25 |
-
- Recall: 0.
|
26 |
-
- F1 Macro: 0.
|
27 |
-
- Accuracy: 0.
|
28 |
|
29 |
## Model description
|
30 |
|
@@ -56,66 +56,66 @@ The following hyperparameters were used during training:
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 Macro | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:--------:|:--------:|
|
59 |
-
| No log | 0 | 0 | 3.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
|
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
-
| 0.
|
72 |
-
| 0.
|
73 |
-
| 0.
|
74 |
-
| 0.
|
75 |
-
| 0.
|
76 |
-
| 0.
|
77 |
-
| 0.
|
78 |
-
| 0.
|
79 |
-
| 0.
|
80 |
-
| 0.
|
81 |
-
| 0.
|
82 |
-
| 0.
|
83 |
-
| 0.
|
84 |
-
| 0.
|
85 |
-
| 0.
|
86 |
-
| 0.
|
87 |
-
| 0.
|
88 |
-
| 0.
|
89 |
-
| 0.
|
90 |
-
| 0.
|
91 |
-
| 0.
|
92 |
-
| 0.
|
93 |
-
| 0.
|
94 |
-
| 0.
|
95 |
-
| 0.
|
96 |
-
| 0.
|
97 |
-
| 0.
|
98 |
-
| 0.
|
99 |
-
| 0.
|
100 |
-
| 0.
|
101 |
-
| 0.
|
102 |
-
| 0.
|
103 |
-
| 0.
|
104 |
-
| 0.
|
105 |
-
| 0.
|
106 |
-
| 0.
|
107 |
-
| 0.
|
108 |
-
| 0.
|
109 |
-
| 0.
|
110 |
-
| 0.
|
111 |
-
| 0.
|
112 |
-
| 0.
|
113 |
-
| 0.
|
114 |
-
| 0.
|
115 |
-
| 0.
|
116 |
-
| 0.
|
117 |
-
| 0.
|
118 |
-
| 0.
|
119 |
|
120 |
|
121 |
### Framework versions
|
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: apache-2.0
|
4 |
+
base_model: NbAiLab/nb-sbert-base
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
metrics:
|
|
|
9 |
- recall
|
10 |
- accuracy
|
11 |
model-index:
|
12 |
+
- name: nb-sbert-base-edu-scorer-lr3e4-bs32
|
13 |
results: []
|
14 |
---
|
15 |
|
16 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
17 |
should probably proofread and complete it, then remove this comment. -->
|
18 |
|
19 |
+
# nb-sbert-base-edu-scorer-lr3e4-bs32
|
20 |
|
21 |
+
This model is a fine-tuned version of [NbAiLab/nb-sbert-base](https://huggingface.co/NbAiLab/nb-sbert-base) on an unknown dataset.
|
22 |
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 1.1904
|
24 |
+
- Precision: 0.5148
|
25 |
+
- Recall: 0.3533
|
26 |
+
- F1 Macro: 0.3432
|
27 |
+
- Accuracy: 0.3836
|
28 |
|
29 |
## Model description
|
30 |
|
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 Macro | Accuracy |
|
58 |
|:-------------:|:-------:|:-----:|:---------------:|:---------:|:------:|:--------:|:--------:|
|
59 |
+
| No log | 0 | 0 | 3.3343 | 0.0587 | 0.1667 | 0.0869 | 0.3524 |
|
60 |
+
| 0.8018 | 0.3368 | 1000 | 0.7952 | 0.3984 | 0.3147 | 0.3076 | 0.4594 |
|
61 |
+
| 0.7695 | 0.6736 | 2000 | 0.7506 | 0.4340 | 0.3543 | 0.3560 | 0.476 |
|
62 |
+
| 0.7415 | 1.0104 | 3000 | 0.7203 | 0.4055 | 0.3453 | 0.3416 | 0.498 |
|
63 |
+
| 0.6979 | 1.3473 | 4000 | 0.7645 | 0.4222 | 0.3467 | 0.3465 | 0.4438 |
|
64 |
+
| 0.6998 | 1.6841 | 5000 | 0.7053 | 0.4535 | 0.3496 | 0.3519 | 0.5026 |
|
65 |
+
| 0.689 | 2.0209 | 6000 | 0.6925 | 0.4113 | 0.3400 | 0.3327 | 0.5058 |
|
66 |
+
| 0.7226 | 2.3577 | 7000 | 0.6814 | 0.4192 | 0.3460 | 0.3436 | 0.4958 |
|
67 |
+
| 0.6831 | 2.6945 | 8000 | 0.6765 | 0.4067 | 0.3557 | 0.3581 | 0.5118 |
|
68 |
+
| 0.6639 | 3.0313 | 9000 | 0.6643 | 0.4095 | 0.3598 | 0.3623 | 0.5194 |
|
69 |
+
| 0.6487 | 3.3681 | 10000 | 0.6764 | 0.4103 | 0.3571 | 0.3551 | 0.533 |
|
70 |
+
| 0.664 | 3.7050 | 11000 | 0.6706 | 0.4008 | 0.3480 | 0.3457 | 0.5192 |
|
71 |
+
| 0.6485 | 4.0418 | 12000 | 0.6504 | 0.4230 | 0.3717 | 0.3749 | 0.5322 |
|
72 |
+
| 0.637 | 4.3786 | 13000 | 0.6925 | 0.4096 | 0.3597 | 0.3574 | 0.4668 |
|
73 |
+
| 0.6529 | 4.7154 | 14000 | 0.6493 | 0.4046 | 0.3631 | 0.3648 | 0.5252 |
|
74 |
+
| 0.5932 | 5.0522 | 15000 | 0.6512 | 0.4197 | 0.3631 | 0.3656 | 0.5248 |
|
75 |
+
| 0.618 | 5.3890 | 16000 | 0.6633 | 0.4129 | 0.3590 | 0.3623 | 0.5276 |
|
76 |
+
| 0.5966 | 5.7258 | 17000 | 0.7115 | 0.4439 | 0.3446 | 0.3415 | 0.5284 |
|
77 |
+
| 0.6094 | 6.0626 | 18000 | 0.6558 | 0.4207 | 0.3596 | 0.3623 | 0.5252 |
|
78 |
+
| 0.62 | 6.3995 | 19000 | 0.6527 | 0.4106 | 0.3707 | 0.3743 | 0.5238 |
|
79 |
+
| 0.5935 | 6.7363 | 20000 | 0.6473 | 0.4168 | 0.3721 | 0.3755 | 0.52 |
|
80 |
+
| 0.5832 | 7.0731 | 21000 | 0.6533 | 0.4339 | 0.3619 | 0.3637 | 0.5266 |
|
81 |
+
| 0.5864 | 7.4099 | 22000 | 0.6543 | 0.4106 | 0.3777 | 0.3814 | 0.5246 |
|
82 |
+
| 0.5738 | 7.7467 | 23000 | 0.6503 | 0.4128 | 0.3726 | 0.3756 | 0.5236 |
|
83 |
+
| 0.5349 | 8.0835 | 24000 | 0.6563 | 0.4230 | 0.3675 | 0.3711 | 0.5298 |
|
84 |
+
| 0.5521 | 8.4203 | 25000 | 0.6439 | 0.4092 | 0.3725 | 0.3763 | 0.5288 |
|
85 |
+
| 0.588 | 8.7572 | 26000 | 0.6580 | 0.4079 | 0.3666 | 0.3704 | 0.5106 |
|
86 |
+
| 0.572 | 9.0940 | 27000 | 0.6561 | 0.4106 | 0.3684 | 0.3711 | 0.5172 |
|
87 |
+
| 0.5436 | 9.4308 | 28000 | 0.6514 | 0.4107 | 0.3688 | 0.3724 | 0.521 |
|
88 |
+
| 0.5268 | 9.7676 | 29000 | 0.6622 | 0.4167 | 0.3593 | 0.3619 | 0.5278 |
|
89 |
+
| 0.5289 | 10.1044 | 30000 | 0.6567 | 0.4068 | 0.3698 | 0.3724 | 0.5254 |
|
90 |
+
| 0.5285 | 10.4412 | 31000 | 0.6635 | 0.4086 | 0.3574 | 0.3605 | 0.527 |
|
91 |
+
| 0.5051 | 10.7780 | 32000 | 0.6602 | 0.4114 | 0.3678 | 0.3710 | 0.533 |
|
92 |
+
| 0.5249 | 11.1149 | 33000 | 0.6599 | 0.4062 | 0.3784 | 0.3809 | 0.5192 |
|
93 |
+
| 0.5173 | 11.4517 | 34000 | 0.6622 | 0.4037 | 0.3569 | 0.3576 | 0.526 |
|
94 |
+
| 0.5262 | 11.7885 | 35000 | 0.6599 | 0.3977 | 0.3708 | 0.3731 | 0.5124 |
|
95 |
+
| 0.4839 | 12.1253 | 36000 | 0.6586 | 0.4352 | 0.3841 | 0.3911 | 0.5206 |
|
96 |
+
| 0.4992 | 12.4621 | 37000 | 0.6631 | 0.3995 | 0.3587 | 0.3627 | 0.5152 |
|
97 |
+
| 0.5014 | 12.7989 | 38000 | 0.6615 | 0.4014 | 0.3663 | 0.3695 | 0.519 |
|
98 |
+
| 0.4727 | 13.1357 | 39000 | 0.6637 | 0.4010 | 0.3611 | 0.3642 | 0.5242 |
|
99 |
+
| 0.5016 | 13.4725 | 40000 | 0.6639 | 0.3909 | 0.3670 | 0.3682 | 0.5056 |
|
100 |
+
| 0.4763 | 13.8094 | 41000 | 0.6665 | 0.4065 | 0.3646 | 0.3683 | 0.524 |
|
101 |
+
| 0.4578 | 14.1462 | 42000 | 0.6630 | 0.4018 | 0.3657 | 0.3689 | 0.5218 |
|
102 |
+
| 0.4516 | 14.4830 | 43000 | 0.6702 | 0.3908 | 0.3697 | 0.3701 | 0.5048 |
|
103 |
+
| 0.461 | 14.8198 | 44000 | 0.6565 | 0.4008 | 0.3634 | 0.3663 | 0.5122 |
|
104 |
+
| 0.4628 | 15.1566 | 45000 | 0.6657 | 0.3916 | 0.3665 | 0.3685 | 0.5132 |
|
105 |
+
| 0.4557 | 15.4934 | 46000 | 0.6687 | 0.3972 | 0.3638 | 0.3664 | 0.5246 |
|
106 |
+
| 0.449 | 15.8302 | 47000 | 0.6635 | 0.4347 | 0.3727 | 0.3812 | 0.5194 |
|
107 |
+
| 0.4555 | 16.1671 | 48000 | 0.6626 | 0.4153 | 0.3749 | 0.3815 | 0.5152 |
|
108 |
+
| 0.4554 | 16.5039 | 49000 | 0.6681 | 0.3978 | 0.3705 | 0.3733 | 0.5208 |
|
109 |
+
| 0.4093 | 16.8407 | 50000 | 0.6661 | 0.4220 | 0.3759 | 0.3823 | 0.52 |
|
110 |
+
| 0.435 | 17.1775 | 51000 | 0.6653 | 0.4268 | 0.3801 | 0.3868 | 0.516 |
|
111 |
+
| 0.4368 | 17.5143 | 52000 | 0.6686 | 0.3961 | 0.3613 | 0.3632 | 0.5238 |
|
112 |
+
| 0.4308 | 17.8511 | 53000 | 0.6654 | 0.4191 | 0.3757 | 0.3827 | 0.5152 |
|
113 |
+
| 0.411 | 18.1879 | 54000 | 0.6671 | 0.3953 | 0.3643 | 0.3674 | 0.517 |
|
114 |
+
| 0.4365 | 18.5248 | 55000 | 0.6675 | 0.4193 | 0.3758 | 0.3828 | 0.5122 |
|
115 |
+
| 0.4168 | 18.8616 | 56000 | 0.6649 | 0.3969 | 0.3668 | 0.3686 | 0.5228 |
|
116 |
+
| 0.4313 | 19.1984 | 57000 | 0.6647 | 0.3942 | 0.3649 | 0.3675 | 0.517 |
|
117 |
+
| 0.4175 | 19.5352 | 58000 | 0.6650 | 0.3936 | 0.3646 | 0.3667 | 0.5188 |
|
118 |
+
| 0.4214 | 19.8720 | 59000 | 0.6649 | 0.3971 | 0.3681 | 0.3707 | 0.5176 |
|
119 |
|
120 |
|
121 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
{
|
2 |
"epoch": 20.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1_macro": 0.
|
5 |
-
"eval_loss": 1.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples_per_second":
|
10 |
-
"eval_steps_per_second":
|
11 |
}
|
|
|
1 |
{
|
2 |
"epoch": 20.0,
|
3 |
+
"eval_accuracy": 0.3836363636363636,
|
4 |
+
"eval_f1_macro": 0.3431871935359531,
|
5 |
+
"eval_loss": 1.190366506576538,
|
6 |
+
"eval_precision": 0.5148182228032372,
|
7 |
+
"eval_recall": 0.35333333333333333,
|
8 |
+
"eval_runtime": 6.0691,
|
9 |
+
"eval_samples_per_second": 90.623,
|
10 |
+
"eval_steps_per_second": 2.966
|
11 |
}
|
config.json
CHANGED
@@ -8,20 +8,20 @@
|
|
8 |
"eos_token_id": 2,
|
9 |
"hidden_act": "gelu",
|
10 |
"hidden_dropout_prob": 0.0,
|
11 |
-
"hidden_size":
|
12 |
"id2label": {
|
13 |
"0": "LABEL_0"
|
14 |
},
|
15 |
"initializer_range": 0.02,
|
16 |
-
"intermediate_size":
|
17 |
"label2id": {
|
18 |
"LABEL_0": 0
|
19 |
},
|
20 |
"layer_norm_eps": 1e-05,
|
21 |
"max_position_embeddings": 514,
|
22 |
"model_type": "xlm-roberta",
|
23 |
-
"num_attention_heads":
|
24 |
-
"num_hidden_layers":
|
25 |
"output_past": true,
|
26 |
"pad_token_id": 1,
|
27 |
"position_embedding_type": "absolute",
|
|
|
8 |
"eos_token_id": 2,
|
9 |
"hidden_act": "gelu",
|
10 |
"hidden_dropout_prob": 0.0,
|
11 |
+
"hidden_size": 1024,
|
12 |
"id2label": {
|
13 |
"0": "LABEL_0"
|
14 |
},
|
15 |
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 4096,
|
17 |
"label2id": {
|
18 |
"LABEL_0": 0
|
19 |
},
|
20 |
"layer_norm_eps": 1e-05,
|
21 |
"max_position_embeddings": 514,
|
22 |
"model_type": "xlm-roberta",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
"output_past": true,
|
26 |
"pad_token_id": 1,
|
27 |
"position_embedding_type": "absolute",
|
final/config.json
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"classifier_dropout": 0.0,
|
|
|
7 |
"gradient_checkpointing": false,
|
8 |
"hidden_act": "gelu",
|
9 |
"hidden_dropout_prob": 0.0,
|
@@ -22,11 +23,16 @@
|
|
22 |
"num_attention_heads": 12,
|
23 |
"num_hidden_layers": 12,
|
24 |
"pad_token_id": 0,
|
|
|
|
|
|
|
|
|
|
|
25 |
"position_embedding_type": "absolute",
|
26 |
"problem_type": "regression",
|
27 |
"torch_dtype": "float32",
|
28 |
"transformers_version": "4.53.2",
|
29 |
"type_vocab_size": 2,
|
30 |
"use_cache": true,
|
31 |
-
"vocab_size":
|
32 |
}
|
|
|
4 |
],
|
5 |
"attention_probs_dropout_prob": 0.1,
|
6 |
"classifier_dropout": 0.0,
|
7 |
+
"directionality": "bidi",
|
8 |
"gradient_checkpointing": false,
|
9 |
"hidden_act": "gelu",
|
10 |
"hidden_dropout_prob": 0.0,
|
|
|
23 |
"num_attention_heads": 12,
|
24 |
"num_hidden_layers": 12,
|
25 |
"pad_token_id": 0,
|
26 |
+
"pooler_fc_size": 768,
|
27 |
+
"pooler_num_attention_heads": 12,
|
28 |
+
"pooler_num_fc_layers": 3,
|
29 |
+
"pooler_size_per_head": 128,
|
30 |
+
"pooler_type": "first_token_transform",
|
31 |
"position_embedding_type": "absolute",
|
32 |
"problem_type": "regression",
|
33 |
"torch_dtype": "float32",
|
34 |
"transformers_version": "4.53.2",
|
35 |
"type_vocab_size": 2,
|
36 |
"use_cache": true,
|
37 |
+
"vocab_size": 119547
|
38 |
}
|
final/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81687d82e40b363cbd2ddb9d840535b97619e680e82d969c68f2dd687fe6a7f2
|
3 |
+
size 711440380
|
final/tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
final/tokenizer_config.json
CHANGED
@@ -41,13 +41,15 @@
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
44 |
-
"clean_up_tokenization_spaces":
|
45 |
"cls_token": "[CLS]",
|
46 |
-
"
|
|
|
47 |
"extra_special_tokens": {},
|
48 |
"mask_token": "[MASK]",
|
49 |
-
"max_length":
|
50 |
"model_max_length": 512,
|
|
|
51 |
"pad_to_multiple_of": null,
|
52 |
"pad_token": "[PAD]",
|
53 |
"pad_token_type_id": 0,
|
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
44 |
+
"clean_up_tokenization_spaces": false,
|
45 |
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": false,
|
48 |
"extra_special_tokens": {},
|
49 |
"mask_token": "[MASK]",
|
50 |
+
"max_length": 75,
|
51 |
"model_max_length": 512,
|
52 |
+
"never_split": null,
|
53 |
"pad_to_multiple_of": null,
|
54 |
"pad_token": "[PAD]",
|
55 |
"pad_token_type_id": 0,
|
final/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5777
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e6daa316635faea8326b3bb0f7fc1c04a4847156260528b67cbdd03d2f53470
|
3 |
size 5777
|
final/vocab.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2239614572
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db9a07d44c5548200d8d90e7a44d3fda0dbcf85bfd0b84fe817cbbd73382cd9
|
3 |
size 2239614572
|
test_results.json
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
{
|
2 |
"epoch": 20.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1_macro": 0.
|
5 |
-
"eval_loss": 1.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
-
"eval_samples_per_second":
|
10 |
-
"eval_steps_per_second":
|
11 |
}
|
|
|
1 |
{
|
2 |
"epoch": 20.0,
|
3 |
+
"eval_accuracy": 0.3836363636363636,
|
4 |
+
"eval_f1_macro": 0.3431871935359531,
|
5 |
+
"eval_loss": 1.190366506576538,
|
6 |
+
"eval_precision": 0.5148182228032372,
|
7 |
+
"eval_recall": 0.35333333333333333,
|
8 |
+
"eval_runtime": 6.0691,
|
9 |
+
"eval_samples_per_second": 90.623,
|
10 |
+
"eval_steps_per_second": 2.966
|
11 |
}
|
tokenizer_config.json
CHANGED
@@ -41,6 +41,7 @@
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
|
|
44 |
"bos_token": "<s>",
|
45 |
"clean_up_tokenization_spaces": true,
|
46 |
"cls_token": "<s>",
|
|
|
41 |
"special": true
|
42 |
}
|
43 |
},
|
44 |
+
"additional_special_tokens": [],
|
45 |
"bos_token": "<s>",
|
46 |
"clean_up_tokenization_spaces": true,
|
47 |
"cls_token": "<s>",
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5777
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f19acb01d19c2e15a4bde0136feb6a2ffb10b94d1c1f1fd7673b6169895cf74
|
3 |
size 5777
|