Model save
Browse files
- README.md +218 -0
- logs/events.out.tfevents.1745635477.deac701d2f5a.1083.2 +2 -2
- merges.txt +0 -0
- model.safetensors +1 -1
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: mit
|
4 |
+
base_model: roberta-base
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
+
metrics:
|
8 |
+
- accuracy
|
9 |
+
- f1
|
10 |
+
- precision
|
11 |
+
- recall
|
12 |
+
model-index:
|
13 |
+
- name: roberta-finetuned-wines-compound-query
|
14 |
+
results: []
|
15 |
+
---
|
16 |
+
|
17 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
18 |
+
should probably proofread and complete it, then remove this comment. -->
|
19 |
+
|
20 |
+
# roberta-finetuned-wines-compound-query
|
21 |
+
|
22 |
+
This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
|
23 |
+
It achieves the following results on the evaluation set:
|
24 |
+
- Loss: 5.2160
|
25 |
+
- Accuracy: 0.1305
|
26 |
+
- F1: 0.0856
|
27 |
+
- Precision: 0.4812
|
28 |
+
- Recall: 0.2648
|
29 |
+
|
30 |
+
## Model description
|
31 |
+
|
32 |
+
More information needed
|
33 |
+
|
34 |
+
## Intended uses & limitations
|
35 |
+
|
36 |
+
More information needed
|
37 |
+
|
38 |
+
## Training and evaluation data
|
39 |
+
|
40 |
+
More information needed
|
41 |
+
|
42 |
+
## Training procedure
|
43 |
+
|
44 |
+
### Training hyperparameters
|
45 |
+
|
46 |
+
The following hyperparameters were used during training:
|
47 |
+
- learning_rate: 1e-05
|
48 |
+
- train_batch_size: 32
|
49 |
+
- eval_batch_size: 32
|
50 |
+
- seed: 42
|
51 |
+
- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
|
52 |
+
- lr_scheduler_type: linear
|
53 |
+
- lr_scheduler_warmup_steps: 5
|
54 |
+
- num_epochs: 150
|
55 |
+
- mixed_precision_training: Native AMP
|
56 |
+
|
57 |
+
### Training results
|
58 |
+
|
59 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
|
60 |
+
|:-------------:|:-----:|:-----:|:---------------:|:--------:|:------:|:---------:|:------:|
|
61 |
+
| 7.8611 | 1.0 | 405 | 7.8547 | 0.0006 | 0.0000 | 0.9994 | 0.0006 |
|
62 |
+
| 7.7809 | 2.0 | 810 | 7.8142 | 0.0019 | 0.0000 | 0.9907 | 0.0053 |
|
63 |
+
| 7.6676 | 3.0 | 1215 | 7.6987 | 0.0068 | 0.0004 | 0.9762 | 0.0097 |
|
64 |
+
| 7.5284 | 4.0 | 1620 | 7.5786 | 0.0142 | 0.0010 | 0.9640 | 0.0176 |
|
65 |
+
| 7.383 | 5.0 | 2025 | 7.4576 | 0.0192 | 0.0034 | 0.9524 | 0.0204 |
|
66 |
+
| 7.2399 | 6.0 | 2430 | 7.3469 | 0.0207 | 0.0039 | 0.9518 | 0.0216 |
|
67 |
+
| 7.0945 | 7.0 | 2835 | 7.2337 | 0.0238 | 0.0052 | 0.9483 | 0.0256 |
|
68 |
+
| 6.9556 | 8.0 | 3240 | 7.1384 | 0.0260 | 0.0051 | 0.9422 | 0.0284 |
|
69 |
+
| 6.8258 | 9.0 | 3645 | 7.0419 | 0.0257 | 0.0054 | 0.9441 | 0.0273 |
|
70 |
+
| 6.6885 | 10.0 | 4050 | 6.9390 | 0.0297 | 0.0067 | 0.9390 | 0.0305 |
|
71 |
+
| 6.5603 | 11.0 | 4455 | 6.8469 | 0.0309 | 0.0079 | 0.9322 | 0.0330 |
|
72 |
+
| 6.4389 | 12.0 | 4860 | 6.7517 | 0.0353 | 0.0108 | 0.9265 | 0.0377 |
|
73 |
+
| 6.3148 | 13.0 | 5265 | 6.6732 | 0.0371 | 0.0119 | 0.9266 | 0.0420 |
|
74 |
+
| 6.2018 | 14.0 | 5670 | 6.5963 | 0.0405 | 0.0118 | 0.9164 | 0.0450 |
|
75 |
+
| 6.0827 | 15.0 | 6075 | 6.5235 | 0.0421 | 0.0125 | 0.9165 | 0.0468 |
|
76 |
+
| 5.9737 | 16.0 | 6480 | 6.4529 | 0.0448 | 0.0140 | 0.9109 | 0.0520 |
|
77 |
+
| 5.8685 | 17.0 | 6885 | 6.3915 | 0.0476 | 0.0150 | 0.9085 | 0.0551 |
|
78 |
+
| 5.7643 | 18.0 | 7290 | 6.3365 | 0.0507 | 0.0180 | 0.9096 | 0.0562 |
|
79 |
+
| 5.6668 | 19.0 | 7695 | 6.2701 | 0.0507 | 0.0178 | 0.8981 | 0.0606 |
|
80 |
+
| 5.5708 | 20.0 | 8100 | 6.2047 | 0.0523 | 0.0174 | 0.8884 | 0.0631 |
|
81 |
+
| 5.4755 | 21.0 | 8505 | 6.1550 | 0.0572 | 0.0207 | 0.8848 | 0.0715 |
|
82 |
+
| 5.3795 | 22.0 | 8910 | 6.1095 | 0.0557 | 0.0213 | 0.8792 | 0.0700 |
|
83 |
+
| 5.2917 | 23.0 | 9315 | 6.0616 | 0.0563 | 0.0193 | 0.8711 | 0.0722 |
|
84 |
+
| 5.1977 | 24.0 | 9720 | 6.0155 | 0.0569 | 0.0220 | 0.8685 | 0.0752 |
|
85 |
+
| 5.1159 | 25.0 | 10125 | 5.9680 | 0.0591 | 0.0218 | 0.8574 | 0.0812 |
|
86 |
+
| 5.0353 | 26.0 | 10530 | 5.9264 | 0.0616 | 0.0238 | 0.8538 | 0.0845 |
|
87 |
+
| 4.951 | 27.0 | 10935 | 5.8896 | 0.0588 | 0.0229 | 0.8575 | 0.0812 |
|
88 |
+
| 4.8736 | 28.0 | 11340 | 5.8609 | 0.0622 | 0.0245 | 0.8466 | 0.0890 |
|
89 |
+
| 4.7959 | 29.0 | 11745 | 5.8207 | 0.0646 | 0.0266 | 0.8387 | 0.0924 |
|
90 |
+
| 4.718 | 30.0 | 12150 | 5.7844 | 0.0653 | 0.0275 | 0.8308 | 0.0954 |
|
91 |
+
| 4.6401 | 31.0 | 12555 | 5.7453 | 0.0665 | 0.0277 | 0.8197 | 0.1019 |
|
92 |
+
| 4.5685 | 32.0 | 12960 | 5.7115 | 0.0680 | 0.0293 | 0.8191 | 0.1024 |
|
93 |
+
| 4.491 | 33.0 | 13365 | 5.6952 | 0.0699 | 0.0296 | 0.8192 | 0.1068 |
|
94 |
+
| 4.4225 | 34.0 | 13770 | 5.6656 | 0.0702 | 0.0306 | 0.8085 | 0.1105 |
|
95 |
+
| 4.3541 | 35.0 | 14175 | 5.6369 | 0.0718 | 0.0325 | 0.8037 | 0.1123 |
|
96 |
+
| 4.2804 | 36.0 | 14580 | 5.6204 | 0.0708 | 0.0311 | 0.7993 | 0.1112 |
|
97 |
+
| 4.2152 | 37.0 | 14985 | 5.5830 | 0.0742 | 0.0342 | 0.7867 | 0.1187 |
|
98 |
+
| 4.1469 | 38.0 | 15390 | 5.5612 | 0.0770 | 0.0380 | 0.7821 | 0.1240 |
|
99 |
+
| 4.0787 | 39.0 | 15795 | 5.5413 | 0.0783 | 0.0402 | 0.7756 | 0.1272 |
|
100 |
+
| 4.0136 | 40.0 | 16200 | 5.5286 | 0.0807 | 0.0414 | 0.7678 | 0.1335 |
|
101 |
+
| 3.95 | 41.0 | 16605 | 5.4969 | 0.0832 | 0.0415 | 0.7628 | 0.1345 |
|
102 |
+
| 3.8879 | 42.0 | 17010 | 5.4915 | 0.0817 | 0.0429 | 0.7596 | 0.1346 |
|
103 |
+
| 3.8198 | 43.0 | 17415 | 5.4736 | 0.0844 | 0.0446 | 0.7582 | 0.1392 |
|
104 |
+
| 3.7596 | 44.0 | 17820 | 5.4517 | 0.0844 | 0.0441 | 0.7378 | 0.1445 |
|
105 |
+
| 3.6937 | 45.0 | 18225 | 5.4321 | 0.0885 | 0.0471 | 0.7370 | 0.1485 |
|
106 |
+
| 3.6347 | 46.0 | 18630 | 5.4178 | 0.0885 | 0.0466 | 0.7330 | 0.1483 |
|
107 |
+
| 3.5765 | 47.0 | 19035 | 5.4049 | 0.0885 | 0.0451 | 0.7231 | 0.1497 |
|
108 |
+
| 3.5117 | 48.0 | 19440 | 5.3857 | 0.0888 | 0.0474 | 0.7198 | 0.1518 |
|
109 |
+
| 3.4533 | 49.0 | 19845 | 5.3703 | 0.0956 | 0.0521 | 0.7213 | 0.1615 |
|
110 |
+
| 3.4016 | 50.0 | 20250 | 5.3694 | 0.0919 | 0.0487 | 0.7123 | 0.1565 |
|
111 |
+
| 3.3415 | 51.0 | 20655 | 5.3566 | 0.0946 | 0.0506 | 0.7069 | 0.1625 |
|
112 |
+
| 3.2799 | 52.0 | 21060 | 5.3463 | 0.0984 | 0.0548 | 0.7022 | 0.1671 |
|
113 |
+
| 3.2316 | 53.0 | 21465 | 5.3303 | 0.0965 | 0.0509 | 0.6954 | 0.1666 |
|
114 |
+
| 3.1678 | 54.0 | 21870 | 5.3204 | 0.0974 | 0.0543 | 0.6862 | 0.1704 |
|
115 |
+
| 3.1159 | 55.0 | 22275 | 5.3036 | 0.0987 | 0.0549 | 0.6873 | 0.1750 |
|
116 |
+
| 3.0581 | 56.0 | 22680 | 5.3015 | 0.1008 | 0.0563 | 0.6721 | 0.1792 |
|
117 |
+
| 3.0063 | 57.0 | 23085 | 5.2938 | 0.1021 | 0.0570 | 0.6702 | 0.1805 |
|
118 |
+
| 2.951 | 58.0 | 23490 | 5.2866 | 0.0999 | 0.0559 | 0.6690 | 0.1768 |
|
119 |
+
| 2.9004 | 59.0 | 23895 | 5.2836 | 0.1018 | 0.0583 | 0.6554 | 0.1808 |
|
120 |
+
| 2.8496 | 60.0 | 24300 | 5.2757 | 0.1018 | 0.0576 | 0.6559 | 0.1833 |
|
121 |
+
| 2.7963 | 61.0 | 24705 | 5.2592 | 0.1042 | 0.0598 | 0.6501 | 0.1877 |
|
122 |
+
| 2.7511 | 62.0 | 25110 | 5.2581 | 0.1070 | 0.0628 | 0.6485 | 0.1916 |
|
123 |
+
| 2.6988 | 63.0 | 25515 | 5.2654 | 0.1045 | 0.0619 | 0.6410 | 0.1917 |
|
124 |
+
| 2.6496 | 64.0 | 25920 | 5.2523 | 0.1045 | 0.0622 | 0.6397 | 0.1921 |
|
125 |
+
| 2.6022 | 65.0 | 26325 | 5.2447 | 0.1058 | 0.0634 | 0.6298 | 0.1927 |
|
126 |
+
| 2.553 | 66.0 | 26730 | 5.2311 | 0.1036 | 0.0627 | 0.6181 | 0.1954 |
|
127 |
+
| 2.5057 | 67.0 | 27135 | 5.2333 | 0.1052 | 0.0628 | 0.6125 | 0.1983 |
|
128 |
+
| 2.4594 | 68.0 | 27540 | 5.2219 | 0.1083 | 0.0649 | 0.6050 | 0.2059 |
|
129 |
+
| 2.416 | 69.0 | 27945 | 5.2276 | 0.1092 | 0.0659 | 0.6112 | 0.2027 |
|
130 |
+
| 2.3675 | 70.0 | 28350 | 5.2181 | 0.1120 | 0.0665 | 0.6081 | 0.2065 |
|
131 |
+
| 2.3272 | 71.0 | 28755 | 5.2168 | 0.1107 | 0.0686 | 0.6050 | 0.2012 |
|
132 |
+
| 2.282 | 72.0 | 29160 | 5.2114 | 0.1098 | 0.0664 | 0.6007 | 0.2071 |
|
133 |
+
| 2.2436 | 73.0 | 29565 | 5.2123 | 0.1117 | 0.0692 | 0.6051 | 0.2068 |
|
134 |
+
| 2.1973 | 74.0 | 29970 | 5.2079 | 0.1129 | 0.0694 | 0.5956 | 0.2096 |
|
135 |
+
| 2.1585 | 75.0 | 30375 | 5.2037 | 0.1141 | 0.0705 | 0.5928 | 0.2162 |
|
136 |
+
| 2.1155 | 76.0 | 30780 | 5.2008 | 0.1151 | 0.0716 | 0.5890 | 0.2171 |
|
137 |
+
| 2.0752 | 77.0 | 31185 | 5.1935 | 0.1166 | 0.0715 | 0.5879 | 0.2177 |
|
138 |
+
| 2.0406 | 78.0 | 31590 | 5.1995 | 0.1144 | 0.0717 | 0.5880 | 0.2197 |
|
139 |
+
| 1.9971 | 79.0 | 31995 | 5.2002 | 0.1151 | 0.0725 | 0.5797 | 0.2205 |
|
140 |
+
| 1.9613 | 80.0 | 32400 | 5.1967 | 0.1148 | 0.0721 | 0.5808 | 0.2218 |
|
141 |
+
| 1.9248 | 81.0 | 32805 | 5.1955 | 0.1141 | 0.0714 | 0.5768 | 0.2222 |
|
142 |
+
| 1.8873 | 82.0 | 33210 | 5.1930 | 0.1151 | 0.0731 | 0.5667 | 0.2257 |
|
143 |
+
| 1.8555 | 83.0 | 33615 | 5.1893 | 0.1166 | 0.0740 | 0.5665 | 0.2300 |
|
144 |
+
| 1.8207 | 84.0 | 34020 | 5.1867 | 0.1185 | 0.0746 | 0.5652 | 0.2299 |
|
145 |
+
| 1.7824 | 85.0 | 34425 | 5.1881 | 0.1203 | 0.0752 | 0.5727 | 0.2292 |
|
146 |
+
| 1.7548 | 86.0 | 34830 | 5.1834 | 0.1188 | 0.0760 | 0.5575 | 0.2310 |
|
147 |
+
| 1.7234 | 87.0 | 35235 | 5.1864 | 0.1194 | 0.0765 | 0.5644 | 0.2324 |
|
148 |
+
| 1.683 | 88.0 | 35640 | 5.1831 | 0.1209 | 0.0776 | 0.5570 | 0.2350 |
|
149 |
+
| 1.6517 | 89.0 | 36045 | 5.1797 | 0.1212 | 0.0772 | 0.5596 | 0.2374 |
|
150 |
+
| 1.6217 | 90.0 | 36450 | 5.1910 | 0.1219 | 0.0783 | 0.5592 | 0.2359 |
|
151 |
+
| 1.5947 | 91.0 | 36855 | 5.1839 | 0.1212 | 0.0773 | 0.5408 | 0.2369 |
|
152 |
+
| 1.5635 | 92.0 | 37260 | 5.1871 | 0.1203 | 0.0770 | 0.5449 | 0.2376 |
|
153 |
+
| 1.5374 | 93.0 | 37665 | 5.1938 | 0.1206 | 0.0790 | 0.5497 | 0.2342 |
|
154 |
+
| 1.5105 | 94.0 | 38070 | 5.1871 | 0.1203 | 0.0764 | 0.5412 | 0.2365 |
|
155 |
+
| 1.4812 | 95.0 | 38475 | 5.1862 | 0.1203 | 0.0771 | 0.5360 | 0.2391 |
|
156 |
+
| 1.4549 | 96.0 | 38880 | 5.1762 | 0.1200 | 0.0763 | 0.5355 | 0.2422 |
|
157 |
+
| 1.4301 | 97.0 | 39285 | 5.1922 | 0.1231 | 0.0801 | 0.5387 | 0.2410 |
|
158 |
+
| 1.4085 | 98.0 | 39690 | 5.1897 | 0.1200 | 0.0752 | 0.5360 | 0.2382 |
|
159 |
+
| 1.3828 | 99.0 | 40095 | 5.1809 | 0.1228 | 0.0777 | 0.5237 | 0.2452 |
|
160 |
+
| 1.3545 | 100.0 | 40500 | 5.1815 | 0.1234 | 0.0781 | 0.5356 | 0.2421 |
|
161 |
+
| 1.3342 | 101.0 | 40905 | 5.1831 | 0.1243 | 0.0791 | 0.5301 | 0.2461 |
|
162 |
+
| 1.31 | 102.0 | 41310 | 5.1805 | 0.1259 | 0.0794 | 0.5247 | 0.2469 |
|
163 |
+
| 1.2894 | 103.0 | 41715 | 5.1917 | 0.1240 | 0.0789 | 0.5241 | 0.2461 |
|
164 |
+
| 1.2688 | 104.0 | 42120 | 5.1814 | 0.1231 | 0.0798 | 0.5142 | 0.2480 |
|
165 |
+
| 1.2457 | 105.0 | 42525 | 5.1859 | 0.1234 | 0.0794 | 0.5182 | 0.2504 |
|
166 |
+
| 1.2258 | 106.0 | 42930 | 5.1862 | 0.1250 | 0.0807 | 0.5170 | 0.2515 |
|
167 |
+
| 1.2089 | 107.0 | 43335 | 5.1884 | 0.1234 | 0.0792 | 0.5090 | 0.2503 |
|
168 |
+
| 1.1873 | 108.0 | 43740 | 5.1829 | 0.1271 | 0.0824 | 0.5154 | 0.2507 |
|
169 |
+
| 1.1686 | 109.0 | 44145 | 5.1854 | 0.1247 | 0.0801 | 0.5078 | 0.2529 |
|
170 |
+
| 1.1497 | 110.0 | 44550 | 5.1969 | 0.1250 | 0.0803 | 0.5123 | 0.2460 |
|
171 |
+
| 1.1334 | 111.0 | 44955 | 5.1870 | 0.1247 | 0.0800 | 0.5101 | 0.2490 |
|
172 |
+
| 1.1141 | 112.0 | 45360 | 5.1869 | 0.1231 | 0.0810 | 0.5111 | 0.2514 |
|
173 |
+
| 1.0999 | 113.0 | 45765 | 5.1936 | 0.1265 | 0.0832 | 0.5116 | 0.2498 |
|
174 |
+
| 1.0836 | 114.0 | 46170 | 5.2010 | 0.1253 | 0.0802 | 0.5103 | 0.2501 |
|
175 |
+
| 1.0671 | 115.0 | 46575 | 5.2002 | 0.1240 | 0.0805 | 0.5108 | 0.2502 |
|
176 |
+
| 1.0521 | 116.0 | 46980 | 5.1985 | 0.1247 | 0.0802 | 0.5059 | 0.2539 |
|
177 |
+
| 1.0399 | 117.0 | 47385 | 5.1978 | 0.1243 | 0.0803 | 0.5056 | 0.2526 |
|
178 |
+
| 1.0248 | 118.0 | 47790 | 5.2000 | 0.1277 | 0.0814 | 0.5069 | 0.2587 |
|
179 |
+
| 1.0083 | 119.0 | 48195 | 5.1964 | 0.1271 | 0.0823 | 0.4971 | 0.2579 |
|
180 |
+
| 0.9999 | 120.0 | 48600 | 5.1958 | 0.1231 | 0.0803 | 0.4966 | 0.2544 |
|
181 |
+
| 0.9846 | 121.0 | 49005 | 5.1976 | 0.1271 | 0.0824 | 0.4938 | 0.2573 |
|
182 |
+
| 0.9737 | 122.0 | 49410 | 5.2000 | 0.1274 | 0.0826 | 0.4963 | 0.2558 |
|
183 |
+
| 0.9617 | 123.0 | 49815 | 5.1993 | 0.1268 | 0.0825 | 0.4997 | 0.2537 |
|
184 |
+
| 0.9517 | 124.0 | 50220 | 5.2047 | 0.1271 | 0.0828 | 0.4953 | 0.2579 |
|
185 |
+
| 0.9377 | 125.0 | 50625 | 5.2050 | 0.1268 | 0.0822 | 0.4991 | 0.2566 |
|
186 |
+
| 0.9286 | 126.0 | 51030 | 5.2054 | 0.1253 | 0.0816 | 0.4964 | 0.2542 |
|
187 |
+
| 0.9195 | 127.0 | 51435 | 5.2047 | 0.1287 | 0.0832 | 0.4977 | 0.2565 |
|
188 |
+
| 0.9124 | 128.0 | 51840 | 5.2049 | 0.1268 | 0.0827 | 0.4899 | 0.2570 |
|
189 |
+
| 0.9025 | 129.0 | 52245 | 5.2069 | 0.1281 | 0.0836 | 0.4938 | 0.2580 |
|
190 |
+
| 0.8916 | 130.0 | 52650 | 5.2015 | 0.1290 | 0.0854 | 0.4877 | 0.2609 |
|
191 |
+
| 0.884 | 131.0 | 53055 | 5.2105 | 0.1265 | 0.0831 | 0.4895 | 0.2581 |
|
192 |
+
| 0.8755 | 132.0 | 53460 | 5.2100 | 0.1281 | 0.0827 | 0.4849 | 0.2576 |
|
193 |
+
| 0.8735 | 133.0 | 53865 | 5.2057 | 0.1299 | 0.0855 | 0.4883 | 0.2639 |
|
194 |
+
| 0.8652 | 134.0 | 54270 | 5.2082 | 0.1293 | 0.0839 | 0.4873 | 0.2604 |
|
195 |
+
| 0.8584 | 135.0 | 54675 | 5.2109 | 0.1296 | 0.0855 | 0.4895 | 0.2590 |
|
196 |
+
| 0.8523 | 136.0 | 55080 | 5.2101 | 0.1290 | 0.0843 | 0.4856 | 0.2627 |
|
197 |
+
| 0.8451 | 137.0 | 55485 | 5.2158 | 0.1265 | 0.0823 | 0.4819 | 0.2622 |
|
198 |
+
| 0.8405 | 138.0 | 55890 | 5.2166 | 0.1287 | 0.0837 | 0.4856 | 0.2610 |
|
199 |
+
| 0.8331 | 139.0 | 56295 | 5.2135 | 0.1287 | 0.0836 | 0.4817 | 0.2629 |
|
200 |
+
| 0.8281 | 140.0 | 56700 | 5.2143 | 0.1284 | 0.0830 | 0.4796 | 0.2631 |
|
201 |
+
| 0.8272 | 141.0 | 57105 | 5.2131 | 0.1271 | 0.0837 | 0.4802 | 0.2633 |
|
202 |
+
| 0.8208 | 142.0 | 57510 | 5.2167 | 0.1284 | 0.0837 | 0.4770 | 0.2625 |
|
203 |
+
| 0.8187 | 143.0 | 57915 | 5.2134 | 0.1290 | 0.0851 | 0.4803 | 0.2630 |
|
204 |
+
| 0.8193 | 144.0 | 58320 | 5.2154 | 0.1305 | 0.0861 | 0.4811 | 0.2651 |
|
205 |
+
| 0.8125 | 145.0 | 58725 | 5.2153 | 0.1296 | 0.0852 | 0.4802 | 0.2644 |
|
206 |
+
| 0.8129 | 146.0 | 59130 | 5.2150 | 0.1287 | 0.0849 | 0.4782 | 0.2644 |
|
207 |
+
| 0.8092 | 147.0 | 59535 | 5.2169 | 0.1293 | 0.0847 | 0.4814 | 0.2637 |
|
208 |
+
| 0.8054 | 148.0 | 59940 | 5.2159 | 0.1302 | 0.0854 | 0.4830 | 0.2651 |
|
209 |
+
| 0.8082 | 149.0 | 60345 | 5.2162 | 0.1299 | 0.0850 | 0.4810 | 0.2643 |
|
210 |
+
| 0.8037 | 150.0 | 60750 | 5.2160 | 0.1305 | 0.0856 | 0.4812 | 0.2648 |
|
211 |
+
|
212 |
+
|
213 |
+
### Framework versions
|
214 |
+
|
215 |
+
- Transformers 4.51.3
|
216 |
+
- Pytorch 2.6.0+cu124
|
217 |
+
- Datasets 3.5.0
|
218 |
+
- Tokenizers 0.21.1
|
logs/events.out.tfevents.1745635477.deac701d2f5a.1083.2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1124e2dc783c5686379d2734f8a761b69d9b97fd22ff37456ce8a2cb12eca76a
|
3 |
+
size 211182
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 506604288
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c274e64f810fc512adcfdf04daf104956ae89be700125ff56b97d9e638625514
|
3 |
size 506604288
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50264": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": false,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"extra_special_tokens": {},
|
51 |
+
"mask_token": "<mask>",
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "<pad>",
|
54 |
+
"sep_token": "</s>",
|
55 |
+
"tokenizer_class": "RobertaTokenizer",
|
56 |
+
"trim_offsets": true,
|
57 |
+
"unk_token": "<unk>"
|
58 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|