Training in progress, step 500

Browse files

Files changed (8) hide show

.gitattributes +1 -0
README.md +36 -64
config.json +15 -13
model.safetensors +2 -2
sentencepiece.bpe.model +3 -0
special_tokens_map.json +13 -5
tokenizer.json +0 -0
tokenizer_config.json +18 -19

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: google-bert/bert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
@@ -9,22 +9,24 @@ metrics:
 - recall
 - f1
 model-index:
-- name: gg-bert-base-uncased
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# gg-bert-base-uncased
-This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.3042
-- Accuracy: 0.6256
-- Precision: 0.6423
-- Recall: 0.6176
-- F1: 0.5920
 ## Model description
@@ -50,67 +52,37 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
-- num_epochs: 50
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Accuracy | Precision | Recall | F1     |
-|:-------------:|:-----:|:-----:|:---------------:|:--------:|:---------:|:------:|:------:|
-| 2.2916        | 1.0   | 469   | 2.2523          | 0.1248   | 0.1126    | 0.1316 | 0.0648 |
-| 2.1782        | 2.0   | 938   | 2.1328          | 0.3712   | 0.4166    | 0.3539 | 0.3419 |
-| 2.0827        | 3.0   | 1407  | 2.0233          | 0.4      | 0.5060    | 0.3927 | 0.3492 |
-| 2.0142        | 4.0   | 1876  | 1.9946          | 0.384    | 0.4626    | 0.3980 | 0.3605 |
-| 1.9595        | 5.0   | 2345  | 1.8959          | 0.4384   | 0.4411    | 0.4313 | 0.3944 |
-| 1.8538        | 6.0   | 2814  | 1.8370          | 0.4608   | 0.4965    | 0.4545 | 0.4048 |
-| 1.8433        | 7.0   | 3283  | 1.7877          | 0.4752   | 0.4488    | 0.4721 | 0.4176 |
-| 1.8114        | 8.0   | 3752  | 1.7508          | 0.5008   | 0.5705    | 0.4936 | 0.4510 |
-| 1.7872        | 9.0   | 4221  | 1.7364          | 0.464    | 0.4673    | 0.4558 | 0.4401 |
-| 1.7446        | 10.0  | 4690  | 1.6801          | 0.5216   | 0.5153    | 0.5144 | 0.4871 |
-| 1.7021        | 11.0  | 5159  | 1.6621          | 0.5024   | 0.5106    | 0.5007 | 0.4838 |
-| 1.6819        | 12.0  | 5628  | 1.6299          | 0.5504   | 0.5087    | 0.5453 | 0.4962 |
-| 1.6803        | 13.0  | 6097  | 1.6008          | 0.5408   | 0.5580    | 0.5345 | 0.4990 |
-| 1.6591        | 14.0  | 6566  | 1.5753          | 0.56     | 0.6140    | 0.5528 | 0.5174 |
-| 1.5972        | 15.0  | 7035  | 1.5556          | 0.5632   | 0.5939    | 0.5566 | 0.5177 |
-| 1.5749        | 16.0  | 7504  | 1.5304          | 0.5824   | 0.5990    | 0.5708 | 0.5433 |
-| 1.5793        | 17.0  | 7973  | 1.5174          | 0.5664   | 0.6593    | 0.5555 | 0.5065 |
-| 1.569         | 18.0  | 8442  | 1.4926          | 0.5824   | 0.5748    | 0.5704 | 0.5330 |
-| 1.5885        | 19.0  | 8911  | 1.4857          | 0.5776   | 0.6052    | 0.5705 | 0.5333 |
-| 1.5004        | 20.0  | 9380  | 1.4639          | 0.5952   | 0.5878    | 0.5836 | 0.5496 |
-| 1.5046        | 21.0  | 9849  | 1.4582          | 0.5904   | 0.5969    | 0.5846 | 0.5593 |
-| 1.5247        | 22.0  | 10318 | 1.4497          | 0.584    | 0.6200    | 0.5738 | 0.5464 |
-| 1.5079        | 23.0  | 10787 | 1.4411          | 0.5792   | 0.6211    | 0.5729 | 0.5379 |
-| 1.4594        | 24.0  | 11256 | 1.4245          | 0.6032   | 0.5973    | 0.5983 | 0.5763 |
-| 1.4362        | 25.0  | 11725 | 1.4046          | 0.6112   | 0.5904    | 0.6025 | 0.5829 |
-| 1.4554        | 26.0  | 12194 | 1.3992          | 0.6      | 0.5959    | 0.5895 | 0.5661 |
-| 1.4484        | 27.0  | 12663 | 1.3923          | 0.6064   | 0.6297    | 0.5998 | 0.5658 |
-| 1.4666        | 28.0  | 13132 | 1.3787          | 0.6096   | 0.6321    | 0.5971 | 0.5732 |
-| 1.4433        | 29.0  | 13601 | 1.3715          | 0.6112   | 0.6291    | 0.6029 | 0.5732 |
-| 1.4253        | 30.0  | 14070 | 1.3686          | 0.6176   | 0.6069    | 0.6096 | 0.5917 |
-| 1.4928        | 31.0  | 14539 | 1.3635          | 0.6176   | 0.6182    | 0.6103 | 0.5889 |
-| 1.4585        | 32.0  | 15008 | 1.3660          | 0.6016   | 0.6105    | 0.5950 | 0.5655 |
-| 1.3631        | 33.0  | 15477 | 1.3523          | 0.6224   | 0.6451    | 0.6153 | 0.5863 |
-| 1.402         | 34.0  | 15946 | 1.3421          | 0.6192   | 0.6245    | 0.6117 | 0.5797 |
-| 1.416         | 35.0  | 16415 | 1.3425          | 0.6192   | 0.6046    | 0.6139 | 0.5936 |
-| 1.4122        | 36.0  | 16884 | 1.3347          | 0.6192   | 0.6026    | 0.6119 | 0.5916 |
-| 1.361         | 37.0  | 17353 | 1.3325          | 0.6128   | 0.5946    | 0.6045 | 0.5787 |
-| 1.4179        | 38.0  | 17822 | 1.3251          | 0.6128   | 0.6098    | 0.6018 | 0.5783 |
-| 1.3549        | 39.0  | 18291 | 1.3191          | 0.624    | 0.6149    | 0.6150 | 0.5883 |
-| 1.4217        | 40.0  | 18760 | 1.3188          | 0.6272   | 0.6471    | 0.6194 | 0.5935 |
-| 1.3848        | 41.0  | 19229 | 1.3137          | 0.6336   | 0.6261    | 0.6250 | 0.6019 |
-| 1.3956        | 42.0  | 19698 | 1.3141          | 0.632    | 0.6512    | 0.6243 | 0.6008 |
-| 1.3965        | 43.0  | 20167 | 1.3116          | 0.6336   | 0.6523    | 0.6246 | 0.6016 |
-| 1.3523        | 44.0  | 20636 | 1.3076          | 0.6288   | 0.6214    | 0.6204 | 0.5964 |
-| 1.3642        | 45.0  | 21105 | 1.3093          | 0.6256   | 0.6341    | 0.6176 | 0.5921 |
-| 1.3796        | 46.0  | 21574 | 1.3066          | 0.624    | 0.6388    | 0.6159 | 0.5869 |
-| 1.3494        | 47.0  | 22043 | 1.3068          | 0.6272   | 0.6469    | 0.6198 | 0.5958 |
-| 1.3697        | 48.0  | 22512 | 1.3051          | 0.6304   | 0.6369    | 0.6222 | 0.5975 |
-| 1.3977        | 49.0  | 22981 | 1.3044          | 0.6288   | 0.6459    | 0.6208 | 0.5957 |
-| 1.3568        | 50.0  | 23450 | 1.3042          | 0.6256   | 0.6423    | 0.6176 | 0.5920 |
 ### Framework versions
-- Transformers 4.43.3
 - Pytorch 2.3.1+cu121
 - Datasets 2.20.0
 - Tokenizers 0.19.1

 ---
+license: mit
+base_model: FacebookAI/roberta-large
 tags:
 - generated_from_trainer
 metrics:
 - recall
 - f1
 model-index:
+- name: absa-train-service-roberta-large
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/cunho2803032003/absa-1721959498.2993438/runs/tad25dun)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/cunho2803032003/absa-1721959940.7872202/runs/bsprskdy)
+# absa-train-service-roberta-large
+This model is a fine-tuned version of [FacebookAI/roberta-large](https://huggingface.co/FacebookAI/roberta-large) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8683
+- Accuracy: 0.7424
+- Precision: 0.7345
+- Recall: 0.7367
+- F1: 0.7302
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
+- num_epochs: 20
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1     |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
+| 2.2255        | 1.0   | 469  | 2.0677          | 0.3296   | 0.1937    | 0.3250 | 0.2297 |
+| 1.8236        | 2.0   | 938  | 1.7061          | 0.504    | 0.5413    | 0.4914 | 0.4567 |
+| 1.5384        | 3.0   | 1407 | 1.4381          | 0.552    | 0.5944    | 0.5549 | 0.5196 |
+| 1.4301        | 4.0   | 1876 | 1.3316          | 0.5984   | 0.6000    | 0.5990 | 0.5618 |
+| 1.3776        | 5.0   | 2345 | 1.1645          | 0.6576   | 0.6817    | 0.6491 | 0.6332 |
+| 1.2078        | 6.0   | 2814 | 1.0967          | 0.6448   | 0.7035    | 0.6348 | 0.6110 |
+| 1.2535        | 7.0   | 3283 | 1.0565          | 0.7008   | 0.7467    | 0.6967 | 0.7066 |
+| 1.2921        | 8.0   | 3752 | 1.0049          | 0.6976   | 0.7013    | 0.6884 | 0.6813 |
+| 1.178         | 9.0   | 4221 | 1.0438          | 0.648    | 0.7746    | 0.6423 | 0.6387 |
+| 1.2324        | 10.0  | 4690 | 1.0203          | 0.6896   | 0.7096    | 0.6831 | 0.6704 |
+| 1.1899        | 11.0  | 5159 | 1.0193          | 0.6864   | 0.7391    | 0.6819 | 0.6834 |
+| 1.1515        | 12.0  | 5628 | 0.9722          | 0.6944   | 0.7164    | 0.6924 | 0.6860 |
+| 1.1604        | 13.0  | 6097 | 0.9372          | 0.7312   | 0.7543    | 0.7311 | 0.7259 |
+| 1.1229        | 14.0  | 6566 | 0.9265          | 0.72     | 0.7278    | 0.7139 | 0.7147 |
+| 1.1459        | 15.0  | 7035 | 0.8896          | 0.7376   | 0.7264    | 0.7323 | 0.7183 |
+| 1.1281        | 16.0  | 7504 | 0.9074          | 0.7152   | 0.7107    | 0.7087 | 0.7012 |
+| 1.1794        | 17.0  | 7973 | 0.8914          | 0.7424   | 0.7293    | 0.7354 | 0.7266 |
+| 1.1101        | 18.0  | 8442 | 0.8707          | 0.7216   | 0.7161    | 0.7141 | 0.7059 |
+| 1.1215        | 19.0  | 8911 | 0.8656          | 0.7408   | 0.7322    | 0.7348 | 0.7274 |
+| 1.0483        | 20.0  | 9380 | 0.8683          | 0.7424   | 0.7345    | 0.7367 | 0.7302 |
 ### Framework versions
+- Transformers 4.43.2
 - Pytorch 2.3.1+cu121
 - Datasets 2.20.0
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -1,14 +1,15 @@
 {
-  "_name_or_path": "google-bert/bert-base-uncased",
   "architectures": [
-    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
-  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
@@ -23,7 +24,7 @@
     "10": "LABEL_10"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
@@ -37,17 +38,18 @@
     "LABEL_8": 8,
     "LABEL_9": 9
   },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.43.3",
-  "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 30522
 }

 {
+  "_name_or_path": "FacebookAI/xlm-roberta-large",
   "architectures": [
+    "XLMRobertaForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
   "classifier_dropout": null,
+  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
   "id2label": {
     "0": "LABEL_0",
     "1": "LABEL_1",
     "10": "LABEL_10"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 4096,
   "label2id": {
     "LABEL_0": 0,
     "LABEL_1": 1,
     "LABEL_8": 8,
     "LABEL_9": 9
   },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.43.3",
+  "type_vocab_size": 1,
   "use_cache": true,
+  "vocab_size": 250002
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faa1fd9c156a39ac98f9a6b62195487b384a414bd63a83b5a1fc6c44b481723f
-size 437986332

 version https://git-lfs.github.com/spec/v1
+oid sha256:c75432df9541d28706d24a0dc841485bcf1c36f61c00d1eaafb0a95d83b93ec3
+size 2239655572

sentencepiece.bpe.model ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,15 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,55 +1,54 @@
 {
   "added_tokens_decoder": {
     "0": {
-      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "100": {
-      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "101": {
-      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "102": {
-      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
   "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
 }

 {
   "added_tokens_decoder": {
     "0": {
+      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "1": {
+      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "2": {
+      "content": "</s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "3": {
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
   "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
 }