End of training

Browse files

Files changed (6) hide show

README.md +69 -91
config.json +76 -91
model.safetensors +2 -2
special_tokens_map.json +37 -37
tokenizer_config.json +60 -58
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,91 +1,69 @@
----
-library_name: transformers
-license: cc-by-4.0
-base_model: NazaGara/NER-fine-tuned-BETO
-tags:
-- generated_from_trainer
-datasets:
-- biobert_json
-metrics:
-- precision
-- recall
-- f1
-- accuracy
-model-index:
-- name: NER-fine-tuned-BETO-finetuned-ner
-  results:
-  - task:
-      name: Token Classification
-      type: token-classification
-    dataset:
-      name: biobert_json
-      type: biobert_json
-      config: Biobert_json
-      split: validation
-      args: Biobert_json
-    metrics:
-    - name: Precision
-      type: precision
-      value: 0.9260690093141406
-    - name: Recall
-      type: recall
-      value: 0.9508259074114322
-    - name: F1
-      type: f1
-      value: 0.93828418230563
-    - name: Accuracy
-      type: accuracy
-      value: 0.9680427807486631
----
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-# NER-fine-tuned-BETO-finetuned-ner
-This model is a fine-tuned version of [NazaGara/NER-fine-tuned-BETO](https://huggingface.co/NazaGara/NER-fine-tuned-BETO) on the biobert_json dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.1199
-- Precision: 0.9261
-- Recall: 0.9508
-- F1: 0.9383
-- Accuracy: 0.9680
-## Model description
-More information needed
-## Intended uses & limitations
-More information needed
-## Training and evaluation data
-More information needed
-## Training procedure
-### Training hyperparameters
-The following hyperparameters were used during training:
-- learning_rate: 2e-05
-- train_batch_size: 16
-- eval_batch_size: 16
-- seed: 42
-- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
-- lr_scheduler_type: linear
-- num_epochs: 1
-### Training results
-| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| 0.3593        | 1.0   | 612  | 0.1199          | 0.9261    | 0.9508 | 0.9383 | 0.9680   |
-### Framework versions
-- Transformers 4.46.2
-- Pytorch 2.5.1
-- Datasets 3.1.0
-- Tokenizers 0.20.3

+---
+library_name: transformers
+license: cc-by-4.0
+base_model: NazaGara/NER-fine-tuned-BETO
+tags:
+- generated_from_trainer
+metrics:
+- precision
+- recall
+- f1
+- accuracy
+model-index:
+- name: NER-fine-tuned-BETO-finetuned-ner
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# NER-fine-tuned-BETO-finetuned-ner
+This model is a fine-tuned version of [NazaGara/NER-fine-tuned-BETO](https://huggingface.co/NazaGara/NER-fine-tuned-BETO) on an unspecified dataset (no dataset name was recorded in the training metadata).
+It achieves the following results on the evaluation set:
+- Loss: 0.2509
+- Precision: 0.7185
+- Recall: 0.6715
+- F1: 0.6942
+- Accuracy: 0.8935
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- optimizer: adamw_torch (AdamW) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| 0.8944        | 1.0   | 777  | 0.3823          | 0.6589    | 0.5547 | 0.6023 | 0.8467   |
+| 0.4465        | 2.0   | 1554 | 0.2852          | 0.6810    | 0.6399 | 0.6598 | 0.8775   |
+| 0.3745        | 3.0   | 2331 | 0.2509          | 0.7185    | 0.6715 | 0.6942 | 0.8935   |
+### Framework versions
+- Transformers 4.52.2
+- Pytorch 2.6.0+cu124
+- Datasets 2.14.4
+- Tokenizers 0.21.1

config.json CHANGED Viewed

@@ -1,91 +1,76 @@
-{
-  "_name_or_path": "NazaGara/NER-fine-tuned-BETO",
-  "architectures": [
-    "BertForTokenClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "B_CANCER_CONCEPT",
-    "1": "B_CHEMOTHERAPY",
-    "2": "B_DATE",
-    "3": "B_DRUG",
-    "4": "B_FAMILY",
-    "5": "B_FREQ",
-    "6": "B_IMPLICIT_DATE",
-    "7": "B_INTERVAL",
-    "8": "B_METRIC",
-    "9": "B_OCURRENCE_EVENT",
-    "10": "B_QUANTITY",
-    "11": "B_RADIOTHERAPY",
-    "12": "B_SMOKER_STATUS",
-    "13": "B_STAGE",
-    "14": "B_SURGERY",
-    "15": "B_TNM",
-    "16": "I_CANCER_CONCEPT",
-    "17": "I_DATE",
-    "18": "I_DRUG",
-    "19": "I_FAMILY",
-    "20": "I_FREQ",
-    "21": "I_IMPLICIT_DATE",
-    "22": "I_INTERVAL",
-    "23": "I_METRIC",
-    "24": "I_OCURRENCE_EVENT",
-    "25": "I_SMOKER_STATUS",
-    "26": "I_STAGE",
-    "27": "I_SURGERY",
-    "28": "I_TNM",
-    "29": "O"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "B_CANCER_CONCEPT": 0,
-    "B_CHEMOTHERAPY": 1,
-    "B_DATE": 2,
-    "B_DRUG": 3,
-    "B_FAMILY": 4,
-    "B_FREQ": 5,
-    "B_IMPLICIT_DATE": 6,
-    "B_INTERVAL": 7,
-    "B_METRIC": 8,
-    "B_OCURRENCE_EVENT": 9,
-    "B_QUANTITY": 10,
-    "B_RADIOTHERAPY": 11,
-    "B_SMOKER_STATUS": 12,
-    "B_STAGE": 13,
-    "B_SURGERY": 14,
-    "B_TNM": 15,
-    "I_CANCER_CONCEPT": 16,
-    "I_DATE": 17,
-    "I_DRUG": 18,
-    "I_FAMILY": 19,
-    "I_FREQ": 20,
-    "I_IMPLICIT_DATE": 21,
-    "I_INTERVAL": 22,
-    "I_METRIC": 23,
-    "I_OCURRENCE_EVENT": 24,
-    "I_SMOKER_STATUS": 25,
-    "I_STAGE": 26,
-    "I_SURGERY": 27,
-    "I_TNM": 28,
-    "O": 29
-  },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "output_past": true,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "torch_dtype": "float32",
-  "transformers_version": "4.46.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 31002
-}

+{
+  "architectures": [
+    "BertForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": ",",
+    "1": "0",
+    "2": "B-BIOMARCADOR",
+    "3": "B-CANCER",
+    "4": "B-CIRUGIA",
+    "5": "B-DOSIS",
+    "6": "B-EDAD",
+    "7": "B-FECHA",
+    "8": "B-GLEASON",
+    "9": "B-MEDICAMENTO",
+    "10": "B-TNM",
+    "11": "B-TRATAMIENTO",
+    "12": "I-BIOMARCADOR",
+    "13": "I-CANCER",
+    "14": "I-CIRUGIA",
+    "15": "I-DOSIS",
+    "16": "I-EDAD",
+    "17": "I-FECHA",
+    "18": "I-GLEASON",
+    "19": "I-MEDICAMENTO",
+    "20": "I-TNM",
+    "21": "I-TRATAMIENTO",
+    "22": "O"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    ",": 0,
+    "0": 1,
+    "B-BIOMARCADOR": 2,
+    "B-CANCER": 3,
+    "B-CIRUGIA": 4,
+    "B-DOSIS": 5,
+    "B-EDAD": 6,
+    "B-FECHA": 7,
+    "B-GLEASON": 8,
+    "B-MEDICAMENTO": 9,
+    "B-TNM": 10,
+    "B-TRATAMIENTO": 11,
+    "I-BIOMARCADOR": 12,
+    "I-CANCER": 13,
+    "I-CIRUGIA": 14,
+    "I-DOSIS": 15,
+    "I-EDAD": 16,
+    "I-FECHA": 17,
+    "I-GLEASON": 18,
+    "I-MEDICAMENTO": 19,
+    "I-TNM": 20,
+    "I-TRATAMIENTO": 21,
+    "O": 22
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 31002
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:490ecd05689a719ac555110e493f0fbefb760669bab36c4ea462c0cecc70ba82
-size 437156776

 version https://git-lfs.github.com/spec/v1
+oid sha256:2dfa6e09e4bc822d11aaab8b2d5cbdad238a4a894564446e72d9b3b0175e613b
+size 437135244

special_tokens_map.json CHANGED Viewed

@@ -1,37 +1,37 @@
-{
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer_config.json CHANGED Viewed

@@ -1,58 +1,60 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "4": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "5": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": false,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": false,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "never_split": null,
-  "num_labels": 9,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": false,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
-}

+{
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "num_labels": 9,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": false,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c0ec32171b1b99c608110e45aa47131bf748b55b5fb7aefd70bedf210968c0
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:62b565154933b9674fcdca1c3dc26457dfe761d3bc8885a9618dd76c970dcf5f
 size 5304