End of training

Files changed (4)

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: bert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 # office-character
-This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5407
-- Accuracy: 0.1139
 ## Model description
@@ -37,25 +37,25 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 1e-06
 - train_batch_size: 20
 - eval_batch_size: 20
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 1.5
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 2.5927        | 0.2   | 120  | 2.5634          | 0.1007   |
-| 2.5723        | 0.4   | 240  | 2.5531          | 0.1054   |
-| 2.5774        | 0.6   | 360  | 2.5483          | 0.1043   |
-| 2.5738        | 0.8   | 480  | 2.5448          | 0.1095   |
-| 2.5755        | 1.0   | 600  | 2.5432          | 0.1095   |
-| 2.5688        | 1.2   | 720  | 2.5416          | 0.1132   |
-| 2.5632        | 1.4   | 840  | 2.5407          | 0.1139   |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: distilbert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
 # office-character
+This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.0632
+- Accuracy: 0.2481
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 5e-05
 - train_batch_size: 20
 - eval_batch_size: 20
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 3
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
+| 2.0701        | 0.43  | 120  | 2.0673          | 0.16     |
+| 2.0385        | 0.86  | 240  | 2.0445          | 0.1825   |
+| 1.94          | 1.29  | 360  | 2.0320          | 0.2156   |
+| 1.8606        | 1.71  | 480  | 2.0290          | 0.225    |
+| 1.7839        | 2.14  | 600  | 2.0392          | 0.2431   |
+| 1.5807        | 2.57  | 720  | 2.0641          | 0.24     |
+| 1.5676        | 3.0   | 840  | 2.0632          | 0.2481   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,57 +1,45 @@
 {
-  "_name_or_path": "bert-base-uncased",
   "architectures": [
-    "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "id2label": {
     "0": "Andy",
     "1": "Angela",
-    "2": "Darryl",
-    "3": "Dwight",
-    "4": "Erin",
-    "5": "Jim",
-    "6": "Kelly",
-    "7": "Kevin",
-    "8": "Michael",
-    "9": "Oscar",
-    "10": "Pam",
-    "11": "Phyllis",
-    "12": "Ryan"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "Andy": 0,
     "Angela": 1,
-    "Darryl": 2,
-    "Dwight": 3,
-    "Erin": 4,
-    "Jim": 5,
-    "Kelly": 6,
-    "Kevin": 7,
-    "Michael": 8,
-    "Oscar": 9,
-    "Pam": 10,
-    "Phyllis": 11,
-    "Ryan": 12
   },
-  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.32.1",
-  "type_vocab_size": 2,
-  "use_cache": true,
   "vocab_size": 30522
 }

 {
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
   "architectures": [
+    "DistilBertForSequenceClassification"
   ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
     "0": "Andy",
     "1": "Angela",
+    "2": "Dwight",
+    "3": "Jim",
+    "4": "Kevin",
+    "5": "Michael",
+    "6": "Oscar",
+    "7": "Pam"
   },
   "initializer_range": 0.02,
   "label2id": {
     "Andy": 0,
     "Angela": 1,
+    "Dwight": 2,
+    "Jim": 3,
+    "Kevin": 4,
+    "Michael": 5,
+    "Oscar": 6,
+    "Pam": 7
   },
   "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.32.1",
   "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7439284d09a45f3cfe964f3bfe671e8c033d6ead5d39084ec717da854201195
-size 437992484

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e35e8f79cd2a7f47b004f270f7add5e927359a3694b804c90af09162f93e3cb
+size 267851024

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd55bd9630a3e593db1b61e77b3caf29661dbd33dd3736a23bb39271e7ed029c
 size 4027

 version https://git-lfs.github.com/spec/v1
+oid sha256:0de371ea6a4d604e9bb82ebc2fd3fc78dca32ef2c6eeec57aab9d1f46631a8b8
 size 4027