yunosuken committed on
Commit
bc63d9b
1 Parent(s): 22bd122

End of training

Browse files
Files changed (5) hide show
  1. README.md +13 -13
  2. config.json +1 -1
  3. pytorch_model.bin +1 -1
  4. trainer_state.json +54 -54
  5. training_args.bin +2 -2
README.md CHANGED
@@ -7,20 +7,20 @@ metrics:
7
  - accuracy
8
  - f1
9
  model-index:
10
- - name: bert-large-japanease-v2-gpt4-relevance-learned
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # bert-large-japanease-v2-gpt4-relevance-learned
18
 
19
  This model is a fine-tuned version of [cl-tohoku/bert-large-japanese-v2](https://huggingface.co/cl-tohoku/bert-large-japanese-v2) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 3.1789
22
- - Accuracy: 0.196
23
- - F1: 0.1399
24
 
25
  ## Model description
26
 
@@ -40,8 +40,8 @@ More information needed
40
 
41
  The following hyperparameters were used during training:
42
  - learning_rate: 2e-05
43
- - train_batch_size: 8
44
- - eval_batch_size: 8
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
@@ -51,16 +51,16 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|
54
- | 3.5154 | 1.0 | 1125 | 3.4451 | 0.146 | 0.0567 |
55
- | 3.2778 | 2.0 | 2250 | 3.1801 | 0.197 | 0.0963 |
56
- | 3.0802 | 3.0 | 3375 | 3.1374 | 0.174 | 0.1120 |
57
- | 2.8756 | 4.0 | 4500 | 3.1239 | 0.191 | 0.1230 |
58
- | 2.6657 | 5.0 | 5625 | 3.1789 | 0.196 | 0.1399 |
59
 
60
 
61
  ### Framework versions
62
 
63
- - Transformers 4.33.1
64
  - Pytorch 2.0.1+cu118
65
  - Datasets 2.14.5
66
  - Tokenizers 0.13.3
 
7
  - accuracy
8
  - f1
9
  model-index:
10
+ - name: results
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # results
18
 
19
  This model is a fine-tuned version of [cl-tohoku/bert-large-japanese-v2](https://huggingface.co/cl-tohoku/bert-large-japanese-v2) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 3.2693
22
+ - Accuracy: 0.885
23
+ - F1: 0.8788
24
 
25
  ## Model description
26
 
 
40
 
41
  The following hyperparameters were used during training:
42
  - learning_rate: 2e-05
43
+ - train_batch_size: 16
44
+ - eval_batch_size: 16
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|
54
+ | 3.3692 | 1.0 | 563 | 3.2122 | 0.872 | 0.8560 |
55
+ | 3.0963 | 2.0 | 1126 | 3.1045 | 0.866 | 0.8625 |
56
+ | 2.8698 | 3.0 | 1689 | 3.1410 | 0.882 | 0.8755 |
57
+ | 2.6212 | 4.0 | 2252 | 3.2119 | 0.876 | 0.8702 |
58
+ | 2.407 | 5.0 | 2815 | 3.2693 | 0.885 | 0.8788 |
59
 
60
 
61
  ### Framework versions
62
 
63
+ - Transformers 4.33.2
64
  - Pytorch 2.0.1+cu118
65
  - Datasets 2.14.5
66
  - Tokenizers 0.13.3
config.json CHANGED
@@ -225,7 +225,7 @@
225
  "position_embedding_type": "absolute",
226
  "problem_type": "single_label_classification",
227
  "torch_dtype": "float32",
228
- "transformers_version": "4.33.1",
229
  "type_vocab_size": 2,
230
  "use_cache": true,
231
  "vocab_size": 32768
 
225
  "position_embedding_type": "absolute",
226
  "problem_type": "single_label_classification",
227
  "torch_dtype": "float32",
228
+ "transformers_version": "4.33.2",
229
  "type_vocab_size": 2,
230
  "use_cache": true,
231
  "vocab_size": 32768
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:075c57655ed23cfe35476741f850cf2e39a33aaa014b3595474b61bbd72023f5
3
  size 1350315697
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00733e21f9777592db355d663917e83017977899ba42bdc5fad8dad17350a55e
3
  size 1350315697
trainer_state.json CHANGED
@@ -3,7 +3,7 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 5625,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,98 +11,98 @@
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 1.6000000000000003e-05,
14
- "loss": 3.5154,
15
- "step": 1125
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_accuracy": 0.146,
20
- "eval_f1": 0.05673406159055716,
21
- "eval_loss": 3.445145606994629,
22
- "eval_runtime": 16.8588,
23
- "eval_samples_per_second": 59.316,
24
- "eval_steps_per_second": 7.415,
25
- "step": 1125
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 1.2e-05,
30
- "loss": 3.2778,
31
- "step": 2250
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_accuracy": 0.197,
36
- "eval_f1": 0.09626799246503706,
37
- "eval_loss": 3.1801486015319824,
38
- "eval_runtime": 16.85,
39
- "eval_samples_per_second": 59.347,
40
- "eval_steps_per_second": 7.418,
41
- "step": 2250
42
  },
43
  {
44
  "epoch": 3.0,
45
  "learning_rate": 8.000000000000001e-06,
46
- "loss": 3.0802,
47
- "step": 3375
48
  },
49
  {
50
  "epoch": 3.0,
51
- "eval_accuracy": 0.174,
52
- "eval_f1": 0.11203679919237455,
53
- "eval_loss": 3.1373538970947266,
54
- "eval_runtime": 16.8552,
55
- "eval_samples_per_second": 59.329,
56
- "eval_steps_per_second": 7.416,
57
- "step": 3375
58
  },
59
  {
60
  "epoch": 4.0,
61
  "learning_rate": 4.000000000000001e-06,
62
- "loss": 2.8756,
63
- "step": 4500
64
  },
65
  {
66
  "epoch": 4.0,
67
- "eval_accuracy": 0.191,
68
- "eval_f1": 0.1229927332781174,
69
- "eval_loss": 3.123941421508789,
70
- "eval_runtime": 16.904,
71
- "eval_samples_per_second": 59.158,
72
- "eval_steps_per_second": 7.395,
73
- "step": 4500
74
  },
75
  {
76
  "epoch": 5.0,
77
  "learning_rate": 0.0,
78
- "loss": 2.6657,
79
- "step": 5625
80
  },
81
  {
82
  "epoch": 5.0,
83
- "eval_accuracy": 0.196,
84
- "eval_f1": 0.1399460092685649,
85
- "eval_loss": 3.178866147994995,
86
- "eval_runtime": 16.8498,
87
- "eval_samples_per_second": 59.348,
88
- "eval_steps_per_second": 7.419,
89
- "step": 5625
90
  },
91
  {
92
  "epoch": 5.0,
93
- "step": 5625,
94
- "total_flos": 4.030110203721907e+16,
95
- "train_loss": 3.0829483940972224,
96
- "train_runtime": 3366.8478,
97
- "train_samples_per_second": 13.366,
98
- "train_steps_per_second": 1.671
99
  }
100
  ],
101
  "logging_steps": 500,
102
- "max_steps": 5625,
103
  "num_train_epochs": 5,
104
  "save_steps": 500,
105
- "total_flos": 4.030110203721907e+16,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 2815,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 1.6000000000000003e-05,
14
+ "loss": 3.3692,
15
+ "step": 563
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_accuracy": 0.872,
20
+ "eval_f1": 0.856009752654711,
21
+ "eval_loss": 3.21221923828125,
22
+ "eval_runtime": 16.3666,
23
+ "eval_samples_per_second": 61.1,
24
+ "eval_steps_per_second": 3.849,
25
+ "step": 563
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 1.2e-05,
30
+ "loss": 3.0963,
31
+ "step": 1126
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_accuracy": 0.866,
36
+ "eval_f1": 0.8624612482571968,
37
+ "eval_loss": 3.1044771671295166,
38
+ "eval_runtime": 16.3224,
39
+ "eval_samples_per_second": 61.265,
40
+ "eval_steps_per_second": 3.86,
41
+ "step": 1126
42
  },
43
  {
44
  "epoch": 3.0,
45
  "learning_rate": 8.000000000000001e-06,
46
+ "loss": 2.8698,
47
+ "step": 1689
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "eval_accuracy": 0.882,
52
+ "eval_f1": 0.8755033031176542,
53
+ "eval_loss": 3.1409871578216553,
54
+ "eval_runtime": 16.3796,
55
+ "eval_samples_per_second": 61.051,
56
+ "eval_steps_per_second": 3.846,
57
+ "step": 1689
58
  },
59
  {
60
  "epoch": 4.0,
61
  "learning_rate": 4.000000000000001e-06,
62
+ "loss": 2.6212,
63
+ "step": 2252
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "eval_accuracy": 0.876,
68
+ "eval_f1": 0.8701998582565557,
69
+ "eval_loss": 3.2119336128234863,
70
+ "eval_runtime": 16.3349,
71
+ "eval_samples_per_second": 61.219,
72
+ "eval_steps_per_second": 3.857,
73
+ "step": 2252
74
  },
75
  {
76
  "epoch": 5.0,
77
  "learning_rate": 0.0,
78
+ "loss": 2.407,
79
+ "step": 2815
80
  },
81
  {
82
  "epoch": 5.0,
83
+ "eval_accuracy": 0.885,
84
+ "eval_f1": 0.8788301734570391,
85
+ "eval_loss": 3.269321918487549,
86
+ "eval_runtime": 16.3269,
87
+ "eval_samples_per_second": 61.249,
88
+ "eval_steps_per_second": 3.859,
89
+ "step": 2815
90
  },
91
  {
92
  "epoch": 5.0,
93
+ "step": 2815,
94
+ "total_flos": 4.169197960165814e+16,
95
+ "train_loss": 2.8726869085118785,
96
+ "train_runtime": 3265.1327,
97
+ "train_samples_per_second": 13.782,
98
+ "train_steps_per_second": 0.862
99
  }
100
  ],
101
  "logging_steps": 500,
102
+ "max_steps": 2815,
103
  "num_train_epochs": 5,
104
  "save_steps": 500,
105
+ "total_flos": 4.169197960165814e+16,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:886b5840ff9d3374ae58d020c0dda37e9836c32e9c097841f2dd9e1ddf57052e
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82fa4d643b419de7da5747f8a36936f20c8fe65a42386e256c7e43a673cc3d3
3
+ size 4027