gokulsrinivasagan committed on
Commit
50b711a
·
verified ·
1 Parent(s): a9d96c0

End of training

Browse files
README.md CHANGED
@@ -1,15 +1,33 @@
1
  ---
2
  library_name: transformers
 
 
3
  license: apache-2.0
4
  base_model: gokulsrinivasagan/bert_base_train_book_ent_15p_s_init
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - accuracy
9
  - f1
10
  model-index:
11
  - name: bert_base_train_book_ent_15p_s_init_mrpc
12
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -17,9 +35,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # bert_base_train_book_ent_15p_s_init_mrpc
19
 
20
- This model is a fine-tuned version of [gokulsrinivasagan/bert_base_train_book_ent_15p_s_init](https://huggingface.co/gokulsrinivasagan/bert_base_train_book_ent_15p_s_init) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.6260
23
  - Accuracy: 0.6838
24
  - F1: 0.8122
25
  - Combined Score: 0.7480
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  license: apache-2.0
6
  base_model: gokulsrinivasagan/bert_base_train_book_ent_15p_s_init
7
  tags:
8
  - generated_from_trainer
9
+ datasets:
10
+ - glue
11
  metrics:
12
  - accuracy
13
  - f1
14
  model-index:
15
  - name: bert_base_train_book_ent_15p_s_init_mrpc
16
+ results:
17
+ - task:
18
+ name: Text Classification
19
+ type: text-classification
20
+ dataset:
21
+ name: GLUE MRPC
22
+ type: glue
23
+ args: mrpc
24
+ metrics:
25
+ - name: Accuracy
26
+ type: accuracy
27
+ value: 0.6838235294117647
28
+ - name: F1
29
+ type: f1
30
+ value: 0.8122270742358079
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
35
 
36
  # bert_base_train_book_ent_15p_s_init_mrpc
37
 
38
+ This model is a fine-tuned version of [gokulsrinivasagan/bert_base_train_book_ent_15p_s_init](https://huggingface.co/gokulsrinivasagan/bert_base_train_book_ent_15p_s_init) on the GLUE MRPC dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 0.6236
41
  - Accuracy: 0.6838
42
  - F1: 0.8122
43
  - Combined Score: 0.7480
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7480253018237863,
5
+ "eval_f1": 0.8122270742358079,
6
+ "eval_loss": 0.6236213445663452,
7
+ "eval_runtime": 0.42,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 971.447,
10
+ "eval_steps_per_second": 4.762,
11
+ "total_flos": 5790548106362880.0,
12
+ "train_loss": 0.6358856678009033,
13
+ "train_runtime": 111.4221,
14
+ "train_samples": 3668,
15
+ "train_samples_per_second": 1645.993,
16
+ "train_steps_per_second": 6.731
17
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7480253018237863,
5
+ "eval_f1": 0.8122270742358079,
6
+ "eval_loss": 0.6236213445663452,
7
+ "eval_runtime": 0.42,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 971.447,
10
+ "eval_steps_per_second": 4.762
11
+ }
logs/events.out.tfevents.1745511664.ki-g0008.3436350.21 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3cdc9f3440749029907b479b114c1a8d2dc577d80d346f2dc85082f840760ea
3
+ size 515
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "total_flos": 5790548106362880.0,
4
+ "train_loss": 0.6358856678009033,
5
+ "train_runtime": 111.4221,
6
+ "train_samples": 3668,
7
+ "train_samples_per_second": 1645.993,
8
+ "train_steps_per_second": 6.731
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 105,
3
+ "best_metric": 0.6236213445663452,
4
+ "best_model_checkpoint": "bert_base_train_book_ent_15p_s_init_mrpc/checkpoint-105",
5
+ "epoch": 12.0,
6
+ "eval_steps": 500,
7
+ "global_step": 180,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "grad_norm": 0.7837658524513245,
15
+ "learning_rate": 4.906666666666667e-05,
16
+ "loss": 0.6428,
17
+ "step": 15
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.6838235294117647,
22
+ "eval_combined_score": 0.7480253018237863,
23
+ "eval_f1": 0.8122270742358079,
24
+ "eval_loss": 0.6254978775978088,
25
+ "eval_runtime": 0.3699,
26
+ "eval_samples_per_second": 1102.979,
27
+ "eval_steps_per_second": 5.407,
28
+ "step": 15
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 2.680393934249878,
33
+ "learning_rate": 4.806666666666667e-05,
34
+ "loss": 0.6363,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.6838235294117647,
40
+ "eval_combined_score": 0.7480253018237863,
41
+ "eval_f1": 0.8122270742358079,
42
+ "eval_loss": 0.6296674013137817,
43
+ "eval_runtime": 0.3745,
44
+ "eval_samples_per_second": 1089.444,
45
+ "eval_steps_per_second": 5.34,
46
+ "step": 30
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "grad_norm": 1.3696545362472534,
51
+ "learning_rate": 4.706666666666667e-05,
52
+ "loss": 0.6317,
53
+ "step": 45
54
+ },
55
+ {
56
+ "epoch": 3.0,
57
+ "eval_accuracy": 0.6838235294117647,
58
+ "eval_combined_score": 0.7480253018237863,
59
+ "eval_f1": 0.8122270742358079,
60
+ "eval_loss": 0.6300982236862183,
61
+ "eval_runtime": 0.3764,
62
+ "eval_samples_per_second": 1084.091,
63
+ "eval_steps_per_second": 5.314,
64
+ "step": 45
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "grad_norm": 0.9995052814483643,
69
+ "learning_rate": 4.606666666666667e-05,
70
+ "loss": 0.6411,
71
+ "step": 60
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.6838235294117647,
76
+ "eval_combined_score": 0.7480253018237863,
77
+ "eval_f1": 0.8122270742358079,
78
+ "eval_loss": 0.6254978775978088,
79
+ "eval_runtime": 0.3663,
80
+ "eval_samples_per_second": 1113.836,
81
+ "eval_steps_per_second": 5.46,
82
+ "step": 60
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "grad_norm": 0.5899142026901245,
87
+ "learning_rate": 4.5066666666666667e-05,
88
+ "loss": 0.6336,
89
+ "step": 75
90
+ },
91
+ {
92
+ "epoch": 5.0,
93
+ "eval_accuracy": 0.6838235294117647,
94
+ "eval_combined_score": 0.7480253018237863,
95
+ "eval_f1": 0.8122270742358079,
96
+ "eval_loss": 0.6254308223724365,
97
+ "eval_runtime": 0.363,
98
+ "eval_samples_per_second": 1123.901,
99
+ "eval_steps_per_second": 5.509,
100
+ "step": 75
101
+ },
102
+ {
103
+ "epoch": 6.0,
104
+ "grad_norm": 1.0961157083511353,
105
+ "learning_rate": 4.406666666666667e-05,
106
+ "loss": 0.6348,
107
+ "step": 90
108
+ },
109
+ {
110
+ "epoch": 6.0,
111
+ "eval_accuracy": 0.6838235294117647,
112
+ "eval_combined_score": 0.7480253018237863,
113
+ "eval_f1": 0.8122270742358079,
114
+ "eval_loss": 0.6238797903060913,
115
+ "eval_runtime": 0.3681,
116
+ "eval_samples_per_second": 1108.448,
117
+ "eval_steps_per_second": 5.434,
118
+ "step": 90
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "grad_norm": 0.8835726976394653,
123
+ "learning_rate": 4.3066666666666665e-05,
124
+ "loss": 0.6346,
125
+ "step": 105
126
+ },
127
+ {
128
+ "epoch": 7.0,
129
+ "eval_accuracy": 0.6838235294117647,
130
+ "eval_combined_score": 0.7480253018237863,
131
+ "eval_f1": 0.8122270742358079,
132
+ "eval_loss": 0.6236213445663452,
133
+ "eval_runtime": 0.3895,
134
+ "eval_samples_per_second": 1047.538,
135
+ "eval_steps_per_second": 5.135,
136
+ "step": 105
137
+ },
138
+ {
139
+ "epoch": 8.0,
140
+ "grad_norm": 0.5998325943946838,
141
+ "learning_rate": 4.206666666666667e-05,
142
+ "loss": 0.6364,
143
+ "step": 120
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "eval_accuracy": 0.6838235294117647,
148
+ "eval_combined_score": 0.7480253018237863,
149
+ "eval_f1": 0.8122270742358079,
150
+ "eval_loss": 0.6241862177848816,
151
+ "eval_runtime": 0.3699,
152
+ "eval_samples_per_second": 1102.886,
153
+ "eval_steps_per_second": 5.406,
154
+ "step": 120
155
+ },
156
+ {
157
+ "epoch": 9.0,
158
+ "grad_norm": 1.0318900346755981,
159
+ "learning_rate": 4.106666666666667e-05,
160
+ "loss": 0.6309,
161
+ "step": 135
162
+ },
163
+ {
164
+ "epoch": 9.0,
165
+ "eval_accuracy": 0.6838235294117647,
166
+ "eval_combined_score": 0.7480253018237863,
167
+ "eval_f1": 0.8122270742358079,
168
+ "eval_loss": 0.6320992112159729,
169
+ "eval_runtime": 0.3664,
170
+ "eval_samples_per_second": 1113.41,
171
+ "eval_steps_per_second": 5.458,
172
+ "step": 135
173
+ },
174
+ {
175
+ "epoch": 10.0,
176
+ "grad_norm": 0.612123429775238,
177
+ "learning_rate": 4.006666666666667e-05,
178
+ "loss": 0.6392,
179
+ "step": 150
180
+ },
181
+ {
182
+ "epoch": 10.0,
183
+ "eval_accuracy": 0.6838235294117647,
184
+ "eval_combined_score": 0.7480253018237863,
185
+ "eval_f1": 0.8122270742358079,
186
+ "eval_loss": 0.624152660369873,
187
+ "eval_runtime": 0.378,
188
+ "eval_samples_per_second": 1079.327,
189
+ "eval_steps_per_second": 5.291,
190
+ "step": 150
191
+ },
192
+ {
193
+ "epoch": 11.0,
194
+ "grad_norm": 1.4060471057891846,
195
+ "learning_rate": 3.906666666666667e-05,
196
+ "loss": 0.6353,
197
+ "step": 165
198
+ },
199
+ {
200
+ "epoch": 11.0,
201
+ "eval_accuracy": 0.6838235294117647,
202
+ "eval_combined_score": 0.7480253018237863,
203
+ "eval_f1": 0.8122270742358079,
204
+ "eval_loss": 0.6270584464073181,
205
+ "eval_runtime": 0.3649,
206
+ "eval_samples_per_second": 1118.136,
207
+ "eval_steps_per_second": 5.481,
208
+ "step": 165
209
+ },
210
+ {
211
+ "epoch": 12.0,
212
+ "grad_norm": 0.859621524810791,
213
+ "learning_rate": 3.8066666666666666e-05,
214
+ "loss": 0.6339,
215
+ "step": 180
216
+ },
217
+ {
218
+ "epoch": 12.0,
219
+ "eval_accuracy": 0.6838235294117647,
220
+ "eval_combined_score": 0.7480253018237863,
221
+ "eval_f1": 0.8122270742358079,
222
+ "eval_loss": 0.6259621381759644,
223
+ "eval_runtime": 0.3756,
224
+ "eval_samples_per_second": 1086.399,
225
+ "eval_steps_per_second": 5.325,
226
+ "step": 180
227
+ },
228
+ {
229
+ "epoch": 12.0,
230
+ "step": 180,
231
+ "total_flos": 5790548106362880.0,
232
+ "train_loss": 0.6358856678009033,
233
+ "train_runtime": 111.4221,
234
+ "train_samples_per_second": 1645.993,
235
+ "train_steps_per_second": 6.731
236
+ }
237
+ ],
238
+ "logging_steps": 1,
239
+ "max_steps": 750,
240
+ "num_input_tokens_seen": 0,
241
+ "num_train_epochs": 50,
242
+ "save_steps": 500,
243
+ "stateful_callbacks": {
244
+ "EarlyStoppingCallback": {
245
+ "args": {
246
+ "early_stopping_patience": 5,
247
+ "early_stopping_threshold": 0.0
248
+ },
249
+ "attributes": {
250
+ "early_stopping_patience_counter": 5
251
+ }
252
+ },
253
+ "TrainerControl": {
254
+ "args": {
255
+ "should_epoch_stop": false,
256
+ "should_evaluate": false,
257
+ "should_log": false,
258
+ "should_save": true,
259
+ "should_training_stop": true
260
+ },
261
+ "attributes": {}
262
+ }
263
+ },
264
+ "total_flos": 5790548106362880.0,
265
+ "train_batch_size": 256,
266
+ "trial_name": null,
267
+ "trial_params": null
268
+ }