hungnm committed on
Commit
bce5871
·
verified ·
1 Parent(s): 70d6715

End of training

Browse files
Files changed (5)
  1. README.md +2 -2
  2. all_results.json +11 -11
  3. eval_results.json +7 -7
  4. train_results.json +5 -5
  5. trainer_state.json +22 -115
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.1497
21
- - Accuracy: 0.7445
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.1478
21
+ - Accuracy: 0.7447
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.746344930565925,
4
- "eval_loss": 1.1388483047485352,
5
- "eval_runtime": 65.6634,
6
  "eval_samples": 19998,
7
- "eval_samples_per_second": 304.553,
8
- "eval_steps_per_second": 3.183,
9
- "perplexity": 3.1231693537875205,
10
- "train_loss": 1.1363053430210461,
11
- "train_runtime": 1002.059,
12
  "train_samples": 100000,
13
- "train_samples_per_second": 199.589,
14
- "train_steps_per_second": 0.132
15
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.7447392650575019,
4
+ "eval_loss": 1.147769808769226,
5
+ "eval_runtime": 66.0103,
6
  "eval_samples": 19998,
7
+ "eval_samples_per_second": 302.952,
8
+ "eval_steps_per_second": 3.166,
9
+ "perplexity": 3.1511573837576843,
10
+ "train_loss": 1.1451126827913172,
11
+ "train_runtime": 507.5022,
12
  "train_samples": 100000,
13
+ "train_samples_per_second": 197.043,
14
+ "train_steps_per_second": 0.033
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_accuracy": 0.746344930565925,
4
- "eval_loss": 1.1388483047485352,
5
- "eval_runtime": 65.6634,
6
  "eval_samples": 19998,
7
- "eval_samples_per_second": 304.553,
8
- "eval_steps_per_second": 3.183,
9
- "perplexity": 3.1231693537875205
10
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.7447392650575019,
4
+ "eval_loss": 1.147769808769226,
5
+ "eval_runtime": 66.0103,
6
  "eval_samples": 19998,
7
+ "eval_samples_per_second": 302.952,
8
+ "eval_steps_per_second": 3.166,
9
+ "perplexity": 3.1511573837576843
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 1.1363053430210461,
4
- "train_runtime": 1002.059,
5
  "train_samples": 100000,
6
- "train_samples_per_second": 199.589,
7
- "train_steps_per_second": 0.132
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.1451126827913172,
4
+ "train_runtime": 507.5022,
5
  "train_samples": 100000,
6
+ "train_samples_per_second": 197.043,
7
+ "train_steps_per_second": 0.033
8
  }
trainer_state.json CHANGED
@@ -2,136 +2,43 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
- "global_step": 132,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
- {
13
- "epoch": 0.15355086372360843,
14
- "grad_norm": 0.251953125,
15
- "learning_rate": 5.785714285714286e-05,
16
- "loss": 1.158,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.30710172744721687,
21
- "grad_norm": 0.2734375,
22
- "learning_rate": 8.61864406779661e-05,
23
- "loss": 1.1475,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.46065259117082535,
28
- "grad_norm": 0.2734375,
29
- "learning_rate": 7.85593220338983e-05,
30
- "loss": 1.136,
31
- "step": 30
32
- },
33
  {
34
  "epoch": 0.6142034548944337,
35
- "grad_norm": 0.2578125,
36
- "learning_rate": 7.093220338983051e-05,
37
- "loss": 1.1378,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.7677543186180422,
42
- "grad_norm": 0.24609375,
43
- "learning_rate": 6.330508474576271e-05,
44
- "loss": 1.1399,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.9213051823416507,
49
- "grad_norm": 0.2431640625,
50
- "learning_rate": 5.567796610169492e-05,
51
- "loss": 1.1298,
52
- "step": 60
53
  },
54
  {
55
  "epoch": 1.0,
56
- "eval_accuracy": 0.7456526713019853,
57
- "eval_loss": 1.1419495344161987,
58
- "eval_runtime": 65.7795,
59
- "eval_samples_per_second": 304.016,
60
- "eval_steps_per_second": 3.177,
61
- "step": 66
62
  },
63
  {
64
- "epoch": 1.0614203454894433,
65
- "grad_norm": 0.236328125,
66
- "learning_rate": 4.805084745762713e-05,
67
- "loss": 1.1291,
68
- "step": 70
69
- },
70
- {
71
- "epoch": 1.2149712092130518,
72
- "grad_norm": 0.24609375,
73
- "learning_rate": 4.0423728813559324e-05,
74
- "loss": 1.1346,
75
- "step": 80
76
- },
77
- {
78
- "epoch": 1.3685220729366603,
79
- "grad_norm": 0.25390625,
80
- "learning_rate": 3.279661016949153e-05,
81
- "loss": 1.1352,
82
- "step": 90
83
- },
84
- {
85
- "epoch": 1.5220729366602685,
86
- "grad_norm": 0.2451171875,
87
- "learning_rate": 2.5169491525423728e-05,
88
- "loss": 1.1352,
89
- "step": 100
90
- },
91
- {
92
- "epoch": 1.6756238003838773,
93
- "grad_norm": 0.2373046875,
94
- "learning_rate": 1.7542372881355935e-05,
95
- "loss": 1.1287,
96
- "step": 110
97
- },
98
- {
99
- "epoch": 1.8291746641074855,
100
- "grad_norm": 0.2490234375,
101
- "learning_rate": 9.915254237288136e-06,
102
- "loss": 1.129,
103
- "step": 120
104
- },
105
- {
106
- "epoch": 1.982725527831094,
107
- "grad_norm": 0.2490234375,
108
- "learning_rate": 2.288135593220339e-06,
109
- "loss": 1.138,
110
- "step": 130
111
- },
112
- {
113
- "epoch": 2.0,
114
- "eval_accuracy": 0.7461360777177145,
115
- "eval_loss": 1.1379570960998535,
116
- "eval_runtime": 65.8306,
117
- "eval_samples_per_second": 303.78,
118
- "eval_steps_per_second": 3.175,
119
- "step": 132
120
- },
121
- {
122
- "epoch": 2.0,
123
- "step": 132,
124
- "total_flos": 1.363631407104e+17,
125
- "train_loss": 1.1363053430210461,
126
- "train_runtime": 1002.059,
127
- "train_samples_per_second": 199.589,
128
- "train_steps_per_second": 0.132
129
  }
130
  ],
131
  "logging_steps": 10,
132
- "max_steps": 132,
133
  "num_input_tokens_seen": 0,
134
- "num_train_epochs": 2,
135
  "save_steps": 5000,
136
  "stateful_callbacks": {
137
  "TrainerControl": {
@@ -145,7 +52,7 @@
145
  "attributes": {}
146
  }
147
  },
148
- "total_flos": 1.363631407104e+17,
149
  "train_batch_size": 96,
150
  "trial_name": null,
151
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 17,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 0.6142034548944337,
14
+ "grad_norm": 0.15625,
15
+ "learning_rate": 4.8e-05,
16
+ "loss": 1.148,
17
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.7445295852526854,
22
+ "eval_loss": 1.1497305631637573,
23
+ "eval_runtime": 66.9763,
24
+ "eval_samples_per_second": 298.583,
25
+ "eval_steps_per_second": 3.121,
26
+ "step": 17
27
  },
28
  {
29
+ "epoch": 1.0,
30
+ "step": 17,
31
+ "total_flos": 6.81815703552e+16,
32
+ "train_loss": 1.1451126827913172,
33
+ "train_runtime": 507.5022,
34
+ "train_samples_per_second": 197.043,
35
+ "train_steps_per_second": 0.033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
  "logging_steps": 10,
39
+ "max_steps": 17,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 1,
42
  "save_steps": 5000,
43
  "stateful_callbacks": {
44
  "TrainerControl": {
 
52
  "attributes": {}
53
  }
54
  },
55
+ "total_flos": 6.81815703552e+16,
56
  "train_batch_size": 96,
57
  "trial_name": null,
58
  "trial_params": null