dehanalkautsar commited on
Commit
a2bf017
·
verified ·
1 Parent(s): 528597f

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +213 -0
trainer_state.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.4656140804290771,
3
+ "best_model_checkpoint": "models/dehanalkautsar/mbert-3-with-multilingual-tokenizer-30k/checkpoint-24000",
4
+ "epoch": 9.996235629260353,
5
+ "eval_steps": 2000,
6
+ "global_step": 24570,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8139179977617255,
13
+ "grad_norm": 18.654558181762695,
14
+ "learning_rate": 9.185999185999187e-05,
15
+ "loss": 4.0579,
16
+ "step": 2000
17
+ },
18
+ {
19
+ "epoch": 0.8139179977617255,
20
+ "eval_loss": 2.5225632190704346,
21
+ "eval_runtime": 73.2103,
22
+ "eval_samples_per_second": 136.593,
23
+ "eval_steps_per_second": 2.145,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 1.6275307762742903,
28
+ "grad_norm": 15.436388969421387,
29
+ "learning_rate": 8.371998371998372e-05,
30
+ "loss": 2.3568,
31
+ "step": 4000
32
+ },
33
+ {
34
+ "epoch": 1.6275307762742903,
35
+ "eval_loss": 2.068246603012085,
36
+ "eval_runtime": 73.2939,
37
+ "eval_samples_per_second": 136.437,
38
+ "eval_steps_per_second": 2.142,
39
+ "step": 4000
40
+ },
41
+ {
42
+ "epoch": 2.4411435547868554,
43
+ "grad_norm": 13.966560363769531,
44
+ "learning_rate": 7.557997557997558e-05,
45
+ "loss": 2.0445,
46
+ "step": 6000
47
+ },
48
+ {
49
+ "epoch": 2.4411435547868554,
50
+ "eval_loss": 1.8596677780151367,
51
+ "eval_runtime": 73.1178,
52
+ "eval_samples_per_second": 136.766,
53
+ "eval_steps_per_second": 2.147,
54
+ "step": 6000
55
+ },
56
+ {
57
+ "epoch": 3.25475633329942,
58
+ "grad_norm": 14.865880012512207,
59
+ "learning_rate": 6.743996743996744e-05,
60
+ "loss": 1.8887,
61
+ "step": 8000
62
+ },
63
+ {
64
+ "epoch": 3.25475633329942,
65
+ "eval_loss": 1.7477023601531982,
66
+ "eval_runtime": 73.1164,
67
+ "eval_samples_per_second": 136.768,
68
+ "eval_steps_per_second": 2.147,
69
+ "step": 8000
70
+ },
71
+ {
72
+ "epoch": 4.068369111811985,
73
+ "grad_norm": 13.934277534484863,
74
+ "learning_rate": 5.929995929995931e-05,
75
+ "loss": 1.7863,
76
+ "step": 10000
77
+ },
78
+ {
79
+ "epoch": 4.068369111811985,
80
+ "eval_loss": 1.677494764328003,
81
+ "eval_runtime": 73.1043,
82
+ "eval_samples_per_second": 136.791,
83
+ "eval_steps_per_second": 2.148,
84
+ "step": 10000
85
+ },
86
+ {
87
+ "epoch": 4.882287109573711,
88
+ "grad_norm": 13.954177856445312,
89
+ "learning_rate": 5.115995115995116e-05,
90
+ "loss": 1.7168,
91
+ "step": 12000
92
+ },
93
+ {
94
+ "epoch": 4.882287109573711,
95
+ "eval_loss": 1.6307039260864258,
96
+ "eval_runtime": 73.09,
97
+ "eval_samples_per_second": 136.818,
98
+ "eval_steps_per_second": 2.148,
99
+ "step": 12000
100
+ },
101
+ {
102
+ "epoch": 5.6958998880862755,
103
+ "grad_norm": 13.182450294494629,
104
+ "learning_rate": 4.301994301994302e-05,
105
+ "loss": 1.6626,
106
+ "step": 14000
107
+ },
108
+ {
109
+ "epoch": 5.6958998880862755,
110
+ "eval_loss": 1.5682964324951172,
111
+ "eval_runtime": 73.1095,
112
+ "eval_samples_per_second": 136.781,
113
+ "eval_steps_per_second": 2.147,
114
+ "step": 14000
115
+ },
116
+ {
117
+ "epoch": 6.50951266659884,
118
+ "grad_norm": 13.514598846435547,
119
+ "learning_rate": 3.487993487993488e-05,
120
+ "loss": 1.6187,
121
+ "step": 16000
122
+ },
123
+ {
124
+ "epoch": 6.50951266659884,
125
+ "eval_loss": 1.5308398008346558,
126
+ "eval_runtime": 73.125,
127
+ "eval_samples_per_second": 136.752,
128
+ "eval_steps_per_second": 2.147,
129
+ "step": 16000
130
+ },
131
+ {
132
+ "epoch": 7.323125445111405,
133
+ "grad_norm": 14.169295310974121,
134
+ "learning_rate": 2.673992673992674e-05,
135
+ "loss": 1.5873,
136
+ "step": 18000
137
+ },
138
+ {
139
+ "epoch": 7.323125445111405,
140
+ "eval_loss": 1.5098525285720825,
141
+ "eval_runtime": 73.107,
142
+ "eval_samples_per_second": 136.786,
143
+ "eval_steps_per_second": 2.148,
144
+ "step": 18000
145
+ },
146
+ {
147
+ "epoch": 8.13673822362397,
148
+ "grad_norm": 13.590775489807129,
149
+ "learning_rate": 1.85999185999186e-05,
150
+ "loss": 1.5568,
151
+ "step": 20000
152
+ },
153
+ {
154
+ "epoch": 8.13673822362397,
155
+ "eval_loss": 1.4912240505218506,
156
+ "eval_runtime": 73.2463,
157
+ "eval_samples_per_second": 136.526,
158
+ "eval_steps_per_second": 2.143,
159
+ "step": 20000
160
+ },
161
+ {
162
+ "epoch": 8.950656221385696,
163
+ "grad_norm": 13.897253036499023,
164
+ "learning_rate": 1.045991045991046e-05,
165
+ "loss": 1.5348,
166
+ "step": 22000
167
+ },
168
+ {
169
+ "epoch": 8.950656221385696,
170
+ "eval_loss": 1.4697929620742798,
171
+ "eval_runtime": 73.2504,
172
+ "eval_samples_per_second": 136.518,
173
+ "eval_steps_per_second": 2.143,
174
+ "step": 22000
175
+ },
176
+ {
177
+ "epoch": 9.76426899989826,
178
+ "grad_norm": 13.867361068725586,
179
+ "learning_rate": 2.31990231990232e-06,
180
+ "loss": 1.5172,
181
+ "step": 24000
182
+ },
183
+ {
184
+ "epoch": 9.76426899989826,
185
+ "eval_loss": 1.4656140804290771,
186
+ "eval_runtime": 73.2214,
187
+ "eval_samples_per_second": 136.572,
188
+ "eval_steps_per_second": 2.144,
189
+ "step": 24000
190
+ }
191
+ ],
192
+ "logging_steps": 2000,
193
+ "max_steps": 24570,
194
+ "num_input_tokens_seen": 0,
195
+ "num_train_epochs": 10,
196
+ "save_steps": 2000,
197
+ "stateful_callbacks": {
198
+ "TrainerControl": {
199
+ "args": {
200
+ "should_epoch_stop": false,
201
+ "should_evaluate": false,
202
+ "should_log": false,
203
+ "should_save": true,
204
+ "should_training_stop": true
205
+ },
206
+ "attributes": {}
207
+ }
208
+ },
209
+ "total_flos": 1.6550302836575232e+18,
210
+ "train_batch_size": 32,
211
+ "trial_name": null,
212
+ "trial_params": null
213
+ }