b-atmaja commited on
Commit
e55f38f
1 Parent(s): 7b51472

new model from indonesian-nlp

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "eval_loss": 0.3271900713443756,
4
+ "eval_runtime": 107.3558,
5
+ "eval_samples": 1844,
6
+ "eval_samples_per_second": 17.177,
7
+ "eval_steps_per_second": 2.152,
8
+ "eval_wer": 0.3851389147865001,
9
+ "train_loss": 0.8272683016715511,
10
+ "train_runtime": 19767.0967,
11
+ "train_samples": 3965,
12
+ "train_samples_per_second": 12.035,
13
+ "train_steps_per_second": 0.376
14
+ }
config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
  "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
- "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
  "Wav2Vec2ForCTC"
7
  ],
8
- "attention_dropout": 0.1,
9
  "bos_token_id": 1,
 
10
  "codevector_dim": 768,
11
  "contrastive_logits_temperature": 0.1,
12
  "conv_bias": true,
@@ -38,24 +39,24 @@
38
  2
39
  ],
40
  "ctc_loss_reduction": "mean",
41
- "ctc_zero_infinity": false,
42
  "diversity_loss_weight": 0.1,
43
  "do_stable_layer_norm": true,
44
  "eos_token_id": 2,
45
  "feat_extract_activation": "gelu",
46
  "feat_extract_dropout": 0.0,
47
  "feat_extract_norm": "layer",
48
- "feat_proj_dropout": 0.0,
49
  "feat_quantizer_dropout": 0.0,
50
  "final_dropout": 0.0,
51
  "gradient_checkpointing": true,
52
  "hidden_act": "gelu",
53
- "hidden_dropout": 0.1,
54
  "hidden_size": 1024,
55
  "initializer_range": 0.02,
56
  "intermediate_size": 4096,
57
  "layer_norm_eps": 1e-05,
58
- "layerdrop": 0.1,
59
  "mask_channel_length": 10,
60
  "mask_channel_min_space": 1,
61
  "mask_channel_other": 0.0,
@@ -66,7 +67,7 @@
66
  "mask_time_length": 10,
67
  "mask_time_min_space": 1,
68
  "mask_time_other": 0.0,
69
- "mask_time_prob": 0.05,
70
  "mask_time_selection": "static",
71
  "model_type": "wav2vec2",
72
  "num_attention_heads": 16,
@@ -77,8 +78,10 @@
77
  "num_feat_extract_layers": 7,
78
  "num_hidden_layers": 24,
79
  "num_negatives": 100,
80
- "pad_token_id": 62,
81
  "proj_codevector_dim": 768,
82
- "transformers_version": "4.4.0",
83
- "vocab_size": 63
 
 
84
  }
 
1
  {
2
  "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.055,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
  "Wav2Vec2ForCTC"
7
  ],
8
+ "attention_dropout": 0.094,
9
  "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
  "codevector_dim": 768,
12
  "contrastive_logits_temperature": 0.1,
13
  "conv_bias": true,
 
39
  2
40
  ],
41
  "ctc_loss_reduction": "mean",
42
+ "ctc_zero_infinity": true,
43
  "diversity_loss_weight": 0.1,
44
  "do_stable_layer_norm": true,
45
  "eos_token_id": 2,
46
  "feat_extract_activation": "gelu",
47
  "feat_extract_dropout": 0.0,
48
  "feat_extract_norm": "layer",
49
+ "feat_proj_dropout": 0.04,
50
  "feat_quantizer_dropout": 0.0,
51
  "final_dropout": 0.0,
52
  "gradient_checkpointing": true,
53
  "hidden_act": "gelu",
54
+ "hidden_dropout": 0.047,
55
  "hidden_size": 1024,
56
  "initializer_range": 0.02,
57
  "intermediate_size": 4096,
58
  "layer_norm_eps": 1e-05,
59
+ "layerdrop": 0.041,
60
  "mask_channel_length": 10,
61
  "mask_channel_min_space": 1,
62
  "mask_channel_other": 0.0,
 
67
  "mask_time_length": 10,
68
  "mask_time_min_space": 1,
69
  "mask_time_other": 0.0,
70
+ "mask_time_prob": 0.4,
71
  "mask_time_selection": "static",
72
  "model_type": "wav2vec2",
73
  "num_attention_heads": 16,
 
78
  "num_feat_extract_layers": 7,
79
  "num_hidden_layers": 24,
80
  "num_negatives": 100,
81
+ "pad_token_id": 30,
82
  "proj_codevector_dim": 768,
83
+ "torch_dtype": "float32",
84
+ "transformers_version": "4.10.3",
85
+ "use_weighted_layer_sum": false,
86
+ "vocab_size": 31
87
  }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "eval_loss": 0.3271900713443756,
4
+ "eval_runtime": 107.3558,
5
+ "eval_samples": 1844,
6
+ "eval_samples_per_second": 17.177,
7
+ "eval_steps_per_second": 2.152,
8
+ "eval_wer": 0.3851389147865001
9
+ }
preprocessor_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "do_normalize": true,
 
3
  "feature_size": 1,
4
  "padding_side": "right",
5
  "padding_value": 0.0,
 
1
  {
2
  "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "train_loss": 0.8272683016715511,
4
+ "train_runtime": 19767.0967,
5
+ "train_samples": 3965,
6
+ "train_samples_per_second": 12.035,
7
+ "train_steps_per_second": 0.376
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 60.0,
5
+ "global_step": 7440,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.81,
12
+ "learning_rate": 1.3172043010752688e-05,
13
+ "loss": 13.9663,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.81,
18
+ "eval_loss": 15.575762748718262,
19
+ "eval_runtime": 98.9721,
20
+ "eval_samples_per_second": 18.632,
21
+ "eval_steps_per_second": 2.334,
22
+ "eval_wer": 1.0,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 1.61,
27
+ "learning_rate": 2.6344086021505376e-05,
28
+ "loss": 6.7623,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 1.61,
33
+ "eval_loss": 3.259247064590454,
34
+ "eval_runtime": 99.4059,
35
+ "eval_samples_per_second": 18.55,
36
+ "eval_steps_per_second": 2.324,
37
+ "eval_wer": 1.0,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 2.42,
42
+ "learning_rate": 3.978494623655914e-05,
43
+ "loss": 3.0242,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 2.42,
48
+ "eval_loss": 2.9369125366210938,
49
+ "eval_runtime": 98.1481,
50
+ "eval_samples_per_second": 18.788,
51
+ "eval_steps_per_second": 2.354,
52
+ "eval_wer": 1.0,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 3.23,
57
+ "learning_rate": 5.32258064516129e-05,
58
+ "loss": 2.8806,
59
+ "step": 400
60
+ },
61
+ {
62
+ "epoch": 3.23,
63
+ "eval_loss": 2.8887438774108887,
64
+ "eval_runtime": 97.5774,
65
+ "eval_samples_per_second": 18.898,
66
+ "eval_steps_per_second": 2.367,
67
+ "eval_wer": 1.0,
68
+ "step": 400
69
+ },
70
+ {
71
+ "epoch": 4.03,
72
+ "learning_rate": 6.666666666666667e-05,
73
+ "loss": 2.8344,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 4.03,
78
+ "eval_loss": 2.8125975131988525,
79
+ "eval_runtime": 97.5647,
80
+ "eval_samples_per_second": 18.9,
81
+ "eval_steps_per_second": 2.368,
82
+ "eval_wer": 1.0,
83
+ "step": 500
84
+ },
85
+ {
86
+ "epoch": 4.84,
87
+ "learning_rate": 8.010752688172043e-05,
88
+ "loss": 2.7158,
89
+ "step": 600
90
+ },
91
+ {
92
+ "epoch": 4.84,
93
+ "eval_loss": 2.304689884185791,
94
+ "eval_runtime": 97.2319,
95
+ "eval_samples_per_second": 18.965,
96
+ "eval_steps_per_second": 2.376,
97
+ "eval_wer": 1.003542793212754,
98
+ "step": 600
99
+ },
100
+ {
101
+ "epoch": 5.65,
102
+ "learning_rate": 9.35483870967742e-05,
103
+ "loss": 1.7164,
104
+ "step": 700
105
+ },
106
+ {
107
+ "epoch": 5.65,
108
+ "eval_loss": 0.7104876041412354,
109
+ "eval_runtime": 96.4932,
110
+ "eval_samples_per_second": 19.11,
111
+ "eval_steps_per_second": 2.394,
112
+ "eval_wer": 0.7594629871340668,
113
+ "step": 700
114
+ },
115
+ {
116
+ "epoch": 6.45,
117
+ "learning_rate": 0.0001,
118
+ "loss": 1.0757,
119
+ "step": 800
120
+ },
121
+ {
122
+ "epoch": 6.45,
123
+ "eval_loss": 0.5160450339317322,
124
+ "eval_runtime": 97.694,
125
+ "eval_samples_per_second": 18.875,
126
+ "eval_steps_per_second": 2.365,
127
+ "eval_wer": 0.6501025545403692,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 7.26,
132
+ "learning_rate": 0.0001,
133
+ "loss": 0.9208,
134
+ "step": 900
135
+ },
136
+ {
137
+ "epoch": 7.26,
138
+ "eval_loss": 0.43188726902008057,
139
+ "eval_runtime": 101.9111,
140
+ "eval_samples_per_second": 18.094,
141
+ "eval_steps_per_second": 2.267,
142
+ "eval_wer": 0.5842811859034123,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 8.06,
147
+ "learning_rate": 0.0001,
148
+ "loss": 0.8012,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 8.06,
153
+ "eval_loss": 0.39437660574913025,
154
+ "eval_runtime": 97.2792,
155
+ "eval_samples_per_second": 18.956,
156
+ "eval_steps_per_second": 2.375,
157
+ "eval_wer": 0.5456833861644602,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 8.87,
162
+ "learning_rate": 0.0001,
163
+ "loss": 0.7334,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 8.87,
168
+ "eval_loss": 0.36806735396385193,
169
+ "eval_runtime": 97.7448,
170
+ "eval_samples_per_second": 18.865,
171
+ "eval_steps_per_second": 2.363,
172
+ "eval_wer": 0.5120268506432967,
173
+ "step": 1100
174
+ },
175
+ {
176
+ "epoch": 9.68,
177
+ "learning_rate": 0.0001,
178
+ "loss": 0.6839,
179
+ "step": 1200
180
+ },
181
+ {
182
+ "epoch": 9.68,
183
+ "eval_loss": 0.35295596718788147,
184
+ "eval_runtime": 104.7228,
185
+ "eval_samples_per_second": 17.608,
186
+ "eval_steps_per_second": 2.206,
187
+ "eval_wer": 0.49822860339362296,
188
+ "step": 1200
189
+ },
190
+ {
191
+ "epoch": 10.48,
192
+ "learning_rate": 0.0001,
193
+ "loss": 0.6599,
194
+ "step": 1300
195
+ },
196
+ {
197
+ "epoch": 10.48,
198
+ "eval_loss": 0.34405317902565,
199
+ "eval_runtime": 100.6994,
200
+ "eval_samples_per_second": 18.312,
201
+ "eval_steps_per_second": 2.294,
202
+ "eval_wer": 0.49561812418422524,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 11.29,
207
+ "learning_rate": 0.0001,
208
+ "loss": 0.6101,
209
+ "step": 1400
210
+ },
211
+ {
212
+ "epoch": 11.29,
213
+ "eval_loss": 0.3226686716079712,
214
+ "eval_runtime": 97.8943,
215
+ "eval_samples_per_second": 18.837,
216
+ "eval_steps_per_second": 2.36,
217
+ "eval_wer": 0.4828454223382435,
218
+ "step": 1400
219
+ },
220
+ {
221
+ "epoch": 12.1,
222
+ "learning_rate": 0.0001,
223
+ "loss": 0.5844,
224
+ "step": 1500
225
+ },
226
+ {
227
+ "epoch": 12.1,
228
+ "eval_loss": 0.34125906229019165,
229
+ "eval_runtime": 98.6933,
230
+ "eval_samples_per_second": 18.684,
231
+ "eval_steps_per_second": 2.341,
232
+ "eval_wer": 0.47445459630803655,
233
+ "step": 1500
234
+ },
235
+ {
236
+ "epoch": 12.9,
237
+ "learning_rate": 0.0001,
238
+ "loss": 0.5754,
239
+ "step": 1600
240
+ },
241
+ {
242
+ "epoch": 12.9,
243
+ "eval_loss": 0.33336642384529114,
244
+ "eval_runtime": 99.0007,
245
+ "eval_samples_per_second": 18.626,
246
+ "eval_steps_per_second": 2.333,
247
+ "eval_wer": 0.46140220026104795,
248
+ "step": 1600
249
+ },
250
+ {
251
+ "epoch": 13.71,
252
+ "learning_rate": 0.0001,
253
+ "loss": 0.5611,
254
+ "step": 1700
255
+ },
256
+ {
257
+ "epoch": 13.71,
258
+ "eval_loss": 0.30597391724586487,
259
+ "eval_runtime": 101.2827,
260
+ "eval_samples_per_second": 18.206,
261
+ "eval_steps_per_second": 2.281,
262
+ "eval_wer": 0.4672757784821928,
263
+ "step": 1700
264
+ },
265
+ {
266
+ "epoch": 14.52,
267
+ "learning_rate": 0.0001,
268
+ "loss": 0.5476,
269
+ "step": 1800
270
+ },
271
+ {
272
+ "epoch": 14.52,
273
+ "eval_loss": 0.3194342851638794,
274
+ "eval_runtime": 105.7302,
275
+ "eval_samples_per_second": 17.441,
276
+ "eval_steps_per_second": 2.185,
277
+ "eval_wer": 0.44881596121573747,
278
+ "step": 1800
279
+ },
280
+ {
281
+ "epoch": 15.32,
282
+ "learning_rate": 0.0001,
283
+ "loss": 0.526,
284
+ "step": 1900
285
+ },
286
+ {
287
+ "epoch": 15.32,
288
+ "eval_loss": 0.30019786953926086,
289
+ "eval_runtime": 96.396,
290
+ "eval_samples_per_second": 19.129,
291
+ "eval_steps_per_second": 2.396,
292
+ "eval_wer": 0.4449934738019765,
293
+ "step": 1900
294
+ },
295
+ {
296
+ "epoch": 16.13,
297
+ "learning_rate": 0.0001,
298
+ "loss": 0.5267,
299
+ "step": 2000
300
+ },
301
+ {
302
+ "epoch": 16.13,
303
+ "eval_loss": 0.3249629735946655,
304
+ "eval_runtime": 105.1317,
305
+ "eval_samples_per_second": 17.54,
306
+ "eval_steps_per_second": 2.197,
307
+ "eval_wer": 0.43660264777176955,
308
+ "step": 2000
309
+ },
310
+ {
311
+ "epoch": 16.94,
312
+ "learning_rate": 0.0001,
313
+ "loss": 0.49,
314
+ "step": 2100
315
+ },
316
+ {
317
+ "epoch": 16.94,
318
+ "eval_loss": 0.2999746799468994,
319
+ "eval_runtime": 97.027,
320
+ "eval_samples_per_second": 19.005,
321
+ "eval_steps_per_second": 2.381,
322
+ "eval_wer": 0.4337124743613649,
323
+ "step": 2100
324
+ },
325
+ {
326
+ "epoch": 17.74,
327
+ "learning_rate": 0.0001,
328
+ "loss": 0.4945,
329
+ "step": 2200
330
+ },
331
+ {
332
+ "epoch": 17.74,
333
+ "eval_loss": 0.30671271681785583,
334
+ "eval_runtime": 96.8855,
335
+ "eval_samples_per_second": 19.033,
336
+ "eval_steps_per_second": 2.384,
337
+ "eval_wer": 0.43100876375163155,
338
+ "step": 2200
339
+ },
340
+ {
341
+ "epoch": 18.55,
342
+ "learning_rate": 0.0001,
343
+ "loss": 0.485,
344
+ "step": 2300
345
+ },
346
+ {
347
+ "epoch": 18.55,
348
+ "eval_loss": 0.3134038746356964,
349
+ "eval_runtime": 98.1951,
350
+ "eval_samples_per_second": 18.779,
351
+ "eval_steps_per_second": 2.352,
352
+ "eval_wer": 0.43706880477344767,
353
+ "step": 2300
354
+ },
355
+ {
356
+ "epoch": 19.35,
357
+ "learning_rate": 0.0001,
358
+ "loss": 0.486,
359
+ "step": 2400
360
+ },
361
+ {
362
+ "epoch": 19.35,
363
+ "eval_loss": 0.3182927072048187,
364
+ "eval_runtime": 100.6349,
365
+ "eval_samples_per_second": 18.324,
366
+ "eval_steps_per_second": 2.295,
367
+ "eval_wer": 0.43044937534961775,
368
+ "step": 2400
369
+ },
370
+ {
371
+ "epoch": 20.16,
372
+ "learning_rate": 0.0001,
373
+ "loss": 0.4617,
374
+ "step": 2500
375
+ },
376
+ {
377
+ "epoch": 20.16,
378
+ "eval_loss": 0.3004864752292633,
379
+ "eval_runtime": 102.1006,
380
+ "eval_samples_per_second": 18.061,
381
+ "eval_steps_per_second": 2.262,
382
+ "eval_wer": 0.4238299459257878,
383
+ "step": 2500
384
+ },
385
+ {
386
+ "epoch": 20.97,
387
+ "learning_rate": 0.0001,
388
+ "loss": 0.465,
389
+ "step": 2600
390
+ },
391
+ {
392
+ "epoch": 20.97,
393
+ "eval_loss": 0.313532292842865,
394
+ "eval_runtime": 101.2873,
395
+ "eval_samples_per_second": 18.206,
396
+ "eval_steps_per_second": 2.281,
397
+ "eval_wer": 0.4311019951519672,
398
+ "step": 2600
399
+ },
400
+ {
401
+ "epoch": 21.77,
402
+ "learning_rate": 0.0001,
403
+ "loss": 0.4414,
404
+ "step": 2700
405
+ },
406
+ {
407
+ "epoch": 21.77,
408
+ "eval_loss": 0.3118414580821991,
409
+ "eval_runtime": 97.4069,
410
+ "eval_samples_per_second": 18.931,
411
+ "eval_steps_per_second": 2.371,
412
+ "eval_wer": 0.4256013425321648,
413
+ "step": 2700
414
+ },
415
+ {
416
+ "epoch": 22.58,
417
+ "learning_rate": 0.0001,
418
+ "loss": 0.4424,
419
+ "step": 2800
420
+ },
421
+ {
422
+ "epoch": 22.58,
423
+ "eval_loss": 0.3086116313934326,
424
+ "eval_runtime": 97.4809,
425
+ "eval_samples_per_second": 18.917,
426
+ "eval_steps_per_second": 2.37,
427
+ "eval_wer": 0.4160917396979303,
428
+ "step": 2800
429
+ },
430
+ {
431
+ "epoch": 23.39,
432
+ "learning_rate": 0.0001,
433
+ "loss": 0.4352,
434
+ "step": 2900
435
+ },
436
+ {
437
+ "epoch": 23.39,
438
+ "eval_loss": 0.3213385045528412,
439
+ "eval_runtime": 96.5991,
440
+ "eval_samples_per_second": 19.089,
441
+ "eval_steps_per_second": 2.391,
442
+ "eval_wer": 0.41879545030766363,
443
+ "step": 2900
444
+ },
445
+ {
446
+ "epoch": 24.19,
447
+ "learning_rate": 0.0001,
448
+ "loss": 0.4324,
449
+ "step": 3000
450
+ },
451
+ {
452
+ "epoch": 24.19,
453
+ "eval_loss": 0.3011772036552429,
454
+ "eval_runtime": 97.1915,
455
+ "eval_samples_per_second": 18.973,
456
+ "eval_steps_per_second": 2.377,
457
+ "eval_wer": 0.4236434831251165,
458
+ "step": 3000
459
+ },
460
+ {
461
+ "epoch": 25.0,
462
+ "learning_rate": 0.0001,
463
+ "loss": 0.4246,
464
+ "step": 3100
465
+ },
466
+ {
467
+ "epoch": 25.0,
468
+ "eval_loss": 0.30194172263145447,
469
+ "eval_runtime": 100.0292,
470
+ "eval_samples_per_second": 18.435,
471
+ "eval_steps_per_second": 2.309,
472
+ "eval_wer": 0.4165578966996084,
473
+ "step": 3100
474
+ },
475
+ {
476
+ "epoch": 25.81,
477
+ "learning_rate": 0.0001,
478
+ "loss": 0.4132,
479
+ "step": 3200
480
+ },
481
+ {
482
+ "epoch": 25.81,
483
+ "eval_loss": 0.299437552690506,
484
+ "eval_runtime": 97.7952,
485
+ "eval_samples_per_second": 18.856,
486
+ "eval_steps_per_second": 2.362,
487
+ "eval_wer": 0.4160917396979303,
488
+ "step": 3200
489
+ },
490
+ {
491
+ "epoch": 26.61,
492
+ "learning_rate": 0.0001,
493
+ "loss": 0.4074,
494
+ "step": 3300
495
+ },
496
+ {
497
+ "epoch": 26.61,
498
+ "eval_loss": 0.31787610054016113,
499
+ "eval_runtime": 97.3925,
500
+ "eval_samples_per_second": 18.934,
501
+ "eval_steps_per_second": 2.372,
502
+ "eval_wer": 0.41795636770464295,
503
+ "step": 3300
504
+ },
505
+ {
506
+ "epoch": 27.42,
507
+ "learning_rate": 0.0001,
508
+ "loss": 0.4031,
509
+ "step": 3400
510
+ },
511
+ {
512
+ "epoch": 27.42,
513
+ "eval_loss": 0.296748548746109,
514
+ "eval_runtime": 101.7762,
515
+ "eval_samples_per_second": 18.118,
516
+ "eval_steps_per_second": 2.27,
517
+ "eval_wer": 0.40760768226738764,
518
+ "step": 3400
519
+ },
520
+ {
521
+ "epoch": 28.23,
522
+ "learning_rate": 0.0001,
523
+ "loss": 0.4082,
524
+ "step": 3500
525
+ },
526
+ {
527
+ "epoch": 28.23,
528
+ "eval_loss": 0.31329259276390076,
529
+ "eval_runtime": 102.7944,
530
+ "eval_samples_per_second": 17.939,
531
+ "eval_steps_per_second": 2.247,
532
+ "eval_wer": 0.41310833488719,
533
+ "step": 3500
534
+ },
535
+ {
536
+ "epoch": 29.03,
537
+ "learning_rate": 0.0001,
538
+ "loss": 0.3912,
539
+ "step": 3600
540
+ },
541
+ {
542
+ "epoch": 29.03,
543
+ "eval_loss": 0.3259478509426117,
544
+ "eval_runtime": 98.5616,
545
+ "eval_samples_per_second": 18.709,
546
+ "eval_steps_per_second": 2.344,
547
+ "eval_wer": 0.41133693828081297,
548
+ "step": 3600
549
+ },
550
+ {
551
+ "epoch": 29.84,
552
+ "learning_rate": 0.0001,
553
+ "loss": 0.3922,
554
+ "step": 3700
555
+ },
556
+ {
557
+ "epoch": 29.84,
558
+ "eval_loss": 0.32406386733055115,
559
+ "eval_runtime": 96.8005,
560
+ "eval_samples_per_second": 19.049,
561
+ "eval_steps_per_second": 2.386,
562
+ "eval_wer": 0.41226925228416933,
563
+ "step": 3700
564
+ },
565
+ {
566
+ "epoch": 30.65,
567
+ "learning_rate": 9.795698924731184e-05,
568
+ "loss": 0.3851,
569
+ "step": 3800
570
+ },
571
+ {
572
+ "epoch": 30.65,
573
+ "eval_loss": 0.29852330684661865,
574
+ "eval_runtime": 105.0804,
575
+ "eval_samples_per_second": 17.548,
576
+ "eval_steps_per_second": 2.198,
577
+ "eval_wer": 0.41133693828081297,
578
+ "step": 3800
579
+ },
580
+ {
581
+ "epoch": 31.45,
582
+ "learning_rate": 9.526881720430108e-05,
583
+ "loss": 0.3768,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 31.45,
588
+ "eval_loss": 0.3223101794719696,
589
+ "eval_runtime": 97.6458,
590
+ "eval_samples_per_second": 18.885,
591
+ "eval_steps_per_second": 2.366,
592
+ "eval_wer": 0.41040462427745666,
593
+ "step": 3900
594
+ },
595
+ {
596
+ "epoch": 32.26,
597
+ "learning_rate": 9.258064516129033e-05,
598
+ "loss": 0.3728,
599
+ "step": 4000
600
+ },
601
+ {
602
+ "epoch": 32.26,
603
+ "eval_loss": 0.3212699890136719,
604
+ "eval_runtime": 100.4336,
605
+ "eval_samples_per_second": 18.36,
606
+ "eval_steps_per_second": 2.3,
607
+ "eval_wer": 0.40770091366772326,
608
+ "step": 4000
609
+ },
610
+ {
611
+ "epoch": 33.06,
612
+ "learning_rate": 8.989247311827958e-05,
613
+ "loss": 0.3646,
614
+ "step": 4100
615
+ },
616
+ {
617
+ "epoch": 33.06,
618
+ "eval_loss": 0.31770506501197815,
619
+ "eval_runtime": 97.2462,
620
+ "eval_samples_per_second": 18.962,
621
+ "eval_steps_per_second": 2.375,
622
+ "eval_wer": 0.4116166324818199,
623
+ "step": 4100
624
+ },
625
+ {
626
+ "epoch": 33.87,
627
+ "learning_rate": 8.720430107526883e-05,
628
+ "loss": 0.3681,
629
+ "step": 4200
630
+ },
631
+ {
632
+ "epoch": 33.87,
633
+ "eval_loss": 0.3010982871055603,
634
+ "eval_runtime": 99.148,
635
+ "eval_samples_per_second": 18.598,
636
+ "eval_steps_per_second": 2.33,
637
+ "eval_wer": 0.4105910870781279,
638
+ "step": 4200
639
+ },
640
+ {
641
+ "epoch": 34.68,
642
+ "learning_rate": 8.451612903225808e-05,
643
+ "loss": 0.3634,
644
+ "step": 4300
645
+ },
646
+ {
647
+ "epoch": 34.68,
648
+ "eval_loss": 0.3075733780860901,
649
+ "eval_runtime": 99.8799,
650
+ "eval_samples_per_second": 18.462,
651
+ "eval_steps_per_second": 2.313,
652
+ "eval_wer": 0.4069550624650382,
653
+ "step": 4300
654
+ },
655
+ {
656
+ "epoch": 35.48,
657
+ "learning_rate": 8.182795698924732e-05,
658
+ "loss": 0.3471,
659
+ "step": 4400
660
+ },
661
+ {
662
+ "epoch": 35.48,
663
+ "eval_loss": 0.31304171681404114,
664
+ "eval_runtime": 99.6768,
665
+ "eval_samples_per_second": 18.5,
666
+ "eval_steps_per_second": 2.317,
667
+ "eval_wer": 0.40266641804959913,
668
+ "step": 4400
669
+ },
670
+ {
671
+ "epoch": 36.29,
672
+ "learning_rate": 7.913978494623657e-05,
673
+ "loss": 0.3398,
674
+ "step": 4500
675
+ },
676
+ {
677
+ "epoch": 36.29,
678
+ "eval_loss": 0.306076318025589,
679
+ "eval_runtime": 103.5936,
680
+ "eval_samples_per_second": 17.8,
681
+ "eval_steps_per_second": 2.23,
682
+ "eval_wer": 0.4037851948536267,
683
+ "step": 4500
684
+ },
685
+ {
686
+ "epoch": 37.1,
687
+ "learning_rate": 7.645161290322582e-05,
688
+ "loss": 0.3394,
689
+ "step": 4600
690
+ },
691
+ {
692
+ "epoch": 37.1,
693
+ "eval_loss": 0.31049424409866333,
694
+ "eval_runtime": 98.5424,
695
+ "eval_samples_per_second": 18.713,
696
+ "eval_steps_per_second": 2.344,
697
+ "eval_wer": 0.4037851948536267,
698
+ "step": 4600
699
+ },
700
+ {
701
+ "epoch": 37.9,
702
+ "learning_rate": 7.376344086021507e-05,
703
+ "loss": 0.331,
704
+ "step": 4700
705
+ },
706
+ {
707
+ "epoch": 37.9,
708
+ "eval_loss": 0.3191221058368683,
709
+ "eval_runtime": 102.0886,
710
+ "eval_samples_per_second": 18.063,
711
+ "eval_steps_per_second": 2.263,
712
+ "eval_wer": 0.3992168562371807,
713
+ "step": 4700
714
+ },
715
+ {
716
+ "epoch": 38.71,
717
+ "learning_rate": 7.10752688172043e-05,
718
+ "loss": 0.3221,
719
+ "step": 4800
720
+ },
721
+ {
722
+ "epoch": 38.71,
723
+ "eval_loss": 0.3119024336338043,
724
+ "eval_runtime": 99.689,
725
+ "eval_samples_per_second": 18.498,
726
+ "eval_steps_per_second": 2.317,
727
+ "eval_wer": 0.39958978183852323,
728
+ "step": 4800
729
+ },
730
+ {
731
+ "epoch": 39.52,
732
+ "learning_rate": 6.838709677419355e-05,
733
+ "loss": 0.3207,
734
+ "step": 4900
735
+ },
736
+ {
737
+ "epoch": 39.52,
738
+ "eval_loss": 0.3276441991329193,
739
+ "eval_runtime": 96.7884,
740
+ "eval_samples_per_second": 19.052,
741
+ "eval_steps_per_second": 2.387,
742
+ "eval_wer": 0.40313257505127725,
743
+ "step": 4900
744
+ },
745
+ {
746
+ "epoch": 40.32,
747
+ "learning_rate": 6.56989247311828e-05,
748
+ "loss": 0.3186,
749
+ "step": 5000
750
+ },
751
+ {
752
+ "epoch": 40.32,
753
+ "eval_loss": 0.3335840106010437,
754
+ "eval_runtime": 98.1942,
755
+ "eval_samples_per_second": 18.779,
756
+ "eval_steps_per_second": 2.352,
757
+ "eval_wer": 0.4032258064516129,
758
+ "step": 5000
759
+ },
760
+ {
761
+ "epoch": 41.13,
762
+ "learning_rate": 6.301075268817205e-05,
763
+ "loss": 0.3152,
764
+ "step": 5100
765
+ },
766
+ {
767
+ "epoch": 41.13,
768
+ "eval_loss": 0.3129470944404602,
769
+ "eval_runtime": 99.396,
770
+ "eval_samples_per_second": 18.552,
771
+ "eval_steps_per_second": 2.324,
772
+ "eval_wer": 0.3941823606190565,
773
+ "step": 5100
774
+ },
775
+ {
776
+ "epoch": 41.94,
777
+ "learning_rate": 6.0322580645161295e-05,
778
+ "loss": 0.3093,
779
+ "step": 5200
780
+ },
781
+ {
782
+ "epoch": 41.94,
783
+ "eval_loss": 0.31502029299736023,
784
+ "eval_runtime": 97.1686,
785
+ "eval_samples_per_second": 18.977,
786
+ "eval_steps_per_second": 2.377,
787
+ "eval_wer": 0.3930635838150289,
788
+ "step": 5200
789
+ },
790
+ {
791
+ "epoch": 42.74,
792
+ "learning_rate": 5.763440860215054e-05,
793
+ "loss": 0.2949,
794
+ "step": 5300
795
+ },
796
+ {
797
+ "epoch": 42.74,
798
+ "eval_loss": 0.3235335052013397,
799
+ "eval_runtime": 102.899,
800
+ "eval_samples_per_second": 17.92,
801
+ "eval_steps_per_second": 2.245,
802
+ "eval_wer": 0.3953011374230841,
803
+ "step": 5300
804
+ },
805
+ {
806
+ "epoch": 43.55,
807
+ "learning_rate": 5.494623655913979e-05,
808
+ "loss": 0.3013,
809
+ "step": 5400
810
+ },
811
+ {
812
+ "epoch": 43.55,
813
+ "eval_loss": 0.3291964530944824,
814
+ "eval_runtime": 97.4704,
815
+ "eval_samples_per_second": 18.919,
816
+ "eval_steps_per_second": 2.37,
817
+ "eval_wer": 0.3934365094163714,
818
+ "step": 5400
819
+ },
820
+ {
821
+ "epoch": 44.35,
822
+ "learning_rate": 5.225806451612903e-05,
823
+ "loss": 0.3092,
824
+ "step": 5500
825
+ },
826
+ {
827
+ "epoch": 44.35,
828
+ "eval_loss": 0.3066520392894745,
829
+ "eval_runtime": 96.9166,
830
+ "eval_samples_per_second": 19.027,
831
+ "eval_steps_per_second": 2.383,
832
+ "eval_wer": 0.38616446019019207,
833
+ "step": 5500
834
+ },
835
+ {
836
+ "epoch": 45.16,
837
+ "learning_rate": 4.956989247311828e-05,
838
+ "loss": 0.2938,
839
+ "step": 5600
840
+ },
841
+ {
842
+ "epoch": 45.16,
843
+ "eval_loss": 0.32365530729293823,
844
+ "eval_runtime": 98.8953,
845
+ "eval_samples_per_second": 18.646,
846
+ "eval_steps_per_second": 2.336,
847
+ "eval_wer": 0.38756293119522656,
848
+ "step": 5600
849
+ },
850
+ {
851
+ "epoch": 45.97,
852
+ "learning_rate": 4.688172043010753e-05,
853
+ "loss": 0.2985,
854
+ "step": 5700
855
+ },
856
+ {
857
+ "epoch": 45.97,
858
+ "eval_loss": 0.3358381390571594,
859
+ "eval_runtime": 96.7245,
860
+ "eval_samples_per_second": 19.064,
861
+ "eval_steps_per_second": 2.388,
862
+ "eval_wer": 0.3876561625955622,
863
+ "step": 5700
864
+ },
865
+ {
866
+ "epoch": 46.77,
867
+ "learning_rate": 4.4193548387096775e-05,
868
+ "loss": 0.2943,
869
+ "step": 5800
870
+ },
871
+ {
872
+ "epoch": 46.77,
873
+ "eval_loss": 0.3283197283744812,
874
+ "eval_runtime": 98.5499,
875
+ "eval_samples_per_second": 18.711,
876
+ "eval_steps_per_second": 2.344,
877
+ "eval_wer": 0.3928771210143576,
878
+ "step": 5800
879
+ },
880
+ {
881
+ "epoch": 47.58,
882
+ "learning_rate": 4.1505376344086025e-05,
883
+ "loss": 0.2754,
884
+ "step": 5900
885
+ },
886
+ {
887
+ "epoch": 47.58,
888
+ "eval_loss": 0.33124613761901855,
889
+ "eval_runtime": 97.2557,
890
+ "eval_samples_per_second": 18.96,
891
+ "eval_steps_per_second": 2.375,
892
+ "eval_wer": 0.38952079060227485,
893
+ "step": 5900
894
+ },
895
+ {
896
+ "epoch": 48.39,
897
+ "learning_rate": 3.881720430107527e-05,
898
+ "loss": 0.2906,
899
+ "step": 6000
900
+ },
901
+ {
902
+ "epoch": 48.39,
903
+ "eval_loss": 0.33012691140174866,
904
+ "eval_runtime": 99.7418,
905
+ "eval_samples_per_second": 18.488,
906
+ "eval_steps_per_second": 2.316,
907
+ "eval_wer": 0.39045310460563115,
908
+ "step": 6000
909
+ },
910
+ {
911
+ "epoch": 49.19,
912
+ "learning_rate": 3.612903225806452e-05,
913
+ "loss": 0.2751,
914
+ "step": 6100
915
+ },
916
+ {
917
+ "epoch": 49.19,
918
+ "eval_loss": 0.3261188566684723,
919
+ "eval_runtime": 98.7276,
920
+ "eval_samples_per_second": 18.678,
921
+ "eval_steps_per_second": 2.34,
922
+ "eval_wer": 0.3873764683945553,
923
+ "step": 6100
924
+ },
925
+ {
926
+ "epoch": 50.0,
927
+ "learning_rate": 3.344086021505377e-05,
928
+ "loss": 0.2825,
929
+ "step": 6200
930
+ },
931
+ {
932
+ "epoch": 50.0,
933
+ "eval_loss": 0.3322080671787262,
934
+ "eval_runtime": 97.0188,
935
+ "eval_samples_per_second": 19.007,
936
+ "eval_steps_per_second": 2.381,
937
+ "eval_wer": 0.38700354279321275,
938
+ "step": 6200
939
+ },
940
+ {
941
+ "epoch": 50.81,
942
+ "learning_rate": 3.075268817204301e-05,
943
+ "loss": 0.2727,
944
+ "step": 6300
945
+ },
946
+ {
947
+ "epoch": 50.81,
948
+ "eval_loss": 0.3448057770729065,
949
+ "eval_runtime": 98.9754,
950
+ "eval_samples_per_second": 18.631,
951
+ "eval_steps_per_second": 2.334,
952
+ "eval_wer": 0.3928771210143576,
953
+ "step": 6300
954
+ },
955
+ {
956
+ "epoch": 51.61,
957
+ "learning_rate": 2.806451612903226e-05,
958
+ "loss": 0.2896,
959
+ "step": 6400
960
+ },
961
+ {
962
+ "epoch": 51.61,
963
+ "eval_loss": 0.33276790380477905,
964
+ "eval_runtime": 96.9108,
965
+ "eval_samples_per_second": 19.028,
966
+ "eval_steps_per_second": 2.384,
967
+ "eval_wer": 0.38858847659891854,
968
+ "step": 6400
969
+ },
970
+ {
971
+ "epoch": 52.42,
972
+ "learning_rate": 2.537634408602151e-05,
973
+ "loss": 0.2753,
974
+ "step": 6500
975
+ },
976
+ {
977
+ "epoch": 52.42,
978
+ "eval_loss": 0.32856670022010803,
979
+ "eval_runtime": 105.68,
980
+ "eval_samples_per_second": 17.449,
981
+ "eval_steps_per_second": 2.186,
982
+ "eval_wer": 0.3876561625955622,
983
+ "step": 6500
984
+ },
985
+ {
986
+ "epoch": 53.23,
987
+ "learning_rate": 2.268817204301075e-05,
988
+ "loss": 0.271,
989
+ "step": 6600
990
+ },
991
+ {
992
+ "epoch": 53.23,
993
+ "eval_loss": 0.3287724256515503,
994
+ "eval_runtime": 100.7427,
995
+ "eval_samples_per_second": 18.304,
996
+ "eval_steps_per_second": 2.293,
997
+ "eval_wer": 0.3876561625955622,
998
+ "step": 6600
999
+ },
1000
+ {
1001
+ "epoch": 54.03,
1002
+ "learning_rate": 2e-05,
1003
+ "loss": 0.2664,
1004
+ "step": 6700
1005
+ },
1006
+ {
1007
+ "epoch": 54.03,
1008
+ "eval_loss": 0.3290824890136719,
1009
+ "eval_runtime": 98.5577,
1010
+ "eval_samples_per_second": 18.71,
1011
+ "eval_steps_per_second": 2.344,
1012
+ "eval_wer": 0.3893343278016036,
1013
+ "step": 6700
1014
+ },
1015
+ {
1016
+ "epoch": 54.84,
1017
+ "learning_rate": 1.7311827956989248e-05,
1018
+ "loss": 0.2491,
1019
+ "step": 6800
1020
+ },
1021
+ {
1022
+ "epoch": 54.84,
1023
+ "eval_loss": 0.33568552136421204,
1024
+ "eval_runtime": 103.2565,
1025
+ "eval_samples_per_second": 17.858,
1026
+ "eval_steps_per_second": 2.237,
1027
+ "eval_wer": 0.388215550997576,
1028
+ "step": 6800
1029
+ },
1030
+ {
1031
+ "epoch": 55.65,
1032
+ "learning_rate": 1.4623655913978495e-05,
1033
+ "loss": 0.2569,
1034
+ "step": 6900
1035
+ },
1036
+ {
1037
+ "epoch": 55.65,
1038
+ "eval_loss": 0.3253258466720581,
1039
+ "eval_runtime": 104.0214,
1040
+ "eval_samples_per_second": 17.727,
1041
+ "eval_steps_per_second": 2.221,
1042
+ "eval_wer": 0.3876561625955622,
1043
+ "step": 6900
1044
+ },
1045
+ {
1046
+ "epoch": 56.45,
1047
+ "learning_rate": 1.1935483870967743e-05,
1048
+ "loss": 0.2568,
1049
+ "step": 7000
1050
+ },
1051
+ {
1052
+ "epoch": 56.45,
1053
+ "eval_loss": 0.3199854791164398,
1054
+ "eval_runtime": 103.944,
1055
+ "eval_samples_per_second": 17.74,
1056
+ "eval_steps_per_second": 2.222,
1057
+ "eval_wer": 0.3868170799925415,
1058
+ "step": 7000
1059
+ },
1060
+ {
1061
+ "epoch": 57.26,
1062
+ "learning_rate": 9.24731182795699e-06,
1063
+ "loss": 0.2668,
1064
+ "step": 7100
1065
+ },
1066
+ {
1067
+ "epoch": 57.26,
1068
+ "eval_loss": 0.32368558645248413,
1069
+ "eval_runtime": 103.5633,
1070
+ "eval_samples_per_second": 17.806,
1071
+ "eval_steps_per_second": 2.231,
1072
+ "eval_wer": 0.3862576915905277,
1073
+ "step": 7100
1074
+ },
1075
+ {
1076
+ "epoch": 58.06,
1077
+ "learning_rate": 6.586021505376344e-06,
1078
+ "loss": 0.253,
1079
+ "step": 7200
1080
+ },
1081
+ {
1082
+ "epoch": 58.06,
1083
+ "eval_loss": 0.3248105049133301,
1084
+ "eval_runtime": 103.7822,
1085
+ "eval_samples_per_second": 17.768,
1086
+ "eval_steps_per_second": 2.226,
1087
+ "eval_wer": 0.38504568338616446,
1088
+ "step": 7200
1089
+ },
1090
+ {
1091
+ "epoch": 58.87,
1092
+ "learning_rate": 3.8978494623655915e-06,
1093
+ "loss": 0.2621,
1094
+ "step": 7300
1095
+ },
1096
+ {
1097
+ "epoch": 58.87,
1098
+ "eval_loss": 0.3238443434238434,
1099
+ "eval_runtime": 97.9464,
1100
+ "eval_samples_per_second": 18.827,
1101
+ "eval_steps_per_second": 2.358,
1102
+ "eval_wer": 0.38448629498415066,
1103
+ "step": 7300
1104
+ },
1105
+ {
1106
+ "epoch": 59.68,
1107
+ "learning_rate": 1.2096774193548388e-06,
1108
+ "loss": 0.2571,
1109
+ "step": 7400
1110
+ },
1111
+ {
1112
+ "epoch": 59.68,
1113
+ "eval_loss": 0.3273279666900635,
1114
+ "eval_runtime": 101.5169,
1115
+ "eval_samples_per_second": 18.164,
1116
+ "eval_steps_per_second": 2.275,
1117
+ "eval_wer": 0.3852321461868357,
1118
+ "step": 7400
1119
+ },
1120
+ {
1121
+ "epoch": 60.0,
1122
+ "step": 7440,
1123
+ "total_flos": 2.7388066347434852e+19,
1124
+ "train_loss": 0.8272683016715511,
1125
+ "train_runtime": 19767.0967,
1126
+ "train_samples_per_second": 12.035,
1127
+ "train_steps_per_second": 0.376
1128
+ }
1129
+ ],
1130
+ "max_steps": 7440,
1131
+ "num_train_epochs": 60,
1132
+ "total_flos": 2.7388066347434852e+19,
1133
+ "trial_name": null,
1134
+ "trial_params": null
1135
+ }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"f": 0, "o": 1, "R": 2, "r": 3, "C": 4, "k": 5, "\uff0c": 6, "H": 7, "s": 8, "A": 9, "I": 10, "T": 11, "?": 12, ",": 13, "z": 14, "u": 15, ":": 16, "t": 17, "n": 18, "j": 19, "y": 20, "Z": 21, ".": 22, "Y": 23, "K": 24, "-": 25, "a": 26, "O": 27, "F": 28, "M": 29, "S": 30, "G": 31, "D": 32, "U": 33, "W": 34, "c": 35, "L": 36, "E": 37, "m": 39, "J": 40, "l": 41, "b": 42, "\u00e9": 43, "!": 44, "v": 45, "N": 46, "B": 47, "P": 48, "w": 49, "V": 50, "'": 51, "\uff01": 52, "p": 53, "g": 54, "i": 55, "d": 56, "x": 57, "e": 58, "\"": 59, "h": 60, "|": 38, "[UNK]": 61, "[PAD]": 62}
 
1
+ {"y": 0, "h": 1, "c": 2, "d": 3, "r": 4, "t": 5, "é": 6, "n": 8, "": 9, "x": 10, "v": 11, "p": 12, "z": 13, "l": 14, "j": 15, "m": 16, "b": 17, "i": 18, "e": 19, "g": 20, "a": 21, "": 22, "u": 23, "f": 24, "s": 25, "k": 26, "o": 27, "w": 28, "|": 7, "[UNK]": 29, "[PAD]": 30}