yuweiiizz committed
Commit 270b8f3 · verified · 1 parent: 6fb62a0

Training in progress, step 8000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4aa66ee572d638da161b0876a24a0495b141e6a283ebe22e54c7bb4b5cecc85d
+oid sha256:e6272cdab34ad742ff5d8928b1a3b73208418c7bfce7d87a0db8c4712d83527d
 size 966995080
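The diff above only swaps the Git LFS pointer: the repository tracks a three-line stub (spec version, sha256 oid, byte size) while the 967 MB weight blob itself lives in LFS storage. A minimal sketch, with hypothetical local paths, of checking a downloaded blob against such a pointer:

```python
import hashlib


def lfs_pointer_matches(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded LFS blob against the oid/size recorded in its pointer file."""
    # A pointer file holds three "key value" lines: version, oid sha256:<hex>, size <bytes>.
    with open(pointer_path) as f:
        fields = dict(line.strip().split(" ", 1) for line in f if line.strip())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    sha = hashlib.sha256()
    actual_size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
            actual_size += len(chunk)
    return sha.hexdigest() == expected_oid and actual_size == expected_size


# e.g. lfs_pointer_matches("model.safetensors.pointer", "model.safetensors")  # hypothetical paths
```
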
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3095de81d8b920e91d85b8e2ee04f326ef5aa3917fee9ca74a8d0a152e8b3447
+oid sha256:d7de0ef00f4d8d9f4fa9b0e18f25384399f28c54b520f62c3dd31eca12ff60e2
 size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ada5f6f7cb1b6a49d79d11cd5642321498733c76d6eb8ca5030fe74fa4bc331
+oid sha256:5bcd75decc8ec809bdd000c1a023eecd569d9a9775fe640822926fa2ab60021b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e217ad856cdf0ac7c67db1e21b2cb21b2f44c6c7063ade06b9a1720236888449
+oid sha256:64cb8f98c15d4c048d873b0e5f521cf693b6283121ee23f325cb6b6e1b684e58
 size 1064
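optimizer.pt, rng_state.pth and scheduler.pt receive the same pointer-only updates. Together with the model.safetensors weights above and the trainer_state.json log below, they make up a complete transformers Trainer checkpoint (weights, optimizer/scheduler state, RNG state, bookkeeping). A minimal inspection sketch, assuming a local copy of the checkpoint directory (not part of this commit):

```python
import json

from safetensors.torch import load_file

ckpt = "last-checkpoint"  # assumed local copy of the checkpoint directory in this repo

weights = load_file(f"{ckpt}/model.safetensors")  # model parameters (the 966995080-byte blob above)
with open(f"{ckpt}/trainer_state.json") as f:
    state = json.load(f)                          # training log and bookkeeping

print(f"{len(weights)} weight tensors, global_step {state['global_step']}, epoch {state['epoch']}")
# optimizer.pt, scheduler.pt and rng_state.pth are read back by
# Trainer.train(resume_from_checkpoint=ckpt) when training continues from step 8000.
```
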
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 47.53661784287617,
   "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-7000",
-  "epoch": 2.8,
+  "epoch": 3.2,
   "eval_steps": 1000,
-  "global_step": 7000,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2030,6 +2030,295 @@
       "eval_samples_per_second": 2.27,
       "eval_steps_per_second": 0.284,
       "step": 7000
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 12.68954086303711,
+      "learning_rate": 2.438271604938272e-06,
+      "loss": 0.6009,
+      "step": 7025
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 10.792320251464844,
+      "learning_rate": 2.4074074074074075e-06,
+      "loss": 0.5913,
+      "step": 7050
+    },
+    {
+      "epoch": 2.83,
+      "grad_norm": 10.7244291305542,
+      "learning_rate": 2.3765432098765435e-06,
+      "loss": 0.5993,
+      "step": 7075
+    },
+    {
+      "epoch": 2.84,
+      "grad_norm": 10.002340316772461,
+      "learning_rate": 2.345679012345679e-06,
+      "loss": 0.5822,
+      "step": 7100
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 9.782127380371094,
+      "learning_rate": 2.314814814814815e-06,
+      "loss": 0.6265,
+      "step": 7125
+    },
+    {
+      "epoch": 2.86,
+      "grad_norm": 11.622259140014648,
+      "learning_rate": 2.283950617283951e-06,
+      "loss": 0.6251,
+      "step": 7150
+    },
+    {
+      "epoch": 2.87,
+      "grad_norm": 9.750394821166992,
+      "learning_rate": 2.2530864197530865e-06,
+      "loss": 0.5412,
+      "step": 7175
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 11.742593765258789,
+      "learning_rate": 2.222222222222222e-06,
+      "loss": 0.6171,
+      "step": 7200
+    },
+    {
+      "epoch": 2.89,
+      "grad_norm": 11.124135971069336,
+      "learning_rate": 2.191358024691358e-06,
+      "loss": 0.555,
+      "step": 7225
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 13.9995756149292,
+      "learning_rate": 2.1604938271604937e-06,
+      "loss": 0.63,
+      "step": 7250
+    },
+    {
+      "epoch": 2.91,
+      "grad_norm": 10.930980682373047,
+      "learning_rate": 2.1296296296296298e-06,
+      "loss": 0.565,
+      "step": 7275
+    },
+    {
+      "epoch": 2.92,
+      "grad_norm": 10.784317970275879,
+      "learning_rate": 2.0987654320987654e-06,
+      "loss": 0.5634,
+      "step": 7300
+    },
+    {
+      "epoch": 2.93,
+      "grad_norm": 14.178128242492676,
+      "learning_rate": 2.0679012345679015e-06,
+      "loss": 0.5464,
+      "step": 7325
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 14.49055004119873,
+      "learning_rate": 2.037037037037037e-06,
+      "loss": 0.6182,
+      "step": 7350
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 9.5054349899292,
+      "learning_rate": 2.006172839506173e-06,
+      "loss": 0.6202,
+      "step": 7375
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 11.535719871520996,
+      "learning_rate": 1.9753086419753087e-06,
+      "loss": 0.5973,
+      "step": 7400
+    },
+    {
+      "epoch": 2.9699999999999998,
+      "grad_norm": 12.069352149963379,
+      "learning_rate": 1.944444444444445e-06,
+      "loss": 0.5828,
+      "step": 7425
+    },
+    {
+      "epoch": 2.98,
+      "grad_norm": 10.448741912841797,
+      "learning_rate": 1.9135802469135804e-06,
+      "loss": 0.5961,
+      "step": 7450
+    },
+    {
+      "epoch": 2.99,
+      "grad_norm": 9.852815628051758,
+      "learning_rate": 1.8827160493827162e-06,
+      "loss": 0.6516,
+      "step": 7475
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 15.387414932250977,
+      "learning_rate": 1.8518518518518519e-06,
+      "loss": 0.5692,
+      "step": 7500
+    },
+    {
+      "epoch": 3.01,
+      "grad_norm": 7.3440070152282715,
+      "learning_rate": 1.820987654320988e-06,
+      "loss": 0.4318,
+      "step": 7525
+    },
+    {
+      "epoch": 3.02,
+      "grad_norm": 7.697348594665527,
+      "learning_rate": 1.7901234567901235e-06,
+      "loss": 0.457,
+      "step": 7550
+    },
+    {
+      "epoch": 3.03,
+      "grad_norm": 11.595518112182617,
+      "learning_rate": 1.7592592592592594e-06,
+      "loss": 0.4364,
+      "step": 7575
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 10.860459327697754,
+      "learning_rate": 1.7283950617283952e-06,
+      "loss": 0.4025,
+      "step": 7600
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 8.72067642211914,
+      "learning_rate": 1.697530864197531e-06,
+      "loss": 0.4296,
+      "step": 7625
+    },
+    {
+      "epoch": 3.06,
+      "grad_norm": 9.136699676513672,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 0.4182,
+      "step": 7650
+    },
+    {
+      "epoch": 3.07,
+      "grad_norm": 9.509159088134766,
+      "learning_rate": 1.6358024691358027e-06,
+      "loss": 0.4468,
+      "step": 7675
+    },
+    {
+      "epoch": 3.08,
+      "grad_norm": 9.242144584655762,
+      "learning_rate": 1.6049382716049383e-06,
+      "loss": 0.4146,
+      "step": 7700
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 9.915699005126953,
+      "learning_rate": 1.5740740740740742e-06,
+      "loss": 0.4493,
+      "step": 7725
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 9.449831008911133,
+      "learning_rate": 1.54320987654321e-06,
+      "loss": 0.4995,
+      "step": 7750
+    },
+    {
+      "epoch": 3.11,
+      "grad_norm": 10.038348197937012,
+      "learning_rate": 1.5123456790123458e-06,
+      "loss": 0.4491,
+      "step": 7775
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 10.652295112609863,
+      "learning_rate": 1.4814814814814815e-06,
+      "loss": 0.4216,
+      "step": 7800
+    },
+    {
+      "epoch": 3.13,
+      "grad_norm": 13.189153671264648,
+      "learning_rate": 1.4506172839506175e-06,
+      "loss": 0.4502,
+      "step": 7825
+    },
+    {
+      "epoch": 3.14,
+      "grad_norm": 9.493179321289062,
+      "learning_rate": 1.4197530864197531e-06,
+      "loss": 0.4084,
+      "step": 7850
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 8.161423683166504,
+      "learning_rate": 1.3888888888888892e-06,
+      "loss": 0.441,
+      "step": 7875
+    },
+    {
+      "epoch": 3.16,
+      "grad_norm": 8.957440376281738,
+      "learning_rate": 1.3580246913580248e-06,
+      "loss": 0.3998,
+      "step": 7900
+    },
+    {
+      "epoch": 3.17,
+      "grad_norm": 10.819154739379883,
+      "learning_rate": 1.3271604938271606e-06,
+      "loss": 0.3827,
+      "step": 7925
+    },
+    {
+      "epoch": 3.18,
+      "grad_norm": 8.5838041305542,
+      "learning_rate": 1.2962962962962962e-06,
+      "loss": 0.4418,
+      "step": 7950
+    },
+    {
+      "epoch": 3.19,
+      "grad_norm": 9.781057357788086,
+      "learning_rate": 1.2654320987654323e-06,
+      "loss": 0.4375,
+      "step": 7975
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 9.840452194213867,
+      "learning_rate": 1.234567901234568e-06,
+      "loss": 0.3952,
+      "step": 8000
+    },
+    {
+      "epoch": 3.2,
+      "eval_cer": 48.29247278138952,
+      "eval_loss": 0.9025322794914246,
+      "eval_runtime": 1733.8061,
+      "eval_samples_per_second": 2.27,
+      "eval_steps_per_second": 0.284,
+      "step": 8000
     }
   ],
   "logging_steps": 25,
@@ -2037,7 +2326,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
-  "total_flos": 3.232156483584e+19,
+  "total_flos": 3.693893124096e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null