nahidalam committed on
Commit
64ef9ab
·
1 Parent(s): b8bf4a2

initial 3k steps

Browse files
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/shapla/.cache/huggingface/hub/models--nahidalam--so100_pickplace_small_20250322_193929/snapshots/6c62b5b510cdf908683f63ef3b48034e66ed4f59",
3
+ "action_dim": 32,
4
+ "action_head_cfg": {
5
+ "action_dim": 32,
6
+ "action_horizon": 16,
7
+ "add_pos_embed": true,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "dropout": 0.2,
11
+ "final_dropout": true,
12
+ "interleave_self_attention": true,
13
+ "norm_type": "ada_norm",
14
+ "num_attention_heads": 32,
15
+ "num_layers": 16,
16
+ "output_dim": 1024,
17
+ "positional_embeddings": null
18
+ },
19
+ "freeze_decode_layer": false,
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "load_pretrained_det_decode_layer_path": null,
23
+ "max_action_dim": 32,
24
+ "max_state_dim": 64,
25
+ "model_dtype": "float32",
26
+ "noise_beta_alpha": 1.5,
27
+ "noise_beta_beta": 1.0,
28
+ "noise_s": 0.999,
29
+ "num_inference_timesteps": 16,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true
33
+ },
34
+ "action_horizon": 16,
35
+ "architectures": [
36
+ "GR00T_N1"
37
+ ],
38
+ "attn_implementation": null,
39
+ "backbone_cfg": {
40
+ "allow_reshape_visual": true,
41
+ "load_pretrained_det_eagle_path": null,
42
+ "model_name": "$GR00T_BACKBONE_PATH/eagle2_hg_model",
43
+ "processor_cfg": {
44
+ "max_input_tiles": 1,
45
+ "model_path": "$GR00T_BACKBONE_PATH/eagle2_hg_model",
46
+ "model_spec": {
47
+ "num_image_token": 64,
48
+ "template": "qwen2-chat"
49
+ }
50
+ },
51
+ "projector_dim": 2048,
52
+ "remove_llm": false,
53
+ "reproject_vision": false,
54
+ "scale_image_resolution": 1,
55
+ "select_layer": 12,
56
+ "tune_llm": false,
57
+ "tune_visual": true
58
+ },
59
+ "compute_dtype": "bfloat16",
60
+ "hidden_size": 1536,
61
+ "model_dtype": "float32",
62
+ "model_type": "gr00t_n1",
63
+ "torch_dtype": "float32",
64
+ "transformers_version": "4.45.2"
65
+ }
experiment_cfg/metadata.json ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "statistics": {
4
+ "state": {
5
+ "main_arm": {
6
+ "max": [
7
+ 17.9296875,
8
+ 191.513671875,
9
+ 184.833984375,
10
+ 103.7109375,
11
+ 39.638671875
12
+ ],
13
+ "min": [
14
+ -47.63671875,
15
+ 42.275390625,
16
+ 43.76953125,
17
+ -16.69921875,
18
+ -86.484375
19
+ ],
20
+ "mean": [
21
+ -3.9514260292053223,
22
+ 115.6729507446289,
23
+ 119.81109619140625,
24
+ 59.654396057128906,
25
+ -7.58962345123291
26
+ ],
27
+ "std": [
28
+ 11.631893157958984,
29
+ 45.66527557373047,
30
+ 39.17742919921875,
31
+ 18.39881134033203,
32
+ 13.31604290008545
33
+ ],
34
+ "q01": [
35
+ -36.826171875,
36
+ 55.386914062500004,
37
+ 56.25,
38
+ 14.853515625,
39
+ -57.48046875
40
+ ],
41
+ "q99": [
42
+ 8.525390625,
43
+ 191.162109375,
44
+ 184.39453125,
45
+ 97.294921875,
46
+ 20.478515625
47
+ ]
48
+ },
49
+ "main_gripper": {
50
+ "max": [
51
+ 44.17647171020508
52
+ ],
53
+ "min": [
54
+ -0.05882352963089943
55
+ ],
56
+ "mean": [
57
+ 7.006686687469482
58
+ ],
59
+ "std": [
60
+ 9.721986770629883
61
+ ],
62
+ "q01": [
63
+ 0.05882352963089943
64
+ ],
65
+ "q99": [
66
+ 33.70588302612305
67
+ ]
68
+ },
69
+ "cv_arm": {
70
+ "max": [
71
+ 46.40625,
72
+ 181.7578125,
73
+ 159.43359375,
74
+ 98.4375,
75
+ 56.953125
76
+ ],
77
+ "min": [
78
+ -24.521484375,
79
+ 35.947265625,
80
+ 39.638671875,
81
+ -7.119140625,
82
+ -87.5390625
83
+ ],
84
+ "mean": [
85
+ -0.4299631416797638,
86
+ 113.0190200805664,
87
+ 108.52798461914062,
88
+ 62.34052658081055,
89
+ -5.02552604675293
90
+ ],
91
+ "std": [
92
+ 12.961642265319824,
93
+ 46.619747161865234,
94
+ 33.21040344238281,
95
+ 15.13332462310791,
96
+ 14.51511001586914
97
+ ],
98
+ "q01": [
99
+ -16.69921875,
100
+ 47.021484375,
101
+ 55.107421875,
102
+ 18.648632812500004,
103
+ -56.77734375
104
+ ],
105
+ "q99": [
106
+ 35.843554687499335,
107
+ 181.0546875,
108
+ 158.994140625,
109
+ 95.361328125,
110
+ 33.3984375
111
+ ]
112
+ },
113
+ "cv_gripper": {
114
+ "max": [
115
+ 47.11538314819336
116
+ ],
117
+ "min": [
118
+ -2.1291208267211914
119
+ ],
120
+ "mean": [
121
+ 5.908763408660889
122
+ ],
123
+ "std": [
124
+ 10.08362102508545
125
+ ],
126
+ "q01": [
127
+ -2.1291208267211914
128
+ ],
129
+ "q99": [
130
+ 34.409339904785156
131
+ ]
132
+ }
133
+ },
134
+ "action": {
135
+ "main_arm": {
136
+ "max": [
137
+ 15.64453125,
138
+ 191.865234375,
139
+ 193.53515625,
140
+ 103.974609375,
141
+ 39.814453125
142
+ ],
143
+ "min": [
144
+ -47.98828125,
145
+ 42.451171875,
146
+ 36.826171875,
147
+ -19.51171875,
148
+ -94.306640625
149
+ ],
150
+ "mean": [
151
+ -3.7247848510742188,
152
+ 117.95967102050781,
153
+ 118.52682495117188,
154
+ 59.04578399658203,
155
+ -7.619071006774902
156
+ ],
157
+ "std": [
158
+ 11.699190139770508,
159
+ 44.85472106933594,
160
+ 42.83469009399414,
161
+ 18.53948211669922,
162
+ 13.3831148147583
163
+ ],
164
+ "q01": [
165
+ -36.73828125,
166
+ 56.6015625,
167
+ 51.15234375,
168
+ 13.88671875,
169
+ -57.728320312499996
170
+ ],
171
+ "q99": [
172
+ 10.107421875,
173
+ 191.865234375,
174
+ 192.65625,
175
+ 97.20703125,
176
+ 20.56640625
177
+ ]
178
+ },
179
+ "main_gripper": {
180
+ "max": [
181
+ 45.640567779541016
182
+ ],
183
+ "min": [
184
+ -0.26690390706062317
185
+ ],
186
+ "mean": [
187
+ 6.632084369659424
188
+ ],
189
+ "std": [
190
+ 10.121220588684082
191
+ ],
192
+ "q01": [
193
+ -0.17793594300746918
194
+ ],
195
+ "q99": [
196
+ 34.25267028808594
197
+ ]
198
+ },
199
+ "cv_arm": {
200
+ "max": [
201
+ 46.58203125,
202
+ 181.669921875,
203
+ 158.115234375,
204
+ 98.61328125,
205
+ 57.3046875
206
+ ],
207
+ "min": [
208
+ -27.509765625,
209
+ 36.826171875,
210
+ 36.474609375,
211
+ -7.55859375,
212
+ -88.06640625
213
+ ],
214
+ "mean": [
215
+ -0.4648164212703705,
216
+ 115.15673828125,
217
+ 105.80558776855469,
218
+ 61.761329650878906,
219
+ -5.051438808441162
220
+ ],
221
+ "std": [
222
+ 12.976478576660156,
223
+ 45.85354232788086,
224
+ 33.97755813598633,
225
+ 15.249994277954102,
226
+ 14.54588508605957
227
+ ],
228
+ "q01": [
229
+ -17.05078125,
230
+ 48.076171875,
231
+ 51.064453125,
232
+ 18.209179687500004,
233
+ -56.849414062499996
234
+ ],
235
+ "q99": [
236
+ 35.771484375,
237
+ 181.669921875,
238
+ 157.5,
239
+ 95.185546875,
240
+ 33.486328125
241
+ ]
242
+ },
243
+ "cv_gripper": {
244
+ "max": [
245
+ 49.21514129638672
246
+ ],
247
+ "min": [
248
+ -3.139427423477173
249
+ ],
250
+ "mean": [
251
+ 3.895470142364502
252
+ ],
253
+ "std": [
254
+ 11.047956466674805
255
+ ],
256
+ "q01": [
257
+ -3.047091484069824
258
+ ],
259
+ "q99": [
260
+ 35.08771896362305
261
+ ]
262
+ }
263
+ }
264
+ },
265
+ "modalities": {
266
+ "video": {
267
+ "main": {
268
+ "resolution": [
269
+ 640,
270
+ 480
271
+ ],
272
+ "channels": 3,
273
+ "fps": 30.0
274
+ },
275
+ "cv": {
276
+ "resolution": [
277
+ 640,
278
+ 480
279
+ ],
280
+ "channels": 3,
281
+ "fps": 30.0
282
+ },
283
+ "webcam": {
284
+ "resolution": [
285
+ 640,
286
+ 480
287
+ ],
288
+ "channels": 3,
289
+ "fps": 30.0
290
+ }
291
+ },
292
+ "state": {
293
+ "main_arm": {
294
+ "absolute": true,
295
+ "rotation_type": null,
296
+ "shape": [
297
+ 5
298
+ ],
299
+ "continuous": true
300
+ },
301
+ "main_gripper": {
302
+ "absolute": true,
303
+ "rotation_type": null,
304
+ "shape": [
305
+ 1
306
+ ],
307
+ "continuous": true
308
+ },
309
+ "cv_arm": {
310
+ "absolute": true,
311
+ "rotation_type": null,
312
+ "shape": [
313
+ 5
314
+ ],
315
+ "continuous": true
316
+ },
317
+ "cv_gripper": {
318
+ "absolute": true,
319
+ "rotation_type": null,
320
+ "shape": [
321
+ 1
322
+ ],
323
+ "continuous": true
324
+ }
325
+ },
326
+ "action": {
327
+ "main_arm": {
328
+ "absolute": true,
329
+ "rotation_type": null,
330
+ "shape": [
331
+ 5
332
+ ],
333
+ "continuous": true
334
+ },
335
+ "main_gripper": {
336
+ "absolute": true,
337
+ "rotation_type": null,
338
+ "shape": [
339
+ 1
340
+ ],
341
+ "continuous": true
342
+ },
343
+ "cv_arm": {
344
+ "absolute": true,
345
+ "rotation_type": null,
346
+ "shape": [
347
+ 5
348
+ ],
349
+ "continuous": true
350
+ },
351
+ "cv_gripper": {
352
+ "absolute": true,
353
+ "rotation_type": null,
354
+ "shape": [
355
+ 1
356
+ ],
357
+ "continuous": true
358
+ }
359
+ }
360
+ },
361
+ "embodiment_tag": "new_embodiment"
362
+ }
363
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84d7c66b513588f6fec08af92b437cb2b8962faa19e5832b06a94cdffcb422f
3
+ size 4938446392
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1337d7c7498ee078f67826b8eb271ace3987c673474a33c201b018e3392e9fff
3
+ size 3821736024
model.safetensors.index.json ADDED
@@ -0,0 +1,809 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 8760067008
4
+ },
5
+ "weight_map": {
6
+ "action_head.action_decoder.layer1.W": "model-00002-of-00002.safetensors",
7
+ "action_head.action_decoder.layer1.b": "model-00002-of-00002.safetensors",
8
+ "action_head.action_decoder.layer2.W": "model-00002-of-00002.safetensors",
9
+ "action_head.action_decoder.layer2.b": "model-00002-of-00002.safetensors",
10
+ "action_head.action_encoder.W1.W": "model-00002-of-00002.safetensors",
11
+ "action_head.action_encoder.W1.b": "model-00002-of-00002.safetensors",
12
+ "action_head.action_encoder.W2.W": "model-00002-of-00002.safetensors",
13
+ "action_head.action_encoder.W2.b": "model-00002-of-00002.safetensors",
14
+ "action_head.action_encoder.W3.W": "model-00002-of-00002.safetensors",
15
+ "action_head.action_encoder.W3.b": "model-00002-of-00002.safetensors",
16
+ "action_head.model.proj_out_1.bias": "model-00002-of-00002.safetensors",
17
+ "action_head.model.proj_out_1.weight": "model-00002-of-00002.safetensors",
18
+ "action_head.model.proj_out_2.bias": "model-00002-of-00002.safetensors",
19
+ "action_head.model.proj_out_2.weight": "model-00002-of-00002.safetensors",
20
+ "action_head.model.timestep_encoder.timestep_embedder.linear_1.bias": "model-00002-of-00002.safetensors",
21
+ "action_head.model.timestep_encoder.timestep_embedder.linear_1.weight": "model-00002-of-00002.safetensors",
22
+ "action_head.model.timestep_encoder.timestep_embedder.linear_2.bias": "model-00002-of-00002.safetensors",
23
+ "action_head.model.timestep_encoder.timestep_embedder.linear_2.weight": "model-00002-of-00002.safetensors",
24
+ "action_head.model.transformer_blocks.0.attn1.to_k.bias": "model-00002-of-00002.safetensors",
25
+ "action_head.model.transformer_blocks.0.attn1.to_k.weight": "model-00002-of-00002.safetensors",
26
+ "action_head.model.transformer_blocks.0.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
27
+ "action_head.model.transformer_blocks.0.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
28
+ "action_head.model.transformer_blocks.0.attn1.to_q.bias": "model-00002-of-00002.safetensors",
29
+ "action_head.model.transformer_blocks.0.attn1.to_q.weight": "model-00002-of-00002.safetensors",
30
+ "action_head.model.transformer_blocks.0.attn1.to_v.bias": "model-00002-of-00002.safetensors",
31
+ "action_head.model.transformer_blocks.0.attn1.to_v.weight": "model-00002-of-00002.safetensors",
32
+ "action_head.model.transformer_blocks.0.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
33
+ "action_head.model.transformer_blocks.0.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
34
+ "action_head.model.transformer_blocks.0.ff.net.2.bias": "model-00002-of-00002.safetensors",
35
+ "action_head.model.transformer_blocks.0.ff.net.2.weight": "model-00002-of-00002.safetensors",
36
+ "action_head.model.transformer_blocks.0.norm1.linear.bias": "model-00002-of-00002.safetensors",
37
+ "action_head.model.transformer_blocks.0.norm1.linear.weight": "model-00002-of-00002.safetensors",
38
+ "action_head.model.transformer_blocks.1.attn1.to_k.bias": "model-00002-of-00002.safetensors",
39
+ "action_head.model.transformer_blocks.1.attn1.to_k.weight": "model-00002-of-00002.safetensors",
40
+ "action_head.model.transformer_blocks.1.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
41
+ "action_head.model.transformer_blocks.1.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
42
+ "action_head.model.transformer_blocks.1.attn1.to_q.bias": "model-00002-of-00002.safetensors",
43
+ "action_head.model.transformer_blocks.1.attn1.to_q.weight": "model-00002-of-00002.safetensors",
44
+ "action_head.model.transformer_blocks.1.attn1.to_v.bias": "model-00002-of-00002.safetensors",
45
+ "action_head.model.transformer_blocks.1.attn1.to_v.weight": "model-00002-of-00002.safetensors",
46
+ "action_head.model.transformer_blocks.1.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
47
+ "action_head.model.transformer_blocks.1.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
48
+ "action_head.model.transformer_blocks.1.ff.net.2.bias": "model-00002-of-00002.safetensors",
49
+ "action_head.model.transformer_blocks.1.ff.net.2.weight": "model-00002-of-00002.safetensors",
50
+ "action_head.model.transformer_blocks.1.norm1.linear.bias": "model-00002-of-00002.safetensors",
51
+ "action_head.model.transformer_blocks.1.norm1.linear.weight": "model-00002-of-00002.safetensors",
52
+ "action_head.model.transformer_blocks.10.attn1.to_k.bias": "model-00002-of-00002.safetensors",
53
+ "action_head.model.transformer_blocks.10.attn1.to_k.weight": "model-00002-of-00002.safetensors",
54
+ "action_head.model.transformer_blocks.10.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
55
+ "action_head.model.transformer_blocks.10.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
56
+ "action_head.model.transformer_blocks.10.attn1.to_q.bias": "model-00002-of-00002.safetensors",
57
+ "action_head.model.transformer_blocks.10.attn1.to_q.weight": "model-00002-of-00002.safetensors",
58
+ "action_head.model.transformer_blocks.10.attn1.to_v.bias": "model-00002-of-00002.safetensors",
59
+ "action_head.model.transformer_blocks.10.attn1.to_v.weight": "model-00002-of-00002.safetensors",
60
+ "action_head.model.transformer_blocks.10.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
61
+ "action_head.model.transformer_blocks.10.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
62
+ "action_head.model.transformer_blocks.10.ff.net.2.bias": "model-00002-of-00002.safetensors",
63
+ "action_head.model.transformer_blocks.10.ff.net.2.weight": "model-00002-of-00002.safetensors",
64
+ "action_head.model.transformer_blocks.10.norm1.linear.bias": "model-00002-of-00002.safetensors",
65
+ "action_head.model.transformer_blocks.10.norm1.linear.weight": "model-00002-of-00002.safetensors",
66
+ "action_head.model.transformer_blocks.11.attn1.to_k.bias": "model-00002-of-00002.safetensors",
67
+ "action_head.model.transformer_blocks.11.attn1.to_k.weight": "model-00002-of-00002.safetensors",
68
+ "action_head.model.transformer_blocks.11.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
69
+ "action_head.model.transformer_blocks.11.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
70
+ "action_head.model.transformer_blocks.11.attn1.to_q.bias": "model-00002-of-00002.safetensors",
71
+ "action_head.model.transformer_blocks.11.attn1.to_q.weight": "model-00002-of-00002.safetensors",
72
+ "action_head.model.transformer_blocks.11.attn1.to_v.bias": "model-00002-of-00002.safetensors",
73
+ "action_head.model.transformer_blocks.11.attn1.to_v.weight": "model-00002-of-00002.safetensors",
74
+ "action_head.model.transformer_blocks.11.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
75
+ "action_head.model.transformer_blocks.11.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
76
+ "action_head.model.transformer_blocks.11.ff.net.2.bias": "model-00002-of-00002.safetensors",
77
+ "action_head.model.transformer_blocks.11.ff.net.2.weight": "model-00002-of-00002.safetensors",
78
+ "action_head.model.transformer_blocks.11.norm1.linear.bias": "model-00002-of-00002.safetensors",
79
+ "action_head.model.transformer_blocks.11.norm1.linear.weight": "model-00002-of-00002.safetensors",
80
+ "action_head.model.transformer_blocks.12.attn1.to_k.bias": "model-00002-of-00002.safetensors",
81
+ "action_head.model.transformer_blocks.12.attn1.to_k.weight": "model-00002-of-00002.safetensors",
82
+ "action_head.model.transformer_blocks.12.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
83
+ "action_head.model.transformer_blocks.12.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
84
+ "action_head.model.transformer_blocks.12.attn1.to_q.bias": "model-00002-of-00002.safetensors",
85
+ "action_head.model.transformer_blocks.12.attn1.to_q.weight": "model-00002-of-00002.safetensors",
86
+ "action_head.model.transformer_blocks.12.attn1.to_v.bias": "model-00002-of-00002.safetensors",
87
+ "action_head.model.transformer_blocks.12.attn1.to_v.weight": "model-00002-of-00002.safetensors",
88
+ "action_head.model.transformer_blocks.12.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
89
+ "action_head.model.transformer_blocks.12.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
90
+ "action_head.model.transformer_blocks.12.ff.net.2.bias": "model-00002-of-00002.safetensors",
91
+ "action_head.model.transformer_blocks.12.ff.net.2.weight": "model-00002-of-00002.safetensors",
92
+ "action_head.model.transformer_blocks.12.norm1.linear.bias": "model-00002-of-00002.safetensors",
93
+ "action_head.model.transformer_blocks.12.norm1.linear.weight": "model-00002-of-00002.safetensors",
94
+ "action_head.model.transformer_blocks.13.attn1.to_k.bias": "model-00002-of-00002.safetensors",
95
+ "action_head.model.transformer_blocks.13.attn1.to_k.weight": "model-00002-of-00002.safetensors",
96
+ "action_head.model.transformer_blocks.13.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
97
+ "action_head.model.transformer_blocks.13.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
98
+ "action_head.model.transformer_blocks.13.attn1.to_q.bias": "model-00002-of-00002.safetensors",
99
+ "action_head.model.transformer_blocks.13.attn1.to_q.weight": "model-00002-of-00002.safetensors",
100
+ "action_head.model.transformer_blocks.13.attn1.to_v.bias": "model-00002-of-00002.safetensors",
101
+ "action_head.model.transformer_blocks.13.attn1.to_v.weight": "model-00002-of-00002.safetensors",
102
+ "action_head.model.transformer_blocks.13.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
103
+ "action_head.model.transformer_blocks.13.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
104
+ "action_head.model.transformer_blocks.13.ff.net.2.bias": "model-00002-of-00002.safetensors",
105
+ "action_head.model.transformer_blocks.13.ff.net.2.weight": "model-00002-of-00002.safetensors",
106
+ "action_head.model.transformer_blocks.13.norm1.linear.bias": "model-00002-of-00002.safetensors",
107
+ "action_head.model.transformer_blocks.13.norm1.linear.weight": "model-00002-of-00002.safetensors",
108
+ "action_head.model.transformer_blocks.14.attn1.to_k.bias": "model-00002-of-00002.safetensors",
109
+ "action_head.model.transformer_blocks.14.attn1.to_k.weight": "model-00002-of-00002.safetensors",
110
+ "action_head.model.transformer_blocks.14.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
111
+ "action_head.model.transformer_blocks.14.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
112
+ "action_head.model.transformer_blocks.14.attn1.to_q.bias": "model-00002-of-00002.safetensors",
113
+ "action_head.model.transformer_blocks.14.attn1.to_q.weight": "model-00002-of-00002.safetensors",
114
+ "action_head.model.transformer_blocks.14.attn1.to_v.bias": "model-00002-of-00002.safetensors",
115
+ "action_head.model.transformer_blocks.14.attn1.to_v.weight": "model-00002-of-00002.safetensors",
116
+ "action_head.model.transformer_blocks.14.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
117
+ "action_head.model.transformer_blocks.14.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
118
+ "action_head.model.transformer_blocks.14.ff.net.2.bias": "model-00002-of-00002.safetensors",
119
+ "action_head.model.transformer_blocks.14.ff.net.2.weight": "model-00002-of-00002.safetensors",
120
+ "action_head.model.transformer_blocks.14.norm1.linear.bias": "model-00002-of-00002.safetensors",
121
+ "action_head.model.transformer_blocks.14.norm1.linear.weight": "model-00002-of-00002.safetensors",
122
+ "action_head.model.transformer_blocks.15.attn1.to_k.bias": "model-00002-of-00002.safetensors",
123
+ "action_head.model.transformer_blocks.15.attn1.to_k.weight": "model-00002-of-00002.safetensors",
124
+ "action_head.model.transformer_blocks.15.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
125
+ "action_head.model.transformer_blocks.15.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
126
+ "action_head.model.transformer_blocks.15.attn1.to_q.bias": "model-00002-of-00002.safetensors",
127
+ "action_head.model.transformer_blocks.15.attn1.to_q.weight": "model-00002-of-00002.safetensors",
128
+ "action_head.model.transformer_blocks.15.attn1.to_v.bias": "model-00002-of-00002.safetensors",
129
+ "action_head.model.transformer_blocks.15.attn1.to_v.weight": "model-00002-of-00002.safetensors",
130
+ "action_head.model.transformer_blocks.15.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
131
+ "action_head.model.transformer_blocks.15.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
132
+ "action_head.model.transformer_blocks.15.ff.net.2.bias": "model-00002-of-00002.safetensors",
133
+ "action_head.model.transformer_blocks.15.ff.net.2.weight": "model-00002-of-00002.safetensors",
134
+ "action_head.model.transformer_blocks.15.norm1.linear.bias": "model-00002-of-00002.safetensors",
135
+ "action_head.model.transformer_blocks.15.norm1.linear.weight": "model-00002-of-00002.safetensors",
136
+ "action_head.model.transformer_blocks.2.attn1.to_k.bias": "model-00002-of-00002.safetensors",
137
+ "action_head.model.transformer_blocks.2.attn1.to_k.weight": "model-00002-of-00002.safetensors",
138
+ "action_head.model.transformer_blocks.2.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
139
+ "action_head.model.transformer_blocks.2.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
140
+ "action_head.model.transformer_blocks.2.attn1.to_q.bias": "model-00002-of-00002.safetensors",
141
+ "action_head.model.transformer_blocks.2.attn1.to_q.weight": "model-00002-of-00002.safetensors",
142
+ "action_head.model.transformer_blocks.2.attn1.to_v.bias": "model-00002-of-00002.safetensors",
143
+ "action_head.model.transformer_blocks.2.attn1.to_v.weight": "model-00002-of-00002.safetensors",
144
+ "action_head.model.transformer_blocks.2.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
145
+ "action_head.model.transformer_blocks.2.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
146
+ "action_head.model.transformer_blocks.2.ff.net.2.bias": "model-00002-of-00002.safetensors",
147
+ "action_head.model.transformer_blocks.2.ff.net.2.weight": "model-00002-of-00002.safetensors",
148
+ "action_head.model.transformer_blocks.2.norm1.linear.bias": "model-00002-of-00002.safetensors",
149
+ "action_head.model.transformer_blocks.2.norm1.linear.weight": "model-00002-of-00002.safetensors",
150
+ "action_head.model.transformer_blocks.3.attn1.to_k.bias": "model-00002-of-00002.safetensors",
151
+ "action_head.model.transformer_blocks.3.attn1.to_k.weight": "model-00002-of-00002.safetensors",
152
+ "action_head.model.transformer_blocks.3.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
153
+ "action_head.model.transformer_blocks.3.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
154
+ "action_head.model.transformer_blocks.3.attn1.to_q.bias": "model-00002-of-00002.safetensors",
155
+ "action_head.model.transformer_blocks.3.attn1.to_q.weight": "model-00002-of-00002.safetensors",
156
+ "action_head.model.transformer_blocks.3.attn1.to_v.bias": "model-00002-of-00002.safetensors",
157
+ "action_head.model.transformer_blocks.3.attn1.to_v.weight": "model-00002-of-00002.safetensors",
158
+ "action_head.model.transformer_blocks.3.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
159
+ "action_head.model.transformer_blocks.3.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
160
+ "action_head.model.transformer_blocks.3.ff.net.2.bias": "model-00002-of-00002.safetensors",
161
+ "action_head.model.transformer_blocks.3.ff.net.2.weight": "model-00002-of-00002.safetensors",
162
+ "action_head.model.transformer_blocks.3.norm1.linear.bias": "model-00002-of-00002.safetensors",
163
+ "action_head.model.transformer_blocks.3.norm1.linear.weight": "model-00002-of-00002.safetensors",
164
+ "action_head.model.transformer_blocks.4.attn1.to_k.bias": "model-00002-of-00002.safetensors",
165
+ "action_head.model.transformer_blocks.4.attn1.to_k.weight": "model-00002-of-00002.safetensors",
166
+ "action_head.model.transformer_blocks.4.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
167
+ "action_head.model.transformer_blocks.4.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
168
+ "action_head.model.transformer_blocks.4.attn1.to_q.bias": "model-00002-of-00002.safetensors",
169
+ "action_head.model.transformer_blocks.4.attn1.to_q.weight": "model-00002-of-00002.safetensors",
170
+ "action_head.model.transformer_blocks.4.attn1.to_v.bias": "model-00002-of-00002.safetensors",
171
+ "action_head.model.transformer_blocks.4.attn1.to_v.weight": "model-00002-of-00002.safetensors",
172
+ "action_head.model.transformer_blocks.4.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
173
+ "action_head.model.transformer_blocks.4.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
174
+ "action_head.model.transformer_blocks.4.ff.net.2.bias": "model-00002-of-00002.safetensors",
175
+ "action_head.model.transformer_blocks.4.ff.net.2.weight": "model-00002-of-00002.safetensors",
176
+ "action_head.model.transformer_blocks.4.norm1.linear.bias": "model-00002-of-00002.safetensors",
177
+ "action_head.model.transformer_blocks.4.norm1.linear.weight": "model-00002-of-00002.safetensors",
178
+ "action_head.model.transformer_blocks.5.attn1.to_k.bias": "model-00002-of-00002.safetensors",
179
+ "action_head.model.transformer_blocks.5.attn1.to_k.weight": "model-00002-of-00002.safetensors",
180
+ "action_head.model.transformer_blocks.5.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
181
+ "action_head.model.transformer_blocks.5.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
182
+ "action_head.model.transformer_blocks.5.attn1.to_q.bias": "model-00002-of-00002.safetensors",
183
+ "action_head.model.transformer_blocks.5.attn1.to_q.weight": "model-00002-of-00002.safetensors",
184
+ "action_head.model.transformer_blocks.5.attn1.to_v.bias": "model-00002-of-00002.safetensors",
185
+ "action_head.model.transformer_blocks.5.attn1.to_v.weight": "model-00002-of-00002.safetensors",
186
+ "action_head.model.transformer_blocks.5.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
187
+ "action_head.model.transformer_blocks.5.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
188
+ "action_head.model.transformer_blocks.5.ff.net.2.bias": "model-00002-of-00002.safetensors",
189
+ "action_head.model.transformer_blocks.5.ff.net.2.weight": "model-00002-of-00002.safetensors",
190
+ "action_head.model.transformer_blocks.5.norm1.linear.bias": "model-00002-of-00002.safetensors",
191
+ "action_head.model.transformer_blocks.5.norm1.linear.weight": "model-00002-of-00002.safetensors",
192
+ "action_head.model.transformer_blocks.6.attn1.to_k.bias": "model-00002-of-00002.safetensors",
193
+ "action_head.model.transformer_blocks.6.attn1.to_k.weight": "model-00002-of-00002.safetensors",
194
+ "action_head.model.transformer_blocks.6.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
195
+ "action_head.model.transformer_blocks.6.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
196
+ "action_head.model.transformer_blocks.6.attn1.to_q.bias": "model-00002-of-00002.safetensors",
197
+ "action_head.model.transformer_blocks.6.attn1.to_q.weight": "model-00002-of-00002.safetensors",
198
+ "action_head.model.transformer_blocks.6.attn1.to_v.bias": "model-00002-of-00002.safetensors",
199
+ "action_head.model.transformer_blocks.6.attn1.to_v.weight": "model-00002-of-00002.safetensors",
200
+ "action_head.model.transformer_blocks.6.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
201
+ "action_head.model.transformer_blocks.6.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
202
+ "action_head.model.transformer_blocks.6.ff.net.2.bias": "model-00002-of-00002.safetensors",
203
+ "action_head.model.transformer_blocks.6.ff.net.2.weight": "model-00002-of-00002.safetensors",
204
+ "action_head.model.transformer_blocks.6.norm1.linear.bias": "model-00002-of-00002.safetensors",
205
+ "action_head.model.transformer_blocks.6.norm1.linear.weight": "model-00002-of-00002.safetensors",
206
+ "action_head.model.transformer_blocks.7.attn1.to_k.bias": "model-00002-of-00002.safetensors",
207
+ "action_head.model.transformer_blocks.7.attn1.to_k.weight": "model-00002-of-00002.safetensors",
208
+ "action_head.model.transformer_blocks.7.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
209
+ "action_head.model.transformer_blocks.7.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
210
+ "action_head.model.transformer_blocks.7.attn1.to_q.bias": "model-00002-of-00002.safetensors",
211
+ "action_head.model.transformer_blocks.7.attn1.to_q.weight": "model-00002-of-00002.safetensors",
212
+ "action_head.model.transformer_blocks.7.attn1.to_v.bias": "model-00002-of-00002.safetensors",
213
+ "action_head.model.transformer_blocks.7.attn1.to_v.weight": "model-00002-of-00002.safetensors",
214
+ "action_head.model.transformer_blocks.7.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
215
+ "action_head.model.transformer_blocks.7.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
216
+ "action_head.model.transformer_blocks.7.ff.net.2.bias": "model-00002-of-00002.safetensors",
217
+ "action_head.model.transformer_blocks.7.ff.net.2.weight": "model-00002-of-00002.safetensors",
218
+ "action_head.model.transformer_blocks.7.norm1.linear.bias": "model-00002-of-00002.safetensors",
219
+ "action_head.model.transformer_blocks.7.norm1.linear.weight": "model-00002-of-00002.safetensors",
220
+ "action_head.model.transformer_blocks.8.attn1.to_k.bias": "model-00002-of-00002.safetensors",
221
+ "action_head.model.transformer_blocks.8.attn1.to_k.weight": "model-00002-of-00002.safetensors",
222
+ "action_head.model.transformer_blocks.8.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
223
+ "action_head.model.transformer_blocks.8.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
224
+ "action_head.model.transformer_blocks.8.attn1.to_q.bias": "model-00002-of-00002.safetensors",
225
+ "action_head.model.transformer_blocks.8.attn1.to_q.weight": "model-00002-of-00002.safetensors",
226
+ "action_head.model.transformer_blocks.8.attn1.to_v.bias": "model-00002-of-00002.safetensors",
227
+ "action_head.model.transformer_blocks.8.attn1.to_v.weight": "model-00002-of-00002.safetensors",
228
+ "action_head.model.transformer_blocks.8.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
229
+ "action_head.model.transformer_blocks.8.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
230
+ "action_head.model.transformer_blocks.8.ff.net.2.bias": "model-00002-of-00002.safetensors",
231
+ "action_head.model.transformer_blocks.8.ff.net.2.weight": "model-00002-of-00002.safetensors",
232
+ "action_head.model.transformer_blocks.8.norm1.linear.bias": "model-00002-of-00002.safetensors",
233
+ "action_head.model.transformer_blocks.8.norm1.linear.weight": "model-00002-of-00002.safetensors",
234
+ "action_head.model.transformer_blocks.9.attn1.to_k.bias": "model-00002-of-00002.safetensors",
235
+ "action_head.model.transformer_blocks.9.attn1.to_k.weight": "model-00002-of-00002.safetensors",
236
+ "action_head.model.transformer_blocks.9.attn1.to_out.0.bias": "model-00002-of-00002.safetensors",
237
+ "action_head.model.transformer_blocks.9.attn1.to_out.0.weight": "model-00002-of-00002.safetensors",
238
+ "action_head.model.transformer_blocks.9.attn1.to_q.bias": "model-00002-of-00002.safetensors",
239
+ "action_head.model.transformer_blocks.9.attn1.to_q.weight": "model-00002-of-00002.safetensors",
240
+ "action_head.model.transformer_blocks.9.attn1.to_v.bias": "model-00002-of-00002.safetensors",
241
+ "action_head.model.transformer_blocks.9.attn1.to_v.weight": "model-00002-of-00002.safetensors",
242
+ "action_head.model.transformer_blocks.9.ff.net.0.proj.bias": "model-00002-of-00002.safetensors",
243
+ "action_head.model.transformer_blocks.9.ff.net.0.proj.weight": "model-00002-of-00002.safetensors",
244
+ "action_head.model.transformer_blocks.9.ff.net.2.bias": "model-00002-of-00002.safetensors",
245
+ "action_head.model.transformer_blocks.9.ff.net.2.weight": "model-00002-of-00002.safetensors",
246
+ "action_head.model.transformer_blocks.9.norm1.linear.bias": "model-00002-of-00002.safetensors",
247
+ "action_head.model.transformer_blocks.9.norm1.linear.weight": "model-00002-of-00002.safetensors",
248
+ "action_head.position_embedding.weight": "model-00002-of-00002.safetensors",
249
+ "action_head.state_encoder.layer1.W": "model-00002-of-00002.safetensors",
250
+ "action_head.state_encoder.layer1.b": "model-00002-of-00002.safetensors",
251
+ "action_head.state_encoder.layer2.W": "model-00002-of-00002.safetensors",
252
+ "action_head.state_encoder.layer2.b": "model-00002-of-00002.safetensors",
253
+ "backbone.linear.bias": "model-00002-of-00002.safetensors",
254
+ "backbone.linear.weight": "model-00002-of-00002.safetensors",
255
+ "backbone.model.language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
256
+ "backbone.model.language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
257
+ "backbone.model.language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
258
+ "backbone.model.language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
259
+ "backbone.model.language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
260
+ "backbone.model.language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
261
+ "backbone.model.language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
262
+ "backbone.model.language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
263
+ "backbone.model.language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
264
+ "backbone.model.language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
265
+ "backbone.model.language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "backbone.model.language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
267
+ "backbone.model.language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
268
+ "backbone.model.language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
269
+ "backbone.model.language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
270
+ "backbone.model.language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
271
+ "backbone.model.language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
272
+ "backbone.model.language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
273
+ "backbone.model.language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
274
+ "backbone.model.language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00002.safetensors",
275
+ "backbone.model.language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
276
+ "backbone.model.language_model.model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
277
+ "backbone.model.language_model.model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
278
+ "backbone.model.language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
279
+ "backbone.model.language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
280
+ "backbone.model.language_model.model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
281
+ "backbone.model.language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
282
+ "backbone.model.language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
283
+ "backbone.model.language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "backbone.model.language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "backbone.model.language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "backbone.model.language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "backbone.model.language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "backbone.model.language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
289
+ "backbone.model.language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
290
+ "backbone.model.language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
291
+ "backbone.model.language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
292
+ "backbone.model.language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
293
+ "backbone.model.language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
294
+ "backbone.model.language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
295
+ "backbone.model.language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
296
+ "backbone.model.language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
297
+ "backbone.model.language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
298
+ "backbone.model.language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
299
+ "backbone.model.language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
300
+ "backbone.model.language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
301
+ "backbone.model.language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
302
+ "backbone.model.language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
303
+ "backbone.model.language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
304
+ "backbone.model.language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
305
+ "backbone.model.language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
306
+ "backbone.model.language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
307
+ "backbone.model.language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
308
+ "backbone.model.language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
309
+ "backbone.model.language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
310
+ "backbone.model.language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
311
+ "backbone.model.language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
312
+ "backbone.model.language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
313
+ "backbone.model.language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
314
+ "backbone.model.language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
315
+ "backbone.model.language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
316
+ "backbone.model.language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
317
+ "backbone.model.language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
318
+ "backbone.model.language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
319
+ "backbone.model.language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
320
+ "backbone.model.language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
321
+ "backbone.model.language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
322
+ "backbone.model.language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
323
+ "backbone.model.language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
324
+ "backbone.model.language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
325
+ "backbone.model.language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
326
+ "backbone.model.language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
327
+ "backbone.model.language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
328
+ "backbone.model.language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
329
+ "backbone.model.language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
330
+ "backbone.model.language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
331
+ "backbone.model.language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
332
+ "backbone.model.language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
333
+ "backbone.model.language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
334
+ "backbone.model.language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
335
+ "backbone.model.language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
336
+ "backbone.model.language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
337
+ "backbone.model.language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
338
+ "backbone.model.language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
339
+ "backbone.model.language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
340
+ "backbone.model.language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
341
+ "backbone.model.language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
342
+ "backbone.model.language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
343
+ "backbone.model.language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
344
+ "backbone.model.language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
345
+ "backbone.model.language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
346
+ "backbone.model.language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
347
+ "backbone.model.language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
348
+ "backbone.model.language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
349
+ "backbone.model.language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
350
+ "backbone.model.language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
351
+ "backbone.model.language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
352
+ "backbone.model.language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
353
+ "backbone.model.language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
354
+ "backbone.model.language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
355
+ "backbone.model.language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
356
+ "backbone.model.language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
357
+ "backbone.model.language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
358
+ "backbone.model.language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
359
+ "backbone.model.language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
360
+ "backbone.model.language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
361
+ "backbone.model.language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
362
+ "backbone.model.language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
363
+ "backbone.model.language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
364
+ "backbone.model.language_model.model.norm.weight": "model-00002-of-00002.safetensors",
365
+ "backbone.model.mlp1.0.bias": "model-00002-of-00002.safetensors",
366
+ "backbone.model.mlp1.0.weight": "model-00002-of-00002.safetensors",
367
+ "backbone.model.mlp1.1.bias": "model-00002-of-00002.safetensors",
368
+ "backbone.model.mlp1.1.weight": "model-00002-of-00002.safetensors",
369
+ "backbone.model.mlp1.3.bias": "model-00002-of-00002.safetensors",
370
+ "backbone.model.mlp1.3.weight": "model-00002-of-00002.safetensors",
371
+ "backbone.model.vision_model.vision_model.embeddings.patch_embedding.bias": "model-00001-of-00002.safetensors",
372
+ "backbone.model.vision_model.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00002.safetensors",
373
+ "backbone.model.vision_model.vision_model.embeddings.position_embedding.weight": "model-00001-of-00002.safetensors",
374
+ "backbone.model.vision_model.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00002.safetensors",
375
+ "backbone.model.vision_model.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00002.safetensors",
376
+ "backbone.model.vision_model.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00002.safetensors",
377
+ "backbone.model.vision_model.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00002.safetensors",
378
+ "backbone.model.vision_model.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
379
+ "backbone.model.vision_model.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
380
+ "backbone.model.vision_model.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
381
+ "backbone.model.vision_model.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
382
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
383
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
384
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
385
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
386
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
387
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
388
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
389
+ "backbone.model.vision_model.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
390
+ "backbone.model.vision_model.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00002.safetensors",
391
+ "backbone.model.vision_model.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00002.safetensors",
392
+ "backbone.model.vision_model.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00002.safetensors",
393
+ "backbone.model.vision_model.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00002.safetensors",
394
+ "backbone.model.vision_model.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
395
+ "backbone.model.vision_model.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
396
+ "backbone.model.vision_model.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
397
+ "backbone.model.vision_model.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
398
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
399
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
401
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
402
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
403
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
404
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
405
+ "backbone.model.vision_model.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
406
+ "backbone.model.vision_model.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00002.safetensors",
407
+ "backbone.model.vision_model.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00002.safetensors",
408
+ "backbone.model.vision_model.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00002.safetensors",
409
+ "backbone.model.vision_model.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00002.safetensors",
410
+ "backbone.model.vision_model.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
411
+ "backbone.model.vision_model.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
412
+ "backbone.model.vision_model.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
413
+ "backbone.model.vision_model.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
414
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
415
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
416
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
417
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
418
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
419
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
420
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
421
+ "backbone.model.vision_model.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
422
+ "backbone.model.vision_model.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00002.safetensors",
423
+ "backbone.model.vision_model.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00002.safetensors",
424
+ "backbone.model.vision_model.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00002.safetensors",
425
+ "backbone.model.vision_model.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00002.safetensors",
426
+ "backbone.model.vision_model.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
427
+ "backbone.model.vision_model.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
428
+ "backbone.model.vision_model.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
429
+ "backbone.model.vision_model.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
430
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
431
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
432
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
433
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
434
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
435
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
436
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
437
+ "backbone.model.vision_model.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
438
+ "backbone.model.vision_model.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00002.safetensors",
439
+ "backbone.model.vision_model.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00002.safetensors",
440
+ "backbone.model.vision_model.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00002.safetensors",
441
+ "backbone.model.vision_model.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00002.safetensors",
442
+ "backbone.model.vision_model.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
443
+ "backbone.model.vision_model.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
444
+ "backbone.model.vision_model.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
445
+ "backbone.model.vision_model.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
446
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
447
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
448
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
449
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
450
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
451
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
452
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
453
+ "backbone.model.vision_model.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
454
+ "backbone.model.vision_model.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00002.safetensors",
455
+ "backbone.model.vision_model.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00002.safetensors",
456
+ "backbone.model.vision_model.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00002.safetensors",
457
+ "backbone.model.vision_model.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00002.safetensors",
458
+ "backbone.model.vision_model.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
459
+ "backbone.model.vision_model.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
460
+ "backbone.model.vision_model.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
461
+ "backbone.model.vision_model.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
462
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
463
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
464
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
465
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
466
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
467
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
468
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
469
+ "backbone.model.vision_model.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
470
+ "backbone.model.vision_model.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00002.safetensors",
471
+ "backbone.model.vision_model.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00002.safetensors",
472
+ "backbone.model.vision_model.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00002.safetensors",
473
+ "backbone.model.vision_model.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00002.safetensors",
474
+ "backbone.model.vision_model.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
475
+ "backbone.model.vision_model.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
476
+ "backbone.model.vision_model.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
477
+ "backbone.model.vision_model.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
478
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
479
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
480
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
481
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
482
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
483
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
484
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
485
+ "backbone.model.vision_model.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
486
+ "backbone.model.vision_model.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00002.safetensors",
487
+ "backbone.model.vision_model.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00002.safetensors",
488
+ "backbone.model.vision_model.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00002.safetensors",
489
+ "backbone.model.vision_model.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00002.safetensors",
490
+ "backbone.model.vision_model.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
491
+ "backbone.model.vision_model.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
492
+ "backbone.model.vision_model.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
493
+ "backbone.model.vision_model.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
494
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
495
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
496
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
497
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
498
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
499
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
500
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
501
+ "backbone.model.vision_model.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
502
+ "backbone.model.vision_model.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00002.safetensors",
503
+ "backbone.model.vision_model.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00002.safetensors",
504
+ "backbone.model.vision_model.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00002.safetensors",
505
+ "backbone.model.vision_model.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00002.safetensors",
506
+ "backbone.model.vision_model.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
507
+ "backbone.model.vision_model.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
508
+ "backbone.model.vision_model.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
509
+ "backbone.model.vision_model.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
510
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
511
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
512
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
513
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
514
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
515
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
516
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
517
+ "backbone.model.vision_model.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
518
+ "backbone.model.vision_model.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00002.safetensors",
519
+ "backbone.model.vision_model.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00002.safetensors",
520
+ "backbone.model.vision_model.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00002.safetensors",
521
+ "backbone.model.vision_model.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00002.safetensors",
522
+ "backbone.model.vision_model.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
523
+ "backbone.model.vision_model.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
524
+ "backbone.model.vision_model.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
525
+ "backbone.model.vision_model.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
526
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
527
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
528
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
529
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
530
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
531
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
532
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
533
+ "backbone.model.vision_model.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
534
+ "backbone.model.vision_model.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00002.safetensors",
535
+ "backbone.model.vision_model.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00002.safetensors",
536
+ "backbone.model.vision_model.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00002.safetensors",
537
+ "backbone.model.vision_model.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00002.safetensors",
538
+ "backbone.model.vision_model.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
539
+ "backbone.model.vision_model.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
540
+ "backbone.model.vision_model.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
541
+ "backbone.model.vision_model.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
542
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
543
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
544
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
545
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
546
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
547
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
548
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
549
+ "backbone.model.vision_model.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
550
+ "backbone.model.vision_model.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00002.safetensors",
551
+ "backbone.model.vision_model.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00002.safetensors",
552
+ "backbone.model.vision_model.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00002.safetensors",
553
+ "backbone.model.vision_model.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00002.safetensors",
554
+ "backbone.model.vision_model.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
555
+ "backbone.model.vision_model.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
556
+ "backbone.model.vision_model.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
557
+ "backbone.model.vision_model.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
558
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
559
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
560
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
561
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
562
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
563
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
564
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
565
+ "backbone.model.vision_model.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
566
+ "backbone.model.vision_model.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00002.safetensors",
567
+ "backbone.model.vision_model.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00002.safetensors",
568
+ "backbone.model.vision_model.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00002.safetensors",
569
+ "backbone.model.vision_model.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00002.safetensors",
570
+ "backbone.model.vision_model.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
571
+ "backbone.model.vision_model.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
572
+ "backbone.model.vision_model.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
573
+ "backbone.model.vision_model.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
574
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
575
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
576
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
577
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
578
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
579
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
580
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
581
+ "backbone.model.vision_model.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
582
+ "backbone.model.vision_model.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00002.safetensors",
583
+ "backbone.model.vision_model.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00002.safetensors",
584
+ "backbone.model.vision_model.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00002.safetensors",
585
+ "backbone.model.vision_model.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00002.safetensors",
586
+ "backbone.model.vision_model.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
587
+ "backbone.model.vision_model.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
588
+ "backbone.model.vision_model.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
589
+ "backbone.model.vision_model.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
590
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
591
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
592
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
593
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
594
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
595
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
596
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
597
+ "backbone.model.vision_model.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
598
+ "backbone.model.vision_model.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00002.safetensors",
599
+ "backbone.model.vision_model.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00002.safetensors",
600
+ "backbone.model.vision_model.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00002.safetensors",
601
+ "backbone.model.vision_model.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00002.safetensors",
602
+ "backbone.model.vision_model.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
603
+ "backbone.model.vision_model.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
604
+ "backbone.model.vision_model.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
605
+ "backbone.model.vision_model.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
606
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
607
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
608
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
609
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
610
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
611
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
612
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
613
+ "backbone.model.vision_model.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
614
+ "backbone.model.vision_model.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00002.safetensors",
615
+ "backbone.model.vision_model.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00002.safetensors",
616
+ "backbone.model.vision_model.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00002.safetensors",
617
+ "backbone.model.vision_model.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00002.safetensors",
618
+ "backbone.model.vision_model.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
619
+ "backbone.model.vision_model.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
620
+ "backbone.model.vision_model.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
621
+ "backbone.model.vision_model.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
622
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
623
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
624
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
625
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
626
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
627
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
628
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
629
+ "backbone.model.vision_model.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
630
+ "backbone.model.vision_model.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00002.safetensors",
631
+ "backbone.model.vision_model.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00002.safetensors",
632
+ "backbone.model.vision_model.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00002.safetensors",
633
+ "backbone.model.vision_model.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00002.safetensors",
634
+ "backbone.model.vision_model.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
635
+ "backbone.model.vision_model.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
636
+ "backbone.model.vision_model.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
637
+ "backbone.model.vision_model.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
638
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
639
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
640
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
641
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
642
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
643
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
644
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
645
+ "backbone.model.vision_model.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
646
+ "backbone.model.vision_model.vision_model.encoder.layers.24.layer_norm1.bias": "model-00001-of-00002.safetensors",
647
+ "backbone.model.vision_model.vision_model.encoder.layers.24.layer_norm1.weight": "model-00001-of-00002.safetensors",
648
+ "backbone.model.vision_model.vision_model.encoder.layers.24.layer_norm2.bias": "model-00001-of-00002.safetensors",
649
+ "backbone.model.vision_model.vision_model.encoder.layers.24.layer_norm2.weight": "model-00001-of-00002.safetensors",
650
+ "backbone.model.vision_model.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
651
+ "backbone.model.vision_model.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
652
+ "backbone.model.vision_model.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
653
+ "backbone.model.vision_model.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
654
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
655
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
656
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
657
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
658
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
659
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
660
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
661
+ "backbone.model.vision_model.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
662
+ "backbone.model.vision_model.vision_model.encoder.layers.25.layer_norm1.bias": "model-00001-of-00002.safetensors",
663
+ "backbone.model.vision_model.vision_model.encoder.layers.25.layer_norm1.weight": "model-00001-of-00002.safetensors",
664
+ "backbone.model.vision_model.vision_model.encoder.layers.25.layer_norm2.bias": "model-00001-of-00002.safetensors",
665
+ "backbone.model.vision_model.vision_model.encoder.layers.25.layer_norm2.weight": "model-00001-of-00002.safetensors",
666
+ "backbone.model.vision_model.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
667
+ "backbone.model.vision_model.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
668
+ "backbone.model.vision_model.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
669
+ "backbone.model.vision_model.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
670
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
671
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
672
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
673
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
674
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
675
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
676
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
677
+ "backbone.model.vision_model.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
678
+ "backbone.model.vision_model.vision_model.encoder.layers.26.layer_norm1.bias": "model-00001-of-00002.safetensors",
679
+ "backbone.model.vision_model.vision_model.encoder.layers.26.layer_norm1.weight": "model-00001-of-00002.safetensors",
680
+ "backbone.model.vision_model.vision_model.encoder.layers.26.layer_norm2.bias": "model-00001-of-00002.safetensors",
681
+ "backbone.model.vision_model.vision_model.encoder.layers.26.layer_norm2.weight": "model-00001-of-00002.safetensors",
682
+ "backbone.model.vision_model.vision_model.encoder.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
683
+ "backbone.model.vision_model.vision_model.encoder.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
684
+ "backbone.model.vision_model.vision_model.encoder.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
685
+ "backbone.model.vision_model.vision_model.encoder.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
686
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
687
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
688
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
689
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
690
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
691
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
692
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
693
+ "backbone.model.vision_model.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
694
+ "backbone.model.vision_model.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00002.safetensors",
695
+ "backbone.model.vision_model.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00002.safetensors",
696
+ "backbone.model.vision_model.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00002.safetensors",
697
+ "backbone.model.vision_model.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00002.safetensors",
698
+ "backbone.model.vision_model.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
699
+ "backbone.model.vision_model.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
700
+ "backbone.model.vision_model.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
701
+ "backbone.model.vision_model.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
702
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
703
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
704
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
705
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
706
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
707
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
708
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
709
+ "backbone.model.vision_model.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
710
+ "backbone.model.vision_model.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00002.safetensors",
711
+ "backbone.model.vision_model.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00002.safetensors",
712
+ "backbone.model.vision_model.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00002.safetensors",
713
+ "backbone.model.vision_model.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00002.safetensors",
714
+ "backbone.model.vision_model.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
715
+ "backbone.model.vision_model.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
716
+ "backbone.model.vision_model.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
717
+ "backbone.model.vision_model.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
718
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
719
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
720
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
721
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
722
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
723
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
724
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
725
+ "backbone.model.vision_model.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
726
+ "backbone.model.vision_model.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00002.safetensors",
727
+ "backbone.model.vision_model.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00002.safetensors",
728
+ "backbone.model.vision_model.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00002.safetensors",
729
+ "backbone.model.vision_model.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00002.safetensors",
730
+ "backbone.model.vision_model.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
731
+ "backbone.model.vision_model.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
732
+ "backbone.model.vision_model.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
733
+ "backbone.model.vision_model.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
734
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
735
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
736
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
737
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
738
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
739
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
740
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
741
+ "backbone.model.vision_model.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
742
+ "backbone.model.vision_model.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00002.safetensors",
743
+ "backbone.model.vision_model.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00002.safetensors",
744
+ "backbone.model.vision_model.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00002.safetensors",
745
+ "backbone.model.vision_model.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00002.safetensors",
746
+ "backbone.model.vision_model.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
747
+ "backbone.model.vision_model.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
748
+ "backbone.model.vision_model.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
749
+ "backbone.model.vision_model.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
750
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
751
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
752
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
753
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
754
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
755
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
756
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
757
+ "backbone.model.vision_model.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
758
+ "backbone.model.vision_model.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00002.safetensors",
759
+ "backbone.model.vision_model.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00002.safetensors",
760
+ "backbone.model.vision_model.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00002.safetensors",
761
+ "backbone.model.vision_model.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00002.safetensors",
762
+ "backbone.model.vision_model.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
763
+ "backbone.model.vision_model.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
764
+ "backbone.model.vision_model.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
765
+ "backbone.model.vision_model.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
766
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
767
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
768
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
769
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
770
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
771
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
772
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
773
+ "backbone.model.vision_model.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
774
+ "backbone.model.vision_model.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00002.safetensors",
775
+ "backbone.model.vision_model.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00002.safetensors",
776
+ "backbone.model.vision_model.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00002.safetensors",
777
+ "backbone.model.vision_model.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00002.safetensors",
778
+ "backbone.model.vision_model.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
779
+ "backbone.model.vision_model.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
780
+ "backbone.model.vision_model.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
781
+ "backbone.model.vision_model.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
782
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
783
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
784
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
785
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
786
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
787
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
788
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
789
+ "backbone.model.vision_model.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
790
+ "backbone.model.vision_model.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00002.safetensors",
791
+ "backbone.model.vision_model.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00002.safetensors",
792
+ "backbone.model.vision_model.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00002.safetensors",
793
+ "backbone.model.vision_model.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00002.safetensors",
794
+ "backbone.model.vision_model.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
795
+ "backbone.model.vision_model.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
796
+ "backbone.model.vision_model.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
797
+ "backbone.model.vision_model.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
798
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
799
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
800
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors",
801
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors",
802
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
803
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
804
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
805
+ "backbone.model.vision_model.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
806
+ "backbone.model.vision_model.vision_model.post_layernorm.bias": "model-00001-of-00002.safetensors",
807
+ "backbone.model.vision_model.vision_model.post_layernorm.weight": "model-00001-of-00002.safetensors"
808
+ }
809
+ }
rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440b779caee2638b0c474e0a1f791d2efc0da35a17eb3d2842994abc6c1ebb0e
3
+ size 15920
rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc85a86606da4200037fe35521f9599893824d0e980ee919bd426822e3a6031f
3
+ size 15920
rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6637358b9ecab65df49b5bcdda48d0436984bec49981586f92d6bdd41b2e8870
3
+ size 15920
rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd846e7c69e2f8ef308862b1e00e25ace247006beb5998eb698020282acb0ba
3
+ size 15920
rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e54277bf919e1e1bf358381927fe81c5e02026df9dce05d96cd0f7c04c08ae
3
+ size 15920
rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9387b1f7acedd8b004e3fcece5c4d68db52e34b379ce49564e37f99f55b61e5c
3
+ size 15920
rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8288ec39fb5a8fcedefa3507843ad5bb6ac1be94afd2dd8be9872cb018ce11e4
3
+ size 15920
rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa60440959a734f9e28124a169238bcfa11117c1d83c4eb08105a1d52d0fca50
3
+ size 15920
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc2283e2dd86ce3dc599e40b0a747196b1935f53cd2e88f5082ea97f7875481
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,2133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 8.90207715133531,
5
+ "eval_steps": 500,
6
+ "global_step": 3000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02967359050445104,
13
+ "grad_norm": 1.9033336639404297,
14
+ "learning_rate": 2.0000000000000003e-06,
15
+ "loss": 0.2317,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05934718100890208,
20
+ "grad_norm": 1.0404284000396729,
21
+ "learning_rate": 4.000000000000001e-06,
22
+ "loss": 0.1977,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08902077151335312,
27
+ "grad_norm": 0.6579734683036804,
28
+ "learning_rate": 6e-06,
29
+ "loss": 0.1451,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.11869436201780416,
34
+ "grad_norm": 0.33155006170272827,
35
+ "learning_rate": 8.000000000000001e-06,
36
+ "loss": 0.0959,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.14836795252225518,
41
+ "grad_norm": 0.5317391753196716,
42
+ "learning_rate": 1e-05,
43
+ "loss": 0.0828,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.17804154302670624,
48
+ "grad_norm": 0.45179909467697144,
49
+ "learning_rate": 1.2e-05,
50
+ "loss": 0.0814,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.20771513353115728,
55
+ "grad_norm": 0.2707938849925995,
56
+ "learning_rate": 1.4000000000000001e-05,
57
+ "loss": 0.0562,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.23738872403560832,
62
+ "grad_norm": 0.22402559220790863,
63
+ "learning_rate": 1.6000000000000003e-05,
64
+ "loss": 0.0594,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.26706231454005935,
69
+ "grad_norm": 0.16533811390399933,
70
+ "learning_rate": 1.8e-05,
71
+ "loss": 0.0529,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.29673590504451036,
76
+ "grad_norm": 0.222530797123909,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0522,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3264094955489614,
83
+ "grad_norm": 0.1894129067659378,
84
+ "learning_rate": 2.2000000000000003e-05,
85
+ "loss": 0.0514,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.3560830860534125,
90
+ "grad_norm": 0.20559543371200562,
91
+ "learning_rate": 2.4e-05,
92
+ "loss": 0.0462,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.3857566765578635,
97
+ "grad_norm": 0.157830610871315,
98
+ "learning_rate": 2.6000000000000002e-05,
99
+ "loss": 0.0471,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.41543026706231456,
104
+ "grad_norm": 0.14663924276828766,
105
+ "learning_rate": 2.8000000000000003e-05,
106
+ "loss": 0.0449,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.44510385756676557,
111
+ "grad_norm": 0.14772620797157288,
112
+ "learning_rate": 3e-05,
113
+ "loss": 0.0424,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.47477744807121663,
118
+ "grad_norm": 0.16058433055877686,
119
+ "learning_rate": 3.2000000000000005e-05,
120
+ "loss": 0.0424,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.5044510385756676,
125
+ "grad_norm": 0.15857172012329102,
126
+ "learning_rate": 3.4000000000000007e-05,
127
+ "loss": 0.041,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.5341246290801187,
132
+ "grad_norm": 0.17435680329799652,
133
+ "learning_rate": 3.6e-05,
134
+ "loss": 0.0408,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.5637982195845698,
139
+ "grad_norm": 0.1439993977546692,
140
+ "learning_rate": 3.8e-05,
141
+ "loss": 0.0352,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.5934718100890207,
146
+ "grad_norm": 0.15629075467586517,
147
+ "learning_rate": 4e-05,
148
+ "loss": 0.0383,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.6231454005934718,
153
+ "grad_norm": 0.1610369235277176,
154
+ "learning_rate": 4.2e-05,
155
+ "loss": 0.0392,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.6528189910979229,
160
+ "grad_norm": 0.17589861154556274,
161
+ "learning_rate": 4.4000000000000006e-05,
162
+ "loss": 0.0374,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.6824925816023739,
167
+ "grad_norm": 0.19186066091060638,
168
+ "learning_rate": 4.600000000000001e-05,
169
+ "loss": 0.0358,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.712166172106825,
174
+ "grad_norm": 0.1579175740480423,
175
+ "learning_rate": 4.8e-05,
176
+ "loss": 0.0357,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.7418397626112759,
181
+ "grad_norm": 0.17220136523246765,
182
+ "learning_rate": 5e-05,
183
+ "loss": 0.0334,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.771513353115727,
188
+ "grad_norm": 0.18591266870498657,
189
+ "learning_rate": 5.2000000000000004e-05,
190
+ "loss": 0.0315,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.8011869436201781,
195
+ "grad_norm": 0.2341579794883728,
196
+ "learning_rate": 5.4000000000000005e-05,
197
+ "loss": 0.0375,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.8308605341246291,
202
+ "grad_norm": 0.15227168798446655,
203
+ "learning_rate": 5.6000000000000006e-05,
204
+ "loss": 0.031,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.8605341246290801,
209
+ "grad_norm": 0.1876339167356491,
210
+ "learning_rate": 5.8e-05,
211
+ "loss": 0.0371,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.8902077151335311,
216
+ "grad_norm": 0.1789393573999405,
217
+ "learning_rate": 6e-05,
218
+ "loss": 0.0313,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.9198813056379822,
223
+ "grad_norm": 0.1678636074066162,
224
+ "learning_rate": 6.2e-05,
225
+ "loss": 0.0349,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.9495548961424333,
230
+ "grad_norm": 0.17457032203674316,
231
+ "learning_rate": 6.400000000000001e-05,
232
+ "loss": 0.0296,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.9792284866468842,
237
+ "grad_norm": 0.14290577173233032,
238
+ "learning_rate": 6.6e-05,
239
+ "loss": 0.0308,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 1.0089020771513353,
244
+ "grad_norm": 0.23601128160953522,
245
+ "learning_rate": 6.800000000000001e-05,
246
+ "loss": 0.0298,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 1.0385756676557865,
251
+ "grad_norm": 0.14039042592048645,
252
+ "learning_rate": 7e-05,
253
+ "loss": 0.0262,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 1.0682492581602374,
258
+ "grad_norm": 0.1804966777563095,
259
+ "learning_rate": 7.2e-05,
260
+ "loss": 0.0284,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 1.0979228486646884,
265
+ "grad_norm": 0.22986947000026703,
266
+ "learning_rate": 7.4e-05,
267
+ "loss": 0.0308,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 1.1275964391691395,
272
+ "grad_norm": 0.20188020169734955,
273
+ "learning_rate": 7.6e-05,
274
+ "loss": 0.0261,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 1.1572700296735905,
279
+ "grad_norm": 0.14067409932613373,
280
+ "learning_rate": 7.800000000000001e-05,
281
+ "loss": 0.028,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 1.1869436201780414,
286
+ "grad_norm": 0.16516339778900146,
287
+ "learning_rate": 8e-05,
288
+ "loss": 0.0247,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 1.2166172106824926,
293
+ "grad_norm": 0.19918474555015564,
294
+ "learning_rate": 8.2e-05,
295
+ "loss": 0.0301,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 1.2462908011869436,
300
+ "grad_norm": 0.1878385990858078,
301
+ "learning_rate": 8.4e-05,
302
+ "loss": 0.0251,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 1.2759643916913945,
307
+ "grad_norm": 0.20107118785381317,
308
+ "learning_rate": 8.6e-05,
309
+ "loss": 0.0279,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 1.3056379821958457,
314
+ "grad_norm": 0.24616649746894836,
315
+ "learning_rate": 8.800000000000001e-05,
316
+ "loss": 0.0259,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 1.3353115727002967,
321
+ "grad_norm": 0.19029636681079865,
322
+ "learning_rate": 9e-05,
323
+ "loss": 0.0262,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 1.3649851632047478,
328
+ "grad_norm": 0.194508358836174,
329
+ "learning_rate": 9.200000000000001e-05,
330
+ "loss": 0.0275,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 1.3946587537091988,
335
+ "grad_norm": 0.20826251804828644,
336
+ "learning_rate": 9.4e-05,
337
+ "loss": 0.0289,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 1.4243323442136497,
342
+ "grad_norm": 0.13222843408584595,
343
+ "learning_rate": 9.6e-05,
344
+ "loss": 0.0249,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 1.454005934718101,
349
+ "grad_norm": 0.13967235386371613,
350
+ "learning_rate": 9.8e-05,
351
+ "loss": 0.0231,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 1.4836795252225519,
356
+ "grad_norm": 0.21556402742862701,
357
+ "learning_rate": 0.0001,
358
+ "loss": 0.0232,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 1.513353115727003,
363
+ "grad_norm": 0.2407234013080597,
364
+ "learning_rate": 9.999972660400536e-05,
365
+ "loss": 0.025,
366
+ "step": 510
367
+ },
368
+ {
369
+ "epoch": 1.543026706231454,
370
+ "grad_norm": 0.1544090360403061,
371
+ "learning_rate": 9.999890641901125e-05,
372
+ "loss": 0.0224,
373
+ "step": 520
374
+ },
375
+ {
376
+ "epoch": 1.572700296735905,
377
+ "grad_norm": 0.1930345594882965,
378
+ "learning_rate": 9.999753945398704e-05,
379
+ "loss": 0.0244,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 1.6023738872403561,
384
+ "grad_norm": 0.2288358211517334,
385
+ "learning_rate": 9.99956257238817e-05,
386
+ "loss": 0.0223,
387
+ "step": 540
388
+ },
389
+ {
390
+ "epoch": 1.632047477744807,
391
+ "grad_norm": 0.2028588354587555,
392
+ "learning_rate": 9.999316524962345e-05,
393
+ "loss": 0.022,
394
+ "step": 550
395
+ },
396
+ {
397
+ "epoch": 1.6617210682492582,
398
+ "grad_norm": 0.17989283800125122,
399
+ "learning_rate": 9.999015805811965e-05,
400
+ "loss": 0.0201,
401
+ "step": 560
402
+ },
403
+ {
404
+ "epoch": 1.6913946587537092,
405
+ "grad_norm": 0.20576386153697968,
406
+ "learning_rate": 9.998660418225645e-05,
407
+ "loss": 0.0211,
408
+ "step": 570
409
+ },
410
+ {
411
+ "epoch": 1.7210682492581602,
412
+ "grad_norm": 0.1263924539089203,
413
+ "learning_rate": 9.998250366089848e-05,
414
+ "loss": 0.0205,
415
+ "step": 580
416
+ },
417
+ {
418
+ "epoch": 1.7507418397626113,
419
+ "grad_norm": 0.23239193856716156,
420
+ "learning_rate": 9.997785653888835e-05,
421
+ "loss": 0.0223,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 1.7804154302670623,
426
+ "grad_norm": 0.15964201092720032,
427
+ "learning_rate": 9.997266286704631e-05,
428
+ "loss": 0.0236,
429
+ "step": 600
430
+ },
431
+ {
432
+ "epoch": 1.8100890207715135,
433
+ "grad_norm": 0.1998339742422104,
434
+ "learning_rate": 9.996692270216947e-05,
435
+ "loss": 0.0254,
436
+ "step": 610
437
+ },
438
+ {
439
+ "epoch": 1.8397626112759644,
440
+ "grad_norm": 0.25945162773132324,
441
+ "learning_rate": 9.996063610703137e-05,
442
+ "loss": 0.0207,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 1.8694362017804154,
447
+ "grad_norm": 0.1687825620174408,
448
+ "learning_rate": 9.995380315038119e-05,
449
+ "loss": 0.0169,
450
+ "step": 630
451
+ },
452
+ {
453
+ "epoch": 1.8991097922848663,
454
+ "grad_norm": 0.13454881310462952,
455
+ "learning_rate": 9.994642390694308e-05,
456
+ "loss": 0.0185,
457
+ "step": 640
458
+ },
459
+ {
460
+ "epoch": 1.9287833827893175,
461
+ "grad_norm": 0.1880808174610138,
462
+ "learning_rate": 9.993849845741524e-05,
463
+ "loss": 0.0199,
464
+ "step": 650
465
+ },
466
+ {
467
+ "epoch": 1.9584569732937687,
468
+ "grad_norm": 0.17725640535354614,
469
+ "learning_rate": 9.993002688846913e-05,
470
+ "loss": 0.019,
471
+ "step": 660
472
+ },
473
+ {
474
+ "epoch": 1.9881305637982196,
475
+ "grad_norm": 0.17680853605270386,
476
+ "learning_rate": 9.992100929274846e-05,
477
+ "loss": 0.0257,
478
+ "step": 670
479
+ },
480
+ {
481
+ "epoch": 2.0178041543026706,
482
+ "grad_norm": 0.18133607506752014,
483
+ "learning_rate": 9.991144576886823e-05,
484
+ "loss": 0.0222,
485
+ "step": 680
486
+ },
487
+ {
488
+ "epoch": 2.0474777448071215,
489
+ "grad_norm": 0.21174193918704987,
490
+ "learning_rate": 9.990133642141359e-05,
491
+ "loss": 0.0192,
492
+ "step": 690
493
+ },
494
+ {
495
+ "epoch": 2.077151335311573,
496
+ "grad_norm": 0.15766288340091705,
497
+ "learning_rate": 9.989068136093873e-05,
498
+ "loss": 0.0199,
499
+ "step": 700
500
+ },
501
+ {
502
+ "epoch": 2.106824925816024,
503
+ "grad_norm": 0.19250448048114777,
504
+ "learning_rate": 9.987948070396571e-05,
505
+ "loss": 0.0252,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 2.136498516320475,
510
+ "grad_norm": 0.1412709653377533,
511
+ "learning_rate": 9.986773457298311e-05,
512
+ "loss": 0.0186,
513
+ "step": 720
514
+ },
515
+ {
516
+ "epoch": 2.166172106824926,
517
+ "grad_norm": 0.2136259824037552,
518
+ "learning_rate": 9.985544309644475e-05,
519
+ "loss": 0.0204,
520
+ "step": 730
521
+ },
522
+ {
523
+ "epoch": 2.1958456973293767,
524
+ "grad_norm": 0.10815251618623734,
525
+ "learning_rate": 9.984260640876821e-05,
526
+ "loss": 0.0153,
527
+ "step": 740
528
+ },
529
+ {
530
+ "epoch": 2.2255192878338277,
531
+ "grad_norm": 0.14663298428058624,
532
+ "learning_rate": 9.98292246503335e-05,
533
+ "loss": 0.0189,
534
+ "step": 750
535
+ },
536
+ {
537
+ "epoch": 2.255192878338279,
538
+ "grad_norm": 0.18529076874256134,
539
+ "learning_rate": 9.981529796748134e-05,
540
+ "loss": 0.0155,
541
+ "step": 760
542
+ },
543
+ {
544
+ "epoch": 2.28486646884273,
545
+ "grad_norm": 0.2349974811077118,
546
+ "learning_rate": 9.980082651251175e-05,
547
+ "loss": 0.0172,
548
+ "step": 770
549
+ },
550
+ {
551
+ "epoch": 2.314540059347181,
552
+ "grad_norm": 0.10778886079788208,
553
+ "learning_rate": 9.97858104436822e-05,
554
+ "loss": 0.0161,
555
+ "step": 780
556
+ },
557
+ {
558
+ "epoch": 2.344213649851632,
559
+ "grad_norm": 0.15675969421863556,
560
+ "learning_rate": 9.977024992520602e-05,
561
+ "loss": 0.0165,
562
+ "step": 790
563
+ },
564
+ {
565
+ "epoch": 2.373887240356083,
566
+ "grad_norm": 0.23468513786792755,
567
+ "learning_rate": 9.975414512725057e-05,
568
+ "loss": 0.0196,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 2.4035608308605343,
573
+ "grad_norm": 0.1332869678735733,
574
+ "learning_rate": 9.973749622593534e-05,
575
+ "loss": 0.0193,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 2.4332344213649852,
580
+ "grad_norm": 0.1406887024641037,
581
+ "learning_rate": 9.972030340333001e-05,
582
+ "loss": 0.0186,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 2.462908011869436,
587
+ "grad_norm": 0.11544730514287949,
588
+ "learning_rate": 9.970256684745258e-05,
589
+ "loss": 0.0195,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 2.492581602373887,
594
+ "grad_norm": 0.19476240873336792,
595
+ "learning_rate": 9.968428675226714e-05,
596
+ "loss": 0.0171,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 2.5222551928783385,
601
+ "grad_norm": 0.22309833765029907,
602
+ "learning_rate": 9.966546331768191e-05,
603
+ "loss": 0.0223,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 2.551928783382789,
608
+ "grad_norm": 0.2214643657207489,
609
+ "learning_rate": 9.964609674954696e-05,
610
+ "loss": 0.019,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 2.5816023738872405,
615
+ "grad_norm": 0.2298765480518341,
616
+ "learning_rate": 9.962618725965196e-05,
617
+ "loss": 0.0178,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 2.6112759643916914,
622
+ "grad_norm": 0.1542595475912094,
623
+ "learning_rate": 9.96057350657239e-05,
624
+ "loss": 0.0152,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 2.6409495548961424,
629
+ "grad_norm": 0.12994691729545593,
630
+ "learning_rate": 9.95847403914247e-05,
631
+ "loss": 0.0153,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 2.6706231454005933,
636
+ "grad_norm": 0.1726643294095993,
637
+ "learning_rate": 9.956320346634876e-05,
638
+ "loss": 0.0219,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 2.7002967359050443,
643
+ "grad_norm": 0.18292242288589478,
644
+ "learning_rate": 9.954112452602045e-05,
645
+ "loss": 0.0137,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 2.7299703264094957,
650
+ "grad_norm": 0.19749417901039124,
651
+ "learning_rate": 9.95185038118915e-05,
652
+ "loss": 0.0179,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 2.7596439169139466,
657
+ "grad_norm": 0.17808304727077484,
658
+ "learning_rate": 9.949534157133844e-05,
659
+ "loss": 0.0155,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 2.7893175074183976,
664
+ "grad_norm": 0.13589969277381897,
665
+ "learning_rate": 9.94716380576598e-05,
666
+ "loss": 0.0143,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 2.8189910979228485,
671
+ "grad_norm": 0.17047159373760223,
672
+ "learning_rate": 9.944739353007344e-05,
673
+ "loss": 0.0211,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 2.8486646884272995,
678
+ "grad_norm": 0.15535619854927063,
679
+ "learning_rate": 9.942260825371358e-05,
680
+ "loss": 0.0132,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 2.878338278931751,
685
+ "grad_norm": 0.1609240472316742,
686
+ "learning_rate": 9.939728249962807e-05,
687
+ "loss": 0.0145,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 2.908011869436202,
692
+ "grad_norm": 0.20487530529499054,
693
+ "learning_rate": 9.937141654477528e-05,
694
+ "loss": 0.0172,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 2.9376854599406528,
699
+ "grad_norm": 0.12371553480625153,
700
+ "learning_rate": 9.934501067202117e-05,
701
+ "loss": 0.0191,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 2.9673590504451037,
706
+ "grad_norm": 0.15513016283512115,
707
+ "learning_rate": 9.931806517013612e-05,
708
+ "loss": 0.0158,
709
+ "step": 1000
710
+ },
711
+ {
712
+ "epoch": 2.9970326409495547,
713
+ "grad_norm": 0.1723584234714508,
714
+ "learning_rate": 9.929058033379181e-05,
715
+ "loss": 0.0139,
716
+ "step": 1010
717
+ },
718
+ {
719
+ "epoch": 3.026706231454006,
720
+ "grad_norm": 0.1482209414243698,
721
+ "learning_rate": 9.926255646355804e-05,
722
+ "loss": 0.0148,
723
+ "step": 1020
724
+ },
725
+ {
726
+ "epoch": 3.056379821958457,
727
+ "grad_norm": 0.15559454262256622,
728
+ "learning_rate": 9.923399386589933e-05,
729
+ "loss": 0.0147,
730
+ "step": 1030
731
+ },
732
+ {
733
+ "epoch": 3.086053412462908,
734
+ "grad_norm": 0.22350917756557465,
735
+ "learning_rate": 9.92048928531717e-05,
736
+ "loss": 0.0157,
737
+ "step": 1040
738
+ },
739
+ {
740
+ "epoch": 3.115727002967359,
741
+ "grad_norm": 0.1581738144159317,
742
+ "learning_rate": 9.917525374361912e-05,
743
+ "loss": 0.0133,
744
+ "step": 1050
745
+ },
746
+ {
747
+ "epoch": 3.14540059347181,
748
+ "grad_norm": 0.20087914168834686,
749
+ "learning_rate": 9.914507686137019e-05,
750
+ "loss": 0.0208,
751
+ "step": 1060
752
+ },
753
+ {
754
+ "epoch": 3.1750741839762613,
755
+ "grad_norm": 0.16003265976905823,
756
+ "learning_rate": 9.911436253643445e-05,
757
+ "loss": 0.0168,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 3.2047477744807122,
762
+ "grad_norm": 0.15235169231891632,
763
+ "learning_rate": 9.90831111046988e-05,
764
+ "loss": 0.0168,
765
+ "step": 1080
766
+ },
767
+ {
768
+ "epoch": 3.234421364985163,
769
+ "grad_norm": 0.15660665929317474,
770
+ "learning_rate": 9.905132290792394e-05,
771
+ "loss": 0.0106,
772
+ "step": 1090
773
+ },
774
+ {
775
+ "epoch": 3.264094955489614,
776
+ "grad_norm": 0.16853424906730652,
777
+ "learning_rate": 9.901899829374047e-05,
778
+ "loss": 0.0149,
779
+ "step": 1100
780
+ },
781
+ {
782
+ "epoch": 3.293768545994065,
783
+ "grad_norm": 0.1335846185684204,
784
+ "learning_rate": 9.89861376156452e-05,
785
+ "loss": 0.0168,
786
+ "step": 1110
787
+ },
788
+ {
789
+ "epoch": 3.3234421364985165,
790
+ "grad_norm": 0.20238997042179108,
791
+ "learning_rate": 9.895274123299723e-05,
792
+ "loss": 0.0154,
793
+ "step": 1120
794
+ },
795
+ {
796
+ "epoch": 3.3531157270029674,
797
+ "grad_norm": 0.22216491401195526,
798
+ "learning_rate": 9.891880951101407e-05,
799
+ "loss": 0.019,
800
+ "step": 1130
801
+ },
802
+ {
803
+ "epoch": 3.3827893175074184,
804
+ "grad_norm": 0.2017626017332077,
805
+ "learning_rate": 9.888434282076758e-05,
806
+ "loss": 0.017,
807
+ "step": 1140
808
+ },
809
+ {
810
+ "epoch": 3.4124629080118694,
811
+ "grad_norm": 0.18049117922782898,
812
+ "learning_rate": 9.884934153917997e-05,
813
+ "loss": 0.0163,
814
+ "step": 1150
815
+ },
816
+ {
817
+ "epoch": 3.4421364985163203,
818
+ "grad_norm": 0.28145721554756165,
819
+ "learning_rate": 9.881380604901964e-05,
820
+ "loss": 0.0166,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 3.4718100890207717,
825
+ "grad_norm": 0.2356300801038742,
826
+ "learning_rate": 9.877773673889701e-05,
827
+ "loss": 0.0155,
828
+ "step": 1170
829
+ },
830
+ {
831
+ "epoch": 3.5014836795252227,
832
+ "grad_norm": 0.24113395810127258,
833
+ "learning_rate": 9.87411340032603e-05,
834
+ "loss": 0.0156,
835
+ "step": 1180
836
+ },
837
+ {
838
+ "epoch": 3.5311572700296736,
839
+ "grad_norm": 0.18665863573551178,
840
+ "learning_rate": 9.870399824239117e-05,
841
+ "loss": 0.0159,
842
+ "step": 1190
843
+ },
844
+ {
845
+ "epoch": 3.5608308605341246,
846
+ "grad_norm": 0.16171567142009735,
847
+ "learning_rate": 9.86663298624003e-05,
848
+ "loss": 0.0167,
849
+ "step": 1200
850
+ },
851
+ {
852
+ "epoch": 3.5905044510385755,
853
+ "grad_norm": 0.17315839231014252,
854
+ "learning_rate": 9.862812927522309e-05,
855
+ "loss": 0.017,
856
+ "step": 1210
857
+ },
858
+ {
859
+ "epoch": 3.620178041543027,
860
+ "grad_norm": 0.14727933704853058,
861
+ "learning_rate": 9.858939689861506e-05,
862
+ "loss": 0.0132,
863
+ "step": 1220
864
+ },
865
+ {
866
+ "epoch": 3.649851632047478,
867
+ "grad_norm": 0.1552547812461853,
868
+ "learning_rate": 9.855013315614725e-05,
869
+ "loss": 0.0175,
870
+ "step": 1230
871
+ },
872
+ {
873
+ "epoch": 3.679525222551929,
874
+ "grad_norm": 0.1715100109577179,
875
+ "learning_rate": 9.851033847720166e-05,
876
+ "loss": 0.0139,
877
+ "step": 1240
878
+ },
879
+ {
880
+ "epoch": 3.7091988130563798,
881
+ "grad_norm": 0.16414763033390045,
882
+ "learning_rate": 9.847001329696653e-05,
883
+ "loss": 0.0189,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 3.7388724035608307,
888
+ "grad_norm": 0.1251063048839569,
889
+ "learning_rate": 9.842915805643155e-05,
890
+ "loss": 0.0145,
891
+ "step": 1260
892
+ },
893
+ {
894
+ "epoch": 3.768545994065282,
895
+ "grad_norm": 0.17011059820652008,
896
+ "learning_rate": 9.838777320238312e-05,
897
+ "loss": 0.0161,
898
+ "step": 1270
899
+ },
900
+ {
901
+ "epoch": 3.798219584569733,
902
+ "grad_norm": 0.14429537951946259,
903
+ "learning_rate": 9.834585918739936e-05,
904
+ "loss": 0.0159,
905
+ "step": 1280
906
+ },
907
+ {
908
+ "epoch": 3.827893175074184,
909
+ "grad_norm": 0.138567715883255,
910
+ "learning_rate": 9.830341646984521e-05,
911
+ "loss": 0.0175,
912
+ "step": 1290
913
+ },
914
+ {
915
+ "epoch": 3.857566765578635,
916
+ "grad_norm": 0.08295896649360657,
917
+ "learning_rate": 9.826044551386744e-05,
918
+ "loss": 0.0145,
919
+ "step": 1300
920
+ },
921
+ {
922
+ "epoch": 3.887240356083086,
923
+ "grad_norm": 0.0911448523402214,
924
+ "learning_rate": 9.821694678938953e-05,
925
+ "loss": 0.0134,
926
+ "step": 1310
927
+ },
928
+ {
929
+ "epoch": 3.9169139465875373,
930
+ "grad_norm": 0.14157798886299133,
931
+ "learning_rate": 9.817292077210659e-05,
932
+ "loss": 0.0158,
933
+ "step": 1320
934
+ },
935
+ {
936
+ "epoch": 3.9465875370919883,
937
+ "grad_norm": 0.17415288090705872,
938
+ "learning_rate": 9.812836794348004e-05,
939
+ "loss": 0.0125,
940
+ "step": 1330
941
+ },
942
+ {
943
+ "epoch": 3.9762611275964392,
944
+ "grad_norm": 0.22007249295711517,
945
+ "learning_rate": 9.808328879073251e-05,
946
+ "loss": 0.0153,
947
+ "step": 1340
948
+ },
949
+ {
950
+ "epoch": 4.005934718100891,
951
+ "grad_norm": 0.144961416721344,
952
+ "learning_rate": 9.803768380684242e-05,
953
+ "loss": 0.0117,
954
+ "step": 1350
955
+ },
956
+ {
957
+ "epoch": 4.035608308605341,
958
+ "grad_norm": 0.14304885268211365,
959
+ "learning_rate": 9.799155349053851e-05,
960
+ "loss": 0.0138,
961
+ "step": 1360
962
+ },
963
+ {
964
+ "epoch": 4.0652818991097925,
965
+ "grad_norm": 0.18843571841716766,
966
+ "learning_rate": 9.794489834629455e-05,
967
+ "loss": 0.0106,
968
+ "step": 1370
969
+ },
970
+ {
971
+ "epoch": 4.094955489614243,
972
+ "grad_norm": 0.16858817636966705,
973
+ "learning_rate": 9.789771888432375e-05,
974
+ "loss": 0.0138,
975
+ "step": 1380
976
+ },
977
+ {
978
+ "epoch": 4.1246290801186944,
979
+ "grad_norm": 0.19177594780921936,
980
+ "learning_rate": 9.785001562057309e-05,
981
+ "loss": 0.0114,
982
+ "step": 1390
983
+ },
984
+ {
985
+ "epoch": 4.154302670623146,
986
+ "grad_norm": 0.20158767700195312,
987
+ "learning_rate": 9.780178907671789e-05,
988
+ "loss": 0.0146,
989
+ "step": 1400
990
+ },
991
+ {
992
+ "epoch": 4.183976261127596,
993
+ "grad_norm": 0.17675232887268066,
994
+ "learning_rate": 9.775303978015585e-05,
995
+ "loss": 0.0116,
996
+ "step": 1410
997
+ },
998
+ {
999
+ "epoch": 4.213649851632048,
1000
+ "grad_norm": 0.20077385008335114,
1001
+ "learning_rate": 9.77037682640015e-05,
1002
+ "loss": 0.0172,
1003
+ "step": 1420
1004
+ },
1005
+ {
1006
+ "epoch": 4.243323442136498,
1007
+ "grad_norm": 0.17185665667057037,
1008
+ "learning_rate": 9.765397506708023e-05,
1009
+ "loss": 0.0138,
1010
+ "step": 1430
1011
+ },
1012
+ {
1013
+ "epoch": 4.27299703264095,
1014
+ "grad_norm": 0.1641971468925476,
1015
+ "learning_rate": 9.760366073392246e-05,
1016
+ "loss": 0.0145,
1017
+ "step": 1440
1018
+ },
1019
+ {
1020
+ "epoch": 4.302670623145401,
1021
+ "grad_norm": 0.13757102191448212,
1022
+ "learning_rate": 9.755282581475769e-05,
1023
+ "loss": 0.0158,
1024
+ "step": 1450
1025
+ },
1026
+ {
1027
+ "epoch": 4.332344213649852,
1028
+ "grad_norm": 0.18012432754039764,
1029
+ "learning_rate": 9.750147086550844e-05,
1030
+ "loss": 0.0139,
1031
+ "step": 1460
1032
+ },
1033
+ {
1034
+ "epoch": 4.362017804154303,
1035
+ "grad_norm": 0.1639927178621292,
1036
+ "learning_rate": 9.744959644778422e-05,
1037
+ "loss": 0.0142,
1038
+ "step": 1470
1039
+ },
1040
+ {
1041
+ "epoch": 4.3916913946587535,
1042
+ "grad_norm": 0.17392724752426147,
1043
+ "learning_rate": 9.739720312887535e-05,
1044
+ "loss": 0.018,
1045
+ "step": 1480
1046
+ },
1047
+ {
1048
+ "epoch": 4.421364985163205,
1049
+ "grad_norm": 0.15744361281394958,
1050
+ "learning_rate": 9.734429148174675e-05,
1051
+ "loss": 0.0128,
1052
+ "step": 1490
1053
+ },
1054
+ {
1055
+ "epoch": 4.451038575667655,
1056
+ "grad_norm": 0.12954673171043396,
1057
+ "learning_rate": 9.729086208503174e-05,
1058
+ "loss": 0.0188,
1059
+ "step": 1500
1060
+ },
1061
+ {
1062
+ "epoch": 4.480712166172107,
1063
+ "grad_norm": 0.16809141635894775,
1064
+ "learning_rate": 9.723691552302562e-05,
1065
+ "loss": 0.0144,
1066
+ "step": 1510
1067
+ },
1068
+ {
1069
+ "epoch": 4.510385756676558,
1070
+ "grad_norm": 0.1523902416229248,
1071
+ "learning_rate": 9.718245238567939e-05,
1072
+ "loss": 0.0145,
1073
+ "step": 1520
1074
+ },
1075
+ {
1076
+ "epoch": 4.540059347181009,
1077
+ "grad_norm": 0.26666077971458435,
1078
+ "learning_rate": 9.712747326859315e-05,
1079
+ "loss": 0.015,
1080
+ "step": 1530
1081
+ },
1082
+ {
1083
+ "epoch": 4.56973293768546,
1084
+ "grad_norm": 0.193909153342247,
1085
+ "learning_rate": 9.707197877300974e-05,
1086
+ "loss": 0.0167,
1087
+ "step": 1540
1088
+ },
1089
+ {
1090
+ "epoch": 4.5994065281899115,
1091
+ "grad_norm": 0.17812030017375946,
1092
+ "learning_rate": 9.701596950580806e-05,
1093
+ "loss": 0.0157,
1094
+ "step": 1550
1095
+ },
1096
+ {
1097
+ "epoch": 4.629080118694362,
1098
+ "grad_norm": 0.26170411705970764,
1099
+ "learning_rate": 9.695944607949649e-05,
1100
+ "loss": 0.0139,
1101
+ "step": 1560
1102
+ },
1103
+ {
1104
+ "epoch": 4.658753709198813,
1105
+ "grad_norm": 0.14579689502716064,
1106
+ "learning_rate": 9.690240911220618e-05,
1107
+ "loss": 0.013,
1108
+ "step": 1570
1109
+ },
1110
+ {
1111
+ "epoch": 4.688427299703264,
1112
+ "grad_norm": 0.17479683458805084,
1113
+ "learning_rate": 9.684485922768422e-05,
1114
+ "loss": 0.014,
1115
+ "step": 1580
1116
+ },
1117
+ {
1118
+ "epoch": 4.718100890207715,
1119
+ "grad_norm": 0.15623094141483307,
1120
+ "learning_rate": 9.6786797055287e-05,
1121
+ "loss": 0.0151,
1122
+ "step": 1590
1123
+ },
1124
+ {
1125
+ "epoch": 4.747774480712166,
1126
+ "grad_norm": 0.1983657032251358,
1127
+ "learning_rate": 9.672822322997305e-05,
1128
+ "loss": 0.0138,
1129
+ "step": 1600
1130
+ },
1131
+ {
1132
+ "epoch": 4.777448071216617,
1133
+ "grad_norm": 0.13670101761817932,
1134
+ "learning_rate": 9.66691383922964e-05,
1135
+ "loss": 0.0114,
1136
+ "step": 1610
1137
+ },
1138
+ {
1139
+ "epoch": 4.807121661721069,
1140
+ "grad_norm": 0.13849210739135742,
1141
+ "learning_rate": 9.660954318839933e-05,
1142
+ "loss": 0.0105,
1143
+ "step": 1620
1144
+ },
1145
+ {
1146
+ "epoch": 4.836795252225519,
1147
+ "grad_norm": 0.13254909217357635,
1148
+ "learning_rate": 9.654943827000548e-05,
1149
+ "loss": 0.0113,
1150
+ "step": 1630
1151
+ },
1152
+ {
1153
+ "epoch": 4.8664688427299705,
1154
+ "grad_norm": 0.12954489886760712,
1155
+ "learning_rate": 9.648882429441257e-05,
1156
+ "loss": 0.0133,
1157
+ "step": 1640
1158
+ },
1159
+ {
1160
+ "epoch": 4.896142433234421,
1161
+ "grad_norm": 0.13290920853614807,
1162
+ "learning_rate": 9.642770192448536e-05,
1163
+ "loss": 0.0156,
1164
+ "step": 1650
1165
+ },
1166
+ {
1167
+ "epoch": 4.925816023738872,
1168
+ "grad_norm": 0.13683238625526428,
1169
+ "learning_rate": 9.636607182864827e-05,
1170
+ "loss": 0.0096,
1171
+ "step": 1660
1172
+ },
1173
+ {
1174
+ "epoch": 4.955489614243324,
1175
+ "grad_norm": 0.07962514460086823,
1176
+ "learning_rate": 9.630393468087818e-05,
1177
+ "loss": 0.0128,
1178
+ "step": 1670
1179
+ },
1180
+ {
1181
+ "epoch": 4.985163204747774,
1182
+ "grad_norm": 0.15497808158397675,
1183
+ "learning_rate": 9.624129116069694e-05,
1184
+ "loss": 0.014,
1185
+ "step": 1680
1186
+ },
1187
+ {
1188
+ "epoch": 5.014836795252226,
1189
+ "grad_norm": 0.1419367492198944,
1190
+ "learning_rate": 9.617814195316411e-05,
1191
+ "loss": 0.0132,
1192
+ "step": 1690
1193
+ },
1194
+ {
1195
+ "epoch": 5.044510385756676,
1196
+ "grad_norm": 0.2201174795627594,
1197
+ "learning_rate": 9.611448774886924e-05,
1198
+ "loss": 0.0133,
1199
+ "step": 1700
1200
+ },
1201
+ {
1202
+ "epoch": 5.074183976261128,
1203
+ "grad_norm": 0.16477946937084198,
1204
+ "learning_rate": 9.605032924392457e-05,
1205
+ "loss": 0.0131,
1206
+ "step": 1710
1207
+ },
1208
+ {
1209
+ "epoch": 5.103857566765579,
1210
+ "grad_norm": 0.19834354519844055,
1211
+ "learning_rate": 9.598566713995718e-05,
1212
+ "loss": 0.0154,
1213
+ "step": 1720
1214
+ },
1215
+ {
1216
+ "epoch": 5.1335311572700295,
1217
+ "grad_norm": 0.22880475223064423,
1218
+ "learning_rate": 9.59205021441015e-05,
1219
+ "loss": 0.014,
1220
+ "step": 1730
1221
+ },
1222
+ {
1223
+ "epoch": 5.163204747774481,
1224
+ "grad_norm": 0.16253937780857086,
1225
+ "learning_rate": 9.58548349689915e-05,
1226
+ "loss": 0.0122,
1227
+ "step": 1740
1228
+ },
1229
+ {
1230
+ "epoch": 5.192878338278931,
1231
+ "grad_norm": 0.22121521830558777,
1232
+ "learning_rate": 9.578866633275288e-05,
1233
+ "loss": 0.015,
1234
+ "step": 1750
1235
+ },
1236
+ {
1237
+ "epoch": 5.222551928783383,
1238
+ "grad_norm": 0.182882621884346,
1239
+ "learning_rate": 9.572199695899522e-05,
1240
+ "loss": 0.0163,
1241
+ "step": 1760
1242
+ },
1243
+ {
1244
+ "epoch": 5.252225519287834,
1245
+ "grad_norm": 0.17466451227664948,
1246
+ "learning_rate": 9.565482757680415e-05,
1247
+ "loss": 0.0145,
1248
+ "step": 1770
1249
+ },
1250
+ {
1251
+ "epoch": 5.281899109792285,
1252
+ "grad_norm": 0.12321746349334717,
1253
+ "learning_rate": 9.558715892073323e-05,
1254
+ "loss": 0.0127,
1255
+ "step": 1780
1256
+ },
1257
+ {
1258
+ "epoch": 5.311572700296736,
1259
+ "grad_norm": 0.1869288980960846,
1260
+ "learning_rate": 9.551899173079607e-05,
1261
+ "loss": 0.0185,
1262
+ "step": 1790
1263
+ },
1264
+ {
1265
+ "epoch": 5.341246290801187,
1266
+ "grad_norm": 0.1715238392353058,
1267
+ "learning_rate": 9.545032675245813e-05,
1268
+ "loss": 0.0136,
1269
+ "step": 1800
1270
+ },
1271
+ {
1272
+ "epoch": 5.370919881305638,
1273
+ "grad_norm": 0.20112700760364532,
1274
+ "learning_rate": 9.538116473662861e-05,
1275
+ "loss": 0.013,
1276
+ "step": 1810
1277
+ },
1278
+ {
1279
+ "epoch": 5.400593471810089,
1280
+ "grad_norm": 0.13722355663776398,
1281
+ "learning_rate": 9.531150643965223e-05,
1282
+ "loss": 0.0112,
1283
+ "step": 1820
1284
+ },
1285
+ {
1286
+ "epoch": 5.43026706231454,
1287
+ "grad_norm": 0.2131308764219284,
1288
+ "learning_rate": 9.524135262330098e-05,
1289
+ "loss": 0.012,
1290
+ "step": 1830
1291
+ },
1292
+ {
1293
+ "epoch": 5.459940652818991,
1294
+ "grad_norm": 0.13212528824806213,
1295
+ "learning_rate": 9.517070405476575e-05,
1296
+ "loss": 0.0112,
1297
+ "step": 1840
1298
+ },
1299
+ {
1300
+ "epoch": 5.489614243323442,
1301
+ "grad_norm": 0.1769159734249115,
1302
+ "learning_rate": 9.509956150664796e-05,
1303
+ "loss": 0.0122,
1304
+ "step": 1850
1305
+ },
1306
+ {
1307
+ "epoch": 5.519287833827893,
1308
+ "grad_norm": 0.15795955061912537,
1309
+ "learning_rate": 9.502792575695112e-05,
1310
+ "loss": 0.0152,
1311
+ "step": 1860
1312
+ },
1313
+ {
1314
+ "epoch": 5.548961424332344,
1315
+ "grad_norm": 0.1461247205734253,
1316
+ "learning_rate": 9.49557975890723e-05,
1317
+ "loss": 0.0138,
1318
+ "step": 1870
1319
+ },
1320
+ {
1321
+ "epoch": 5.578635014836795,
1322
+ "grad_norm": 0.12148546427488327,
1323
+ "learning_rate": 9.488317779179361e-05,
1324
+ "loss": 0.0105,
1325
+ "step": 1880
1326
+ },
1327
+ {
1328
+ "epoch": 5.6083086053412465,
1329
+ "grad_norm": 0.17779065668582916,
1330
+ "learning_rate": 9.481006715927351e-05,
1331
+ "loss": 0.0107,
1332
+ "step": 1890
1333
+ },
1334
+ {
1335
+ "epoch": 5.637982195845697,
1336
+ "grad_norm": 0.17717322707176208,
1337
+ "learning_rate": 9.473646649103818e-05,
1338
+ "loss": 0.0158,
1339
+ "step": 1900
1340
+ },
1341
+ {
1342
+ "epoch": 5.667655786350148,
1343
+ "grad_norm": 0.19820888340473175,
1344
+ "learning_rate": 9.46623765919727e-05,
1345
+ "loss": 0.0137,
1346
+ "step": 1910
1347
+ },
1348
+ {
1349
+ "epoch": 5.697329376854599,
1350
+ "grad_norm": 0.15721198916435242,
1351
+ "learning_rate": 9.458779827231237e-05,
1352
+ "loss": 0.0119,
1353
+ "step": 1920
1354
+ },
1355
+ {
1356
+ "epoch": 5.72700296735905,
1357
+ "grad_norm": 0.17158320546150208,
1358
+ "learning_rate": 9.451273234763371e-05,
1359
+ "loss": 0.0133,
1360
+ "step": 1930
1361
+ },
1362
+ {
1363
+ "epoch": 5.756676557863502,
1364
+ "grad_norm": 0.15745575726032257,
1365
+ "learning_rate": 9.443717963884569e-05,
1366
+ "loss": 0.0129,
1367
+ "step": 1940
1368
+ },
1369
+ {
1370
+ "epoch": 5.786350148367952,
1371
+ "grad_norm": 0.17795485258102417,
1372
+ "learning_rate": 9.43611409721806e-05,
1373
+ "loss": 0.013,
1374
+ "step": 1950
1375
+ },
1376
+ {
1377
+ "epoch": 5.816023738872404,
1378
+ "grad_norm": 0.18350425362586975,
1379
+ "learning_rate": 9.428461717918511e-05,
1380
+ "loss": 0.0103,
1381
+ "step": 1960
1382
+ },
1383
+ {
1384
+ "epoch": 5.845697329376854,
1385
+ "grad_norm": 0.1675357222557068,
1386
+ "learning_rate": 9.420760909671118e-05,
1387
+ "loss": 0.0149,
1388
+ "step": 1970
1389
+ },
1390
+ {
1391
+ "epoch": 5.8753709198813056,
1392
+ "grad_norm": 0.1395285427570343,
1393
+ "learning_rate": 9.413011756690685e-05,
1394
+ "loss": 0.0174,
1395
+ "step": 1980
1396
+ },
1397
+ {
1398
+ "epoch": 5.905044510385757,
1399
+ "grad_norm": 0.18694210052490234,
1400
+ "learning_rate": 9.405214343720707e-05,
1401
+ "loss": 0.011,
1402
+ "step": 1990
1403
+ },
1404
+ {
1405
+ "epoch": 5.9347181008902075,
1406
+ "grad_norm": 0.13509497046470642,
1407
+ "learning_rate": 9.397368756032445e-05,
1408
+ "loss": 0.0105,
1409
+ "step": 2000
1410
+ },
1411
+ {
1412
+ "epoch": 5.964391691394659,
1413
+ "grad_norm": 0.1707238107919693,
1414
+ "learning_rate": 9.389475079423988e-05,
1415
+ "loss": 0.0099,
1416
+ "step": 2010
1417
+ },
1418
+ {
1419
+ "epoch": 5.994065281899109,
1420
+ "grad_norm": 0.18797723948955536,
1421
+ "learning_rate": 9.381533400219318e-05,
1422
+ "loss": 0.0144,
1423
+ "step": 2020
1424
+ },
1425
+ {
1426
+ "epoch": 6.023738872403561,
1427
+ "grad_norm": 0.15263915061950684,
1428
+ "learning_rate": 9.373543805267368e-05,
1429
+ "loss": 0.0145,
1430
+ "step": 2030
1431
+ },
1432
+ {
1433
+ "epoch": 6.053412462908012,
1434
+ "grad_norm": 0.10786967724561691,
1435
+ "learning_rate": 9.365506381941066e-05,
1436
+ "loss": 0.0167,
1437
+ "step": 2040
1438
+ },
1439
+ {
1440
+ "epoch": 6.083086053412463,
1441
+ "grad_norm": 0.10059456527233124,
1442
+ "learning_rate": 9.357421218136386e-05,
1443
+ "loss": 0.0127,
1444
+ "step": 2050
1445
+ },
1446
+ {
1447
+ "epoch": 6.112759643916914,
1448
+ "grad_norm": 0.12447630614042282,
1449
+ "learning_rate": 9.349288402271388e-05,
1450
+ "loss": 0.0109,
1451
+ "step": 2060
1452
+ },
1453
+ {
1454
+ "epoch": 6.142433234421365,
1455
+ "grad_norm": 0.14649389684200287,
1456
+ "learning_rate": 9.341108023285238e-05,
1457
+ "loss": 0.0115,
1458
+ "step": 2070
1459
+ },
1460
+ {
1461
+ "epoch": 6.172106824925816,
1462
+ "grad_norm": 0.09072308987379074,
1463
+ "learning_rate": 9.332880170637252e-05,
1464
+ "loss": 0.0097,
1465
+ "step": 2080
1466
+ },
1467
+ {
1468
+ "epoch": 6.201780415430267,
1469
+ "grad_norm": 0.1700124889612198,
1470
+ "learning_rate": 9.32460493430591e-05,
1471
+ "loss": 0.0107,
1472
+ "step": 2090
1473
+ },
1474
+ {
1475
+ "epoch": 6.231454005934718,
1476
+ "grad_norm": 0.13447318971157074,
1477
+ "learning_rate": 9.316282404787871e-05,
1478
+ "loss": 0.0104,
1479
+ "step": 2100
1480
+ },
1481
+ {
1482
+ "epoch": 6.261127596439169,
1483
+ "grad_norm": 0.14388670027256012,
1484
+ "learning_rate": 9.30791267309698e-05,
1485
+ "loss": 0.0101,
1486
+ "step": 2110
1487
+ },
1488
+ {
1489
+ "epoch": 6.29080118694362,
1490
+ "grad_norm": 0.14514364302158356,
1491
+ "learning_rate": 9.299495830763286e-05,
1492
+ "loss": 0.0096,
1493
+ "step": 2120
1494
+ },
1495
+ {
1496
+ "epoch": 6.320474777448071,
1497
+ "grad_norm": 0.09969841688871384,
1498
+ "learning_rate": 9.291031969832026e-05,
1499
+ "loss": 0.0115,
1500
+ "step": 2130
1501
+ },
1502
+ {
1503
+ "epoch": 6.350148367952523,
1504
+ "grad_norm": 0.14247213304042816,
1505
+ "learning_rate": 9.282521182862629e-05,
1506
+ "loss": 0.0115,
1507
+ "step": 2140
1508
+ },
1509
+ {
1510
+ "epoch": 6.379821958456973,
1511
+ "grad_norm": 0.13603922724723816,
1512
+ "learning_rate": 9.273963562927695e-05,
1513
+ "loss": 0.0147,
1514
+ "step": 2150
1515
+ },
1516
+ {
1517
+ "epoch": 6.4094955489614245,
1518
+ "grad_norm": 0.23838986456394196,
1519
+ "learning_rate": 9.265359203611987e-05,
1520
+ "loss": 0.0115,
1521
+ "step": 2160
1522
+ },
1523
+ {
1524
+ "epoch": 6.439169139465875,
1525
+ "grad_norm": 0.11037889868021011,
1526
+ "learning_rate": 9.256708199011401e-05,
1527
+ "loss": 0.0096,
1528
+ "step": 2170
1529
+ },
1530
+ {
1531
+ "epoch": 6.468842729970326,
1532
+ "grad_norm": 0.15438543260097504,
1533
+ "learning_rate": 9.248010643731935e-05,
1534
+ "loss": 0.0126,
1535
+ "step": 2180
1536
+ },
1537
+ {
1538
+ "epoch": 6.498516320474778,
1539
+ "grad_norm": 0.12337090075016022,
1540
+ "learning_rate": 9.239266632888659e-05,
1541
+ "loss": 0.0093,
1542
+ "step": 2190
1543
+ },
1544
+ {
1545
+ "epoch": 6.528189910979228,
1546
+ "grad_norm": 0.09916039556264877,
1547
+ "learning_rate": 9.230476262104677e-05,
1548
+ "loss": 0.0114,
1549
+ "step": 2200
1550
+ },
1551
+ {
1552
+ "epoch": 6.55786350148368,
1553
+ "grad_norm": 0.1173950806260109,
1554
+ "learning_rate": 9.221639627510076e-05,
1555
+ "loss": 0.0095,
1556
+ "step": 2210
1557
+ },
1558
+ {
1559
+ "epoch": 6.58753709198813,
1560
+ "grad_norm": 0.14159461855888367,
1561
+ "learning_rate": 9.212756825740873e-05,
1562
+ "loss": 0.0096,
1563
+ "step": 2220
1564
+ },
1565
+ {
1566
+ "epoch": 6.617210682492582,
1567
+ "grad_norm": 0.14629167318344116,
1568
+ "learning_rate": 9.20382795393797e-05,
1569
+ "loss": 0.0118,
1570
+ "step": 2230
1571
+ },
1572
+ {
1573
+ "epoch": 6.646884272997033,
1574
+ "grad_norm": 0.11691708117723465,
1575
+ "learning_rate": 9.194853109746074e-05,
1576
+ "loss": 0.0095,
1577
+ "step": 2240
1578
+ },
1579
+ {
1580
+ "epoch": 6.6765578635014835,
1581
+ "grad_norm": 0.12816114723682404,
1582
+ "learning_rate": 9.185832391312644e-05,
1583
+ "loss": 0.0133,
1584
+ "step": 2250
1585
+ },
1586
+ {
1587
+ "epoch": 6.706231454005935,
1588
+ "grad_norm": 0.11063099652528763,
1589
+ "learning_rate": 9.176765897286813e-05,
1590
+ "loss": 0.0125,
1591
+ "step": 2260
1592
+ },
1593
+ {
1594
+ "epoch": 6.735905044510385,
1595
+ "grad_norm": 0.12592542171478271,
1596
+ "learning_rate": 9.167653726818305e-05,
1597
+ "loss": 0.0097,
1598
+ "step": 2270
1599
+ },
1600
+ {
1601
+ "epoch": 6.765578635014837,
1602
+ "grad_norm": 0.20816679298877716,
1603
+ "learning_rate": 9.158495979556358e-05,
1604
+ "loss": 0.0127,
1605
+ "step": 2280
1606
+ },
1607
+ {
1608
+ "epoch": 6.795252225519288,
1609
+ "grad_norm": 0.13589587807655334,
1610
+ "learning_rate": 9.14929275564863e-05,
1611
+ "loss": 0.0112,
1612
+ "step": 2290
1613
+ },
1614
+ {
1615
+ "epoch": 6.824925816023739,
1616
+ "grad_norm": 0.17974646389484406,
1617
+ "learning_rate": 9.140044155740101e-05,
1618
+ "loss": 0.0088,
1619
+ "step": 2300
1620
+ },
1621
+ {
1622
+ "epoch": 6.85459940652819,
1623
+ "grad_norm": 0.18915049731731415,
1624
+ "learning_rate": 9.130750280971978e-05,
1625
+ "loss": 0.0156,
1626
+ "step": 2310
1627
+ },
1628
+ {
1629
+ "epoch": 6.884272997032641,
1630
+ "grad_norm": 0.14018063247203827,
1631
+ "learning_rate": 9.121411232980588e-05,
1632
+ "loss": 0.0098,
1633
+ "step": 2320
1634
+ },
1635
+ {
1636
+ "epoch": 6.913946587537092,
1637
+ "grad_norm": 0.13840338587760925,
1638
+ "learning_rate": 9.112027113896262e-05,
1639
+ "loss": 0.017,
1640
+ "step": 2330
1641
+ },
1642
+ {
1643
+ "epoch": 6.943620178041543,
1644
+ "grad_norm": 0.11696403473615646,
1645
+ "learning_rate": 9.102598026342222e-05,
1646
+ "loss": 0.0099,
1647
+ "step": 2340
1648
+ },
1649
+ {
1650
+ "epoch": 6.973293768545994,
1651
+ "grad_norm": 0.13574601709842682,
1652
+ "learning_rate": 9.093124073433463e-05,
1653
+ "loss": 0.014,
1654
+ "step": 2350
1655
+ },
1656
+ {
1657
+ "epoch": 7.002967359050445,
1658
+ "grad_norm": 0.10100409388542175,
1659
+ "learning_rate": 9.083605358775612e-05,
1660
+ "loss": 0.0103,
1661
+ "step": 2360
1662
+ },
1663
+ {
1664
+ "epoch": 7.032640949554896,
1665
+ "grad_norm": 0.16500301659107208,
1666
+ "learning_rate": 9.074041986463808e-05,
1667
+ "loss": 0.0109,
1668
+ "step": 2370
1669
+ },
1670
+ {
1671
+ "epoch": 7.062314540059347,
1672
+ "grad_norm": 0.18439586460590363,
1673
+ "learning_rate": 9.064434061081562e-05,
1674
+ "loss": 0.0095,
1675
+ "step": 2380
1676
+ },
1677
+ {
1678
+ "epoch": 7.091988130563799,
1679
+ "grad_norm": 0.12944291532039642,
1680
+ "learning_rate": 9.0547816876996e-05,
1681
+ "loss": 0.0123,
1682
+ "step": 2390
1683
+ },
1684
+ {
1685
+ "epoch": 7.121661721068249,
1686
+ "grad_norm": 0.1536119282245636,
1687
+ "learning_rate": 9.045084971874738e-05,
1688
+ "loss": 0.0125,
1689
+ "step": 2400
1690
+ },
1691
+ {
1692
+ "epoch": 7.1513353115727005,
1693
+ "grad_norm": 0.1650673747062683,
1694
+ "learning_rate": 9.035344019648702e-05,
1695
+ "loss": 0.0092,
1696
+ "step": 2410
1697
+ },
1698
+ {
1699
+ "epoch": 7.181008902077151,
1700
+ "grad_norm": 0.12277387827634811,
1701
+ "learning_rate": 9.025558937546988e-05,
1702
+ "loss": 0.0102,
1703
+ "step": 2420
1704
+ },
1705
+ {
1706
+ "epoch": 7.210682492581602,
1707
+ "grad_norm": 0.15055687725543976,
1708
+ "learning_rate": 9.015729832577681e-05,
1709
+ "loss": 0.0094,
1710
+ "step": 2430
1711
+ },
1712
+ {
1713
+ "epoch": 7.240356083086054,
1714
+ "grad_norm": 0.16825971007347107,
1715
+ "learning_rate": 9.005856812230304e-05,
1716
+ "loss": 0.0116,
1717
+ "step": 2440
1718
+ },
1719
+ {
1720
+ "epoch": 7.270029673590504,
1721
+ "grad_norm": 0.10691312700510025,
1722
+ "learning_rate": 8.995939984474624e-05,
1723
+ "loss": 0.0095,
1724
+ "step": 2450
1725
+ },
1726
+ {
1727
+ "epoch": 7.299703264094956,
1728
+ "grad_norm": 0.14602239429950714,
1729
+ "learning_rate": 8.98597945775948e-05,
1730
+ "loss": 0.0124,
1731
+ "step": 2460
1732
+ },
1733
+ {
1734
+ "epoch": 7.329376854599406,
1735
+ "grad_norm": 0.13875631988048553,
1736
+ "learning_rate": 8.975975341011596e-05,
1737
+ "loss": 0.0106,
1738
+ "step": 2470
1739
+ },
1740
+ {
1741
+ "epoch": 7.359050445103858,
1742
+ "grad_norm": 0.12208565324544907,
1743
+ "learning_rate": 8.965927743634391e-05,
1744
+ "loss": 0.0108,
1745
+ "step": 2480
1746
+ },
1747
+ {
1748
+ "epoch": 7.388724035608309,
1749
+ "grad_norm": 0.11230789124965668,
1750
+ "learning_rate": 8.955836775506776e-05,
1751
+ "loss": 0.0081,
1752
+ "step": 2490
1753
+ },
1754
+ {
1755
+ "epoch": 7.4183976261127595,
1756
+ "grad_norm": 0.13064904510974884,
1757
+ "learning_rate": 8.945702546981969e-05,
1758
+ "loss": 0.0122,
1759
+ "step": 2500
1760
+ },
1761
+ {
1762
+ "epoch": 7.448071216617211,
1763
+ "grad_norm": 0.16824467480182648,
1764
+ "learning_rate": 8.935525168886262e-05,
1765
+ "loss": 0.0112,
1766
+ "step": 2510
1767
+ },
1768
+ {
1769
+ "epoch": 7.4777448071216615,
1770
+ "grad_norm": 0.11342830210924149,
1771
+ "learning_rate": 8.92530475251784e-05,
1772
+ "loss": 0.0103,
1773
+ "step": 2520
1774
+ },
1775
+ {
1776
+ "epoch": 7.507418397626113,
1777
+ "grad_norm": 0.15296466648578644,
1778
+ "learning_rate": 8.91504140964553e-05,
1779
+ "loss": 0.0085,
1780
+ "step": 2530
1781
+ },
1782
+ {
1783
+ "epoch": 7.537091988130564,
1784
+ "grad_norm": 0.16064001619815826,
1785
+ "learning_rate": 8.90473525250761e-05,
1786
+ "loss": 0.0114,
1787
+ "step": 2540
1788
+ },
1789
+ {
1790
+ "epoch": 7.566765578635015,
1791
+ "grad_norm": 0.10076630860567093,
1792
+ "learning_rate": 8.894386393810563e-05,
1793
+ "loss": 0.0144,
1794
+ "step": 2550
1795
+ },
1796
+ {
1797
+ "epoch": 7.596439169139466,
1798
+ "grad_norm": 0.15510450303554535,
1799
+ "learning_rate": 8.883994946727849e-05,
1800
+ "loss": 0.0112,
1801
+ "step": 2560
1802
+ },
1803
+ {
1804
+ "epoch": 7.626112759643917,
1805
+ "grad_norm": 0.21251456439495087,
1806
+ "learning_rate": 8.873561024898668e-05,
1807
+ "loss": 0.0106,
1808
+ "step": 2570
1809
+ },
1810
+ {
1811
+ "epoch": 7.655786350148368,
1812
+ "grad_norm": 0.17526623606681824,
1813
+ "learning_rate": 8.863084742426719e-05,
1814
+ "loss": 0.0113,
1815
+ "step": 2580
1816
+ },
1817
+ {
1818
+ "epoch": 7.6854599406528195,
1819
+ "grad_norm": 0.12284035235643387,
1820
+ "learning_rate": 8.852566213878947e-05,
1821
+ "loss": 0.0114,
1822
+ "step": 2590
1823
+ },
1824
+ {
1825
+ "epoch": 7.71513353115727,
1826
+ "grad_norm": 0.12916874885559082,
1827
+ "learning_rate": 8.842005554284296e-05,
1828
+ "loss": 0.0099,
1829
+ "step": 2600
1830
+ },
1831
+ {
1832
+ "epoch": 7.744807121661721,
1833
+ "grad_norm": 0.17493458092212677,
1834
+ "learning_rate": 8.831402879132446e-05,
1835
+ "loss": 0.0092,
1836
+ "step": 2610
1837
+ },
1838
+ {
1839
+ "epoch": 7.774480712166172,
1840
+ "grad_norm": 0.12995202839374542,
1841
+ "learning_rate": 8.820758304372557e-05,
1842
+ "loss": 0.0104,
1843
+ "step": 2620
1844
+ },
1845
+ {
1846
+ "epoch": 7.804154302670623,
1847
+ "grad_norm": 0.08063960820436478,
1848
+ "learning_rate": 8.810071946411989e-05,
1849
+ "loss": 0.0087,
1850
+ "step": 2630
1851
+ },
1852
+ {
1853
+ "epoch": 7.833827893175075,
1854
+ "grad_norm": 0.10341209173202515,
1855
+ "learning_rate": 8.799343922115044e-05,
1856
+ "loss": 0.0077,
1857
+ "step": 2640
1858
+ },
1859
+ {
1860
+ "epoch": 7.863501483679525,
1861
+ "grad_norm": 0.108217254281044,
1862
+ "learning_rate": 8.788574348801675e-05,
1863
+ "loss": 0.0117,
1864
+ "step": 2650
1865
+ },
1866
+ {
1867
+ "epoch": 7.893175074183977,
1868
+ "grad_norm": 0.1359342336654663,
1869
+ "learning_rate": 8.77776334424621e-05,
1870
+ "loss": 0.0157,
1871
+ "step": 2660
1872
+ },
1873
+ {
1874
+ "epoch": 7.922848664688427,
1875
+ "grad_norm": 0.13467204570770264,
1876
+ "learning_rate": 8.766911026676064e-05,
1877
+ "loss": 0.011,
1878
+ "step": 2670
1879
+ },
1880
+ {
1881
+ "epoch": 7.9525222551928785,
1882
+ "grad_norm": 0.1321392059326172,
1883
+ "learning_rate": 8.756017514770443e-05,
1884
+ "loss": 0.0136,
1885
+ "step": 2680
1886
+ },
1887
+ {
1888
+ "epoch": 7.98219584569733,
1889
+ "grad_norm": 0.16591744124889374,
1890
+ "learning_rate": 8.745082927659047e-05,
1891
+ "loss": 0.0093,
1892
+ "step": 2690
1893
+ },
1894
+ {
1895
+ "epoch": 8.011869436201781,
1896
+ "grad_norm": 0.14482906460762024,
1897
+ "learning_rate": 8.73410738492077e-05,
1898
+ "loss": 0.012,
1899
+ "step": 2700
1900
+ },
1901
+ {
1902
+ "epoch": 8.041543026706231,
1903
+ "grad_norm": 0.12772271037101746,
1904
+ "learning_rate": 8.723091006582389e-05,
1905
+ "loss": 0.0118,
1906
+ "step": 2710
1907
+ },
1908
+ {
1909
+ "epoch": 8.071216617210682,
1910
+ "grad_norm": 0.12283479422330856,
1911
+ "learning_rate": 8.71203391311725e-05,
1912
+ "loss": 0.0104,
1913
+ "step": 2720
1914
+ },
1915
+ {
1916
+ "epoch": 8.100890207715134,
1917
+ "grad_norm": 0.11549960821866989,
1918
+ "learning_rate": 8.700936225443959e-05,
1919
+ "loss": 0.0101,
1920
+ "step": 2730
1921
+ },
1922
+ {
1923
+ "epoch": 8.130563798219585,
1924
+ "grad_norm": 0.12340424209833145,
1925
+ "learning_rate": 8.689798064925049e-05,
1926
+ "loss": 0.0108,
1927
+ "step": 2740
1928
+ },
1929
+ {
1930
+ "epoch": 8.160237388724036,
1931
+ "grad_norm": 0.13828441500663757,
1932
+ "learning_rate": 8.678619553365659e-05,
1933
+ "loss": 0.0084,
1934
+ "step": 2750
1935
+ },
1936
+ {
1937
+ "epoch": 8.189910979228486,
1938
+ "grad_norm": 0.176404669880867,
1939
+ "learning_rate": 8.6674008130122e-05,
1940
+ "loss": 0.0093,
1941
+ "step": 2760
1942
+ },
1943
+ {
1944
+ "epoch": 8.219584569732937,
1945
+ "grad_norm": 0.12829335033893585,
1946
+ "learning_rate": 8.656141966551019e-05,
1947
+ "loss": 0.0095,
1948
+ "step": 2770
1949
+ },
1950
+ {
1951
+ "epoch": 8.249258160237389,
1952
+ "grad_norm": 0.15259785950183868,
1953
+ "learning_rate": 8.644843137107059e-05,
1954
+ "loss": 0.01,
1955
+ "step": 2780
1956
+ },
1957
+ {
1958
+ "epoch": 8.27893175074184,
1959
+ "grad_norm": 0.17509141564369202,
1960
+ "learning_rate": 8.633504448242505e-05,
1961
+ "loss": 0.0086,
1962
+ "step": 2790
1963
+ },
1964
+ {
1965
+ "epoch": 8.308605341246292,
1966
+ "grad_norm": 0.18248887360095978,
1967
+ "learning_rate": 8.622126023955446e-05,
1968
+ "loss": 0.0098,
1969
+ "step": 2800
1970
+ },
1971
+ {
1972
+ "epoch": 8.338278931750741,
1973
+ "grad_norm": 0.13852570950984955,
1974
+ "learning_rate": 8.610707988678503e-05,
1975
+ "loss": 0.0109,
1976
+ "step": 2810
1977
+ },
1978
+ {
1979
+ "epoch": 8.367952522255193,
1980
+ "grad_norm": 0.15752212703227997,
1981
+ "learning_rate": 8.599250467277483e-05,
1982
+ "loss": 0.0088,
1983
+ "step": 2820
1984
+ },
1985
+ {
1986
+ "epoch": 8.397626112759644,
1987
+ "grad_norm": 0.08893997222185135,
1988
+ "learning_rate": 8.587753585050004e-05,
1989
+ "loss": 0.0096,
1990
+ "step": 2830
1991
+ },
1992
+ {
1993
+ "epoch": 8.427299703264095,
1994
+ "grad_norm": 0.1134849488735199,
1995
+ "learning_rate": 8.576217467724128e-05,
1996
+ "loss": 0.0105,
1997
+ "step": 2840
1998
+ },
1999
+ {
2000
+ "epoch": 8.456973293768545,
2001
+ "grad_norm": 0.18662723898887634,
2002
+ "learning_rate": 8.564642241456986e-05,
2003
+ "loss": 0.0102,
2004
+ "step": 2850
2005
+ },
2006
+ {
2007
+ "epoch": 8.486646884272997,
2008
+ "grad_norm": 0.07733399420976639,
2009
+ "learning_rate": 8.553028032833397e-05,
2010
+ "loss": 0.0104,
2011
+ "step": 2860
2012
+ },
2013
+ {
2014
+ "epoch": 8.516320474777448,
2015
+ "grad_norm": 0.13568611443042755,
2016
+ "learning_rate": 8.541374968864487e-05,
2017
+ "loss": 0.0086,
2018
+ "step": 2870
2019
+ },
2020
+ {
2021
+ "epoch": 8.5459940652819,
2022
+ "grad_norm": 0.07520133256912231,
2023
+ "learning_rate": 8.529683176986295e-05,
2024
+ "loss": 0.0084,
2025
+ "step": 2880
2026
+ },
2027
+ {
2028
+ "epoch": 8.57566765578635,
2029
+ "grad_norm": 0.06504914909601212,
2030
+ "learning_rate": 8.517952785058385e-05,
2031
+ "loss": 0.0086,
2032
+ "step": 2890
2033
+ },
2034
+ {
2035
+ "epoch": 8.605341246290802,
2036
+ "grad_norm": 0.15347328782081604,
2037
+ "learning_rate": 8.506183921362443e-05,
2038
+ "loss": 0.0097,
2039
+ "step": 2900
2040
+ },
2041
+ {
2042
+ "epoch": 8.635014836795252,
2043
+ "grad_norm": 0.15778006613254547,
2044
+ "learning_rate": 8.494376714600878e-05,
2045
+ "loss": 0.0157,
2046
+ "step": 2910
2047
+ },
2048
+ {
2049
+ "epoch": 8.664688427299703,
2050
+ "grad_norm": 0.15846551954746246,
2051
+ "learning_rate": 8.482531293895412e-05,
2052
+ "loss": 0.0093,
2053
+ "step": 2920
2054
+ },
2055
+ {
2056
+ "epoch": 8.694362017804155,
2057
+ "grad_norm": 0.19215027987957,
2058
+ "learning_rate": 8.470647788785665e-05,
2059
+ "loss": 0.0101,
2060
+ "step": 2930
2061
+ },
2062
+ {
2063
+ "epoch": 8.724035608308606,
2064
+ "grad_norm": 0.14507389068603516,
2065
+ "learning_rate": 8.458726329227747e-05,
2066
+ "loss": 0.0102,
2067
+ "step": 2940
2068
+ },
2069
+ {
2070
+ "epoch": 8.753709198813056,
2071
+ "grad_norm": 0.15245003998279572,
2072
+ "learning_rate": 8.44676704559283e-05,
2073
+ "loss": 0.009,
2074
+ "step": 2950
2075
+ },
2076
+ {
2077
+ "epoch": 8.783382789317507,
2078
+ "grad_norm": 0.16085323691368103,
2079
+ "learning_rate": 8.434770068665723e-05,
2080
+ "loss": 0.0122,
2081
+ "step": 2960
2082
+ },
2083
+ {
2084
+ "epoch": 8.813056379821958,
2085
+ "grad_norm": 0.12772567570209503,
2086
+ "learning_rate": 8.422735529643444e-05,
2087
+ "loss": 0.012,
2088
+ "step": 2970
2089
+ },
2090
+ {
2091
+ "epoch": 8.84272997032641,
2092
+ "grad_norm": 0.12985776364803314,
2093
+ "learning_rate": 8.410663560133784e-05,
2094
+ "loss": 0.0085,
2095
+ "step": 2980
2096
+ },
2097
+ {
2098
+ "epoch": 8.872403560830861,
2099
+ "grad_norm": 0.13308796286582947,
2100
+ "learning_rate": 8.398554292153866e-05,
2101
+ "loss": 0.0076,
2102
+ "step": 2990
2103
+ },
2104
+ {
2105
+ "epoch": 8.90207715133531,
2106
+ "grad_norm": 0.18778546154499054,
2107
+ "learning_rate": 8.386407858128706e-05,
2108
+ "loss": 0.0116,
2109
+ "step": 3000
2110
+ }
2111
+ ],
2112
+ "logging_steps": 10,
2113
+ "max_steps": 10000,
2114
+ "num_input_tokens_seen": 0,
2115
+ "num_train_epochs": 30,
2116
+ "save_steps": 500,
2117
+ "stateful_callbacks": {
2118
+ "TrainerControl": {
2119
+ "args": {
2120
+ "should_epoch_stop": false,
2121
+ "should_evaluate": false,
2122
+ "should_log": false,
2123
+ "should_save": true,
2124
+ "should_training_stop": false
2125
+ },
2126
+ "attributes": {}
2127
+ }
2128
+ },
2129
+ "total_flos": 2.375898115552051e+18,
2130
+ "train_batch_size": 32,
2131
+ "trial_name": null,
2132
+ "trial_params": null
2133
+ }