Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +104 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 +0 -0
.gitattributes
CHANGED
@@ -254,3 +254,107 @@ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layer
|
|
254 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
255 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
256 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
255 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
256 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
257 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
258 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
259 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
260 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
261 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
262 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
263 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
264 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
265 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
266 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
267 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
268 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
269 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
270 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
271 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
272 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
273 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
274 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
275 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
276 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
277 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
278 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
279 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
280 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
281 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
282 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
283 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
284 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
285 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
286 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
287 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
288 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
289 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
290 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
291 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
292 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
293 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
294 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
295 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
296 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
297 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
298 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
299 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
300 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
301 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
302 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
303 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
304 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
305 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
306 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
307 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
308 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
309 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
310 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
311 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
312 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
313 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
314 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
315 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
316 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
317 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
318 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
319 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
320 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
321 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
322 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
323 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
324 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
325 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
326 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
327 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
328 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
329 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
330 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
331 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
332 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
333 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
334 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
335 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
336 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
337 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
338 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
339 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
340 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
341 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
342 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
343 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
344 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
345 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
346 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
347 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
348 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
349 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
350 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
351 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
352 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
353 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
354 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
355 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
356 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
357 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
358 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
359 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
360 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.87 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.82 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (3.84 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.72 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (3.82 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.79 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.8 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.79 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.75 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.72 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.75 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.77 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.76 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.76 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.76 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.76 kB). View file
|
|