Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +203 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 +0 -0
- checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 +3 -0
- checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1 +3 -0
- checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/.zarray +1 -0
- checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/.zarray +1 -0
.gitattributes
CHANGED
@@ -358,3 +358,206 @@ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.0 f
|
|
358 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
359 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
360 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
359 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
360 |
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
361 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
362 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
363 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
364 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
365 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
366 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
367 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
368 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
369 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
370 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
371 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
372 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
373 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
374 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
375 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
376 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
377 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
378 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
379 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
380 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
381 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
382 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
383 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
384 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
385 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
386 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
387 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
388 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
389 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
390 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
391 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
392 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
393 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
394 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
395 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
396 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
397 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
398 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
399 |
+
savedmodels/musiccoca_mv212f_cpu_compat/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
400 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
401 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
402 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
403 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
404 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
405 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
406 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
407 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
408 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
409 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
410 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
411 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
412 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
413 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
414 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
415 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
416 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
417 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
418 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
419 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
420 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
421 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
422 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
423 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
424 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
425 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
426 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
427 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
428 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
429 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
430 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
431 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
432 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
433 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
434 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
435 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
436 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
437 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
438 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
439 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
440 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
441 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
442 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
443 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
444 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
445 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
446 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
447 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
448 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
449 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
450 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
451 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
452 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
453 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
454 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
455 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
456 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
457 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
458 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
459 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
460 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
461 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
462 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
463 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
464 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
465 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
466 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
467 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
468 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
469 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
470 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
471 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
472 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
473 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
474 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
475 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
476 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
477 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
478 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
479 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
480 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
481 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
482 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
483 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
484 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
485 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
486 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
487 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
488 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
489 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
490 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
491 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
492 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
493 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
494 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
495 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
496 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
497 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
498 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
499 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
500 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
501 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
502 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
503 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
504 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
505 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
506 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
507 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
508 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
509 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
510 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
511 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
512 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
513 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
514 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
515 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
516 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
517 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
518 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
519 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
520 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
521 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
522 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
523 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
524 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
525 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
526 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
527 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
528 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
529 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
530 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
531 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
532 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
533 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
534 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
535 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
536 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
537 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
538 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
539 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
540 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
541 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
542 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
543 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
544 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
545 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
546 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
547 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
548 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
549 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
550 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
551 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
552 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
553 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
554 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
555 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
556 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
|
557 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
558 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
559 |
+
checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
560 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
561 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
|
562 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
563 |
+
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.66 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.71 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.7 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.7 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.7 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.83 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.8 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (3.79 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/0
ADDED
Binary file (3.64 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/0
ADDED
Binary file (3.72 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.76 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.77 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.74 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.79 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.72 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.73 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.72 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (3.75 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (3.75 kB). View file
|
|
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1408,1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[2816,1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64f6d340e0cc12327578b10d75a916f45a6de50e342831977a6185d0bd0e8e36
|
3 |
+
size 5377294
|
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d4c46a8dfcd3dad89de58f12f9453dd75647caaadf6b3321bd4646d1be334b3
|
3 |
+
size 5378400
|
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1408,1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[2816,1024],"zarr_format":2}
|
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1024,512],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024,1024],"zarr_format":2}
|