diff --git a/.gitattributes b/.gitattributes index a66da316f601d6d12cf5af8ef99f8881f464fc87..b78a946011de2902f947b3428e6cf2258d08867e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -35,3 +35,119 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/checkpoint filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +savedmodels/musiccoca_mv212_quant/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +savedmodels/ssv2_48k_stereo/encoder/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text +checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoints/llm_large_x3047_c1860k/checkpoint b/checkpoints/llm_large_x3047_c1860k/checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..fbbf7d09591f962de3ab7ec33a1ac2b464660b9c --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/checkpoint @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccf9bcd4a85893c0b308414075688751438ddcb7661b22a5a6a59572c9675f1 +size 4881325 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..52d9bbdd449112ed169c63d70c98d774800e81b9 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d350f41d2d6415110d4ab4e0f5ab5998e8a1402de19f27cadde2a915b8ec0359 +size 1955236 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.1 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..d35390fd439e880abda9aeef89bae34184f1bd65 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9f707608851e259b37f3209408a8aea4544f57a6b9a31eac014d773be26303 +size 1956088 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..dd7ddd59745ed5d7bb8b83152c4cbfd9f364c5f8 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d918285bc089b4f26e97ba85dab15840fd20a36d11aa9cdff477be19d35e1bd +size 1955877 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.1 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..47da78ea82a7133534a258ab8f2690e133fece48 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a30becb4c48b67166ddbc5a4057b80a5ab9b45f95545a5d4e27207625f5a80 +size 1956366 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8af89c2d1ce4075576b3dde2e9ac2c60d3d18747 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f9eca473cc699ac8a8f79b6ca47cefdd4c815085f492e8ba8b968b254ee8088 +size 1951045 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/1.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..2d7680fa932c974b5c25c6eba764f3b4bc1537ce --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/1.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcaad1b419d7109a260c29e2b67461899162854be8113ed2f5f1c70941e5a9fa +size 1950729 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..581dd092ac809838684a8fc775cda058691da773 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861d5ec0ce4ca61d0d131bd9c123418e1f8af4b937bf72ca2e54ece836cf70c5 +size 1950618 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/1.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..1d663956d5ec92b4621b927f7148afbf2f5721e9 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/1.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb57020bce75a6e6a86b2b73aa0ab8d2911d71227b60642148b5cb29bc3a2667 +size 1950784 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..cb456aa24b74e51db70307d5d21e4d425da4542c --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e8c5a9643890cc13cd32fe9843df0a280b2c72c2dff579ff7b9115617481f9 +size 1951457 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/1.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..90bf377c8fbec86c7d2389c5327a2ccb8c3bfaa1 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/1.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557e0872ca4fec0a15c5628918311aa98f6d8b39bf6b08f71c760df8687d2957 +size 1950777 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fab6f8d6794b17194d65ef6f98bcefd2d2a811df --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9238d2088e32a50575f0da3aeefa96f968c53a6e5abb7274f642aa417c734c84 +size 1960458 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/1.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..17806faebd35af8e1c2b2964038f97271fdfd8b6 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/1.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd5d3f3298819527607733d75cd7d08cd40b1c0fe8035e8deabdb3511cd6d52 +size 1962493 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.out.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0278011a46770b9c210400ccc0e0106c6fafd9ae --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c17f4c588bc9f29140d8f3d79eba103f15ac9c3c4fc1bbe71c7a7a9f6072fe6 +size 1953495 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5efa7a6c1a43d71c4e4db5020e8d890bc3d79736 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77b1e526c822c3236cc2ed2a6c5360e5b23e83124d846bdfcd2d12fc261999c +size 1951431 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.1 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..775561a2f691240fde8be377588f69a389b94d13 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300492bf20392d3dc791891fd20297d5a972831ba2699bdbd633fc59807a792c +size 1951517 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..da92b7230595fd380d1578008ea9d77c0115ee81 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f09ce805ea3d11bf4668acdd9e24259ae34233d633204e779178eb7ac076e6 +size 1950477 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.1 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..b0129ccdfe4f3a8da0f72fbcc045641dda3c7a21 --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d98b259c299f90467e065d70b341369f27cbaf479f430f311ae2cfb3f52bbd1 +size 1950275 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.0 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3d8f37abbb3e1fc37b36515440e34e4e09cd88ed --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348c97029507c294f1bedc4fa8cddec0bcb733968c4e9b5ce9a7f76f8999448c +size 1955734 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.1 b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..84bf3f3a5fbacb734e10bf88423e58de1e904e2a --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5251f597b4ea566bd6cea72fd29895b25ab5daf68d76f8ca2b5a3ca673ba817e +size 1955863 diff --git a/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.out.kernel/.zarray b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..b265a0d45553ad446a52f5856c7876ef3292058e --- /dev/null +++ b/checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[512,1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"