TF-Keras
chrisdonahue-goog committed on
Commit
621b846
·
verified ·
1 Parent(s): 40395c7

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +373 -0
  2. checkpoints/llm_base_x4286_c1860k/checkpoint +3 -0
  3. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  4. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  5. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  6. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_self_attention_layer_norm.scale.v/0 +0 -0
  7. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  8. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
  9. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  10. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_self_attention_layer_norm.scale.v/0 +0 -0
  11. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.relpos_bias_depth.rel_embedding.v/.zarray +1 -0
  12. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.relpos_bias_depth.rel_embedding.v/0.0 +0 -0
  13. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  14. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  15. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  16. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 +0 -0
  17. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  18. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  19. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  20. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_mlp_layer_norm.scale.v/0 +0 -0
  21. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  22. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
  23. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  24. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  25. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  26. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_mlp_layer_norm.scale.v/0 +0 -0
  27. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  28. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_self_attention_layer_norm.scale.v/0 +0 -0
  29. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  30. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  31. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  32. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_mlp_layer_norm.scale.v/0 +0 -0
  33. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  34. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_self_attention_layer_norm.scale.v/0 +0 -0
  35. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  36. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  37. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  38. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0 +0 -0
  39. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  40. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_mlp_layer_norm.scale.v/0 +0 -0
  41. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  42. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_self_attention_layer_norm.scale.v/0 +0 -0
  43. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  44. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  45. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  46. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0 +0 -0
  47. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  48. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_self_attention_layer_norm.scale.v/0 +0 -0
  49. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  50. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale.v/0 +0 -0
.gitattributes CHANGED
@@ -811,3 +811,376 @@ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_la
811
  checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
812
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
813
  checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
812
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
813
  checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
814
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
815
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
816
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
817
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
818
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
819
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
820
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
821
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
822
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
823
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
824
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
825
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
826
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
827
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
828
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
829
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
830
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
831
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
832
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
833
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
834
+ checkpoints/llm_large_x3047_c1860k/target.token_embedder.embedding/0.0 filter=lfs diff=lfs merge=lfs -text
835
+ checkpoints/llm_large_x3047_c1860k/target.token_embedder.embedding/1.0 filter=lfs diff=lfs merge=lfs -text
836
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
837
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
838
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
839
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
840
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
841
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
842
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
843
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
844
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
845
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
846
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
847
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
848
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
849
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
850
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
851
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
852
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
853
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
854
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
855
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
856
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
857
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
858
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
859
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
860
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
861
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
862
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
863
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
864
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
865
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
866
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
867
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
868
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
869
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
870
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
871
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
872
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
873
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
874
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
875
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
876
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
877
+ checkpoints/llm_base_x4286_c1860k/checkpoint filter=lfs diff=lfs merge=lfs -text
878
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
879
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
880
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
881
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
882
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
883
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
884
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
885
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
886
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
887
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
888
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
889
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
890
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
891
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
892
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
893
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
894
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
895
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
896
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
897
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
898
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
899
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
900
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
901
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
902
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
903
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
904
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
905
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
906
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
907
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
908
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
909
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
910
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
911
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
912
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
913
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
914
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
915
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
916
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
917
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_11.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
918
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
919
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
920
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
921
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
922
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
923
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
924
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
925
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
926
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
927
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
928
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
929
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
930
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
931
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
932
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
933
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
934
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
935
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
936
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
937
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
938
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
939
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
940
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
941
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
942
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_11.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
943
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
944
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_1.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
945
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
946
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
947
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
948
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
949
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
950
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
951
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
952
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
953
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
954
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
955
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
956
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
957
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
958
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
959
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
960
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
961
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
962
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
963
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
964
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_11.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
965
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
966
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
967
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
968
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
969
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
970
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
971
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
972
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
973
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
974
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
975
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
976
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
977
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
978
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
979
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
980
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
981
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
982
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
983
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
984
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
985
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
986
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
987
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
988
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
989
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
990
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
991
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
992
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
993
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
994
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
995
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
996
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
997
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
998
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
999
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1000
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1001
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1002
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_1.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1003
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1004
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1005
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1006
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1007
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1008
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1009
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1010
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1011
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1012
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1013
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1014
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1015
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1016
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1017
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1018
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1019
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1020
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1021
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1022
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1023
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1024
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1025
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1026
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1027
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1028
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1029
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1030
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1031
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1032
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1033
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1034
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1035
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1036
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1037
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1038
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1039
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1040
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1041
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1042
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1043
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1044
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1045
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1046
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1047
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_1.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1048
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1049
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1050
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1051
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1052
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1053
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1054
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1055
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1056
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1057
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1058
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1059
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1060
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1061
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1062
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1063
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1064
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1065
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1066
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1067
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1068
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1069
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1070
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1071
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1072
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1073
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_10.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1074
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1075
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1076
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1077
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1078
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1079
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1080
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1081
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1082
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1083
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1084
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1085
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1086
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1087
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1088
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1089
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1090
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1091
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1092
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1093
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1094
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1095
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1096
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1097
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1098
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1099
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1100
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1101
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1102
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1103
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1104
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1105
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1106
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1107
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1108
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1109
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1110
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_11.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1111
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1112
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1113
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1114
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_7.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1115
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1116
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_9.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1117
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1118
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1119
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1120
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1121
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1122
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1123
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1124
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1125
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1126
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1127
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1128
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1129
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1130
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1131
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1132
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1133
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1134
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1135
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1136
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1137
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1138
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1139
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1140
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1141
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1142
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1143
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1144
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1145
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1146
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_3.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1147
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1148
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1149
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1150
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1151
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1152
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1153
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1154
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_6.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1155
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1156
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1157
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1158
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_11.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1159
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1160
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_4.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1161
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1162
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1163
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1164
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1165
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1166
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1167
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1168
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1169
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1170
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1171
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1172
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1173
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1174
+ checkpoints/llm_base_x4286_c1860k/target.decoder.logits_dense.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1175
+ checkpoints/llm_base_x4286_c1860k/target.decoder.logits_dense.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
1176
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1177
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1178
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_5.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1179
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1180
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1181
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1182
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1183
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1184
+ checkpoints/llm_base_x4286_c1860k/target.encoder.layers_8.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1185
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
1186
+ checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
checkpoints/llm_base_x4286_c1860k/checkpoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a772893769281208fbba5e6c75279ebe1afbed20f86620927e9bd04713d67fe5
3
+ size 2995665
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.76 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.8 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.71 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.72 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.relpos_bias_depth.rel_embedding.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[12,16],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[12,16],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.relpos_bias_depth.rel_embedding.v/0.0 ADDED
Binary file (776 Bytes). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.92 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.82 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.91 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.9 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.9 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.85 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.84 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_10.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.86 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.84 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.83 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.86 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.83 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.83 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.82 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.81 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.82 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.8 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.8 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.82 kB). View file