TF-Keras
chrisdonahue-goog commited on
Commit
40395c7
·
verified ·
1 Parent(s): e82735f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +250 -0
  2. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  3. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_mlp_layer_norm.scale.v/0 +0 -0
  4. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  5. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
  6. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  7. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
  8. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  9. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
  10. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  11. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  12. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  13. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_self_attention_layer_norm.scale.v/0 +0 -0
  14. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  15. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  16. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  17. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  18. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  19. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_self_attention_layer_norm.scale.v/0 +0 -0
  20. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.relpos_bias.rel_embedding.v/.zarray +1 -0
  21. checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.relpos_bias.rel_embedding.v/0.0 +0 -0
  22. checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/.zarray +1 -0
  23. checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0 +0 -0
  24. checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  25. checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0 +0 -0
  26. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/.zarray +1 -0
  27. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/.zarray +1 -0
  28. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/.zarray +1 -0
  29. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/.zarray +1 -0
  30. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/.zarray +1 -0
  31. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.value.kernel/.zarray +1 -0
  32. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/.zarray +1 -0
  33. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/.zarray +1 -0
  34. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/.zarray +1 -0
  35. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/.zarray +1 -0
  36. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale/.zarray +1 -0
  37. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale/0 +0 -0
  38. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_0.kernel/.zarray +1 -0
  39. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_0.kernel/.zarray +1 -0
  40. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_1.kernel/.zarray +1 -0
  41. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale/.zarray +1 -0
  42. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale/0 +0 -0
  43. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.query.kernel/.zarray +1 -0
  44. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.out.kernel/.zarray +1 -0
  45. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.query.kernel/.zarray +1 -0
  46. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.value.kernel/.zarray +1 -0
  47. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_1.kernel/.zarray +1 -0
  48. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.query.kernel/.zarray +1 -0
  49. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale/.zarray +1 -0
  50. checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale/0 +0 -0
.gitattributes CHANGED
@@ -561,3 +561,253 @@ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layer
561
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
562
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
563
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
562
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
563
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
564
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
565
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
566
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
567
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
568
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
569
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
570
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
571
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
572
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
573
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
574
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
575
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
576
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
577
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
578
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
579
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
580
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
581
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
582
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
583
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
584
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
585
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
586
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
587
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
588
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
589
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
590
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
591
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
592
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
593
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
594
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
595
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
596
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
597
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
598
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
599
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
600
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
601
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
602
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
603
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
604
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
605
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
606
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
607
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
608
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
609
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
610
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
611
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
612
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
613
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
614
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
615
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
616
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
617
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
618
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
619
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
620
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
621
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
622
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
623
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
624
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
625
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
626
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
627
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
628
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
629
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
630
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
631
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
632
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
633
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
634
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
635
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
636
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
637
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
638
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
639
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
640
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
641
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
642
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
643
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
644
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
645
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
646
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
647
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
648
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
649
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
650
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
651
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
652
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
653
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
654
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
655
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
656
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
657
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
658
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
659
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
660
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
661
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
662
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
663
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
664
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
665
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
666
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
667
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
668
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
669
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
670
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
671
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
672
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
673
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
674
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
675
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
676
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
677
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
678
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
679
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
680
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
681
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
682
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
683
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
684
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
685
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
686
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
687
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
688
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
689
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
690
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
691
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
692
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
693
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
694
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
695
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
696
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
697
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
698
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
699
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
700
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
701
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
702
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
703
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
704
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
705
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
706
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
707
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
708
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
709
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
710
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
711
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
712
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
713
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
714
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
715
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
716
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
717
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
718
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
719
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
720
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
721
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
722
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
723
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
724
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
725
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
726
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
727
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
728
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
729
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
730
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
731
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
732
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
733
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
734
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
735
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
736
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
737
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
738
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
739
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
740
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
741
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
742
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
743
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
744
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
745
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
746
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
747
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
748
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
749
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
750
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_15.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
751
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
752
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
753
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
754
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
755
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
756
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
757
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
758
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
759
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
760
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
761
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
762
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
763
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
764
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_8.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
765
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
766
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
767
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
768
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
769
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
770
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
771
+ checkpoints/llm_large_x3047_c1860k/target.decoder.logits_dense.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
772
+ checkpoints/llm_large_x3047_c1860k/target.decoder.logits_dense.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
773
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
774
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
775
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
776
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
777
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
778
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
779
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
780
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
781
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
782
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
783
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
784
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
785
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
786
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_11.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
787
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
788
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
789
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
790
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
791
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
792
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
793
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
794
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
795
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
796
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
797
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
798
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
799
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
800
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
801
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
802
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
803
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
804
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
805
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
806
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
807
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
808
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
809
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
810
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
811
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
812
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
813
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.78 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_1.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.75 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_2.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.73 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_3.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.78 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.93 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.84 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_13.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.79 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (2.87 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (2.89 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.relpos_bias.rel_embedding.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[12,128],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[12,128],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.decoder.decoder.temporal_decoder.relpos_bias.rel_embedding.v/0.0 ADDED
Binary file (2.32 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (2.81 kB). View file
 
checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (2.84 kB). View file
 
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.key.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.self_attention.value.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.out.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.self_attention.out.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.out.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.value.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.out.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.value.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.query.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.key.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_12.pre_cross_attention_layer_norm.scale/0 ADDED
Binary file (2.81 kB). View file
 
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_0.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,2048],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,2048],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_0.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,2048],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,2048],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wi_1.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,2048],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,2048],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.pre_cross_attention_layer_norm.scale/0 ADDED
Binary file (2.76 kB). View file
 
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.query.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.out.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.query.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.value.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wi_1.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,2048],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,2048],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.query.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768,768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768,768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[768],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[768],"zarr_format":2}
checkpoints/llm_base_x4286_c1860k/target.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale/0 ADDED
Binary file (2.68 kB). View file