TF-Keras
chrisdonahue-goog committed on
Commit
e82735f
·
verified ·
1 Parent(s): 5f121ac

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +203 -0
  2. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  3. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  4. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  5. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0 +0 -0
  6. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  7. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  8. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  9. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  10. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  11. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/0 +0 -0
  12. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  13. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/0 +0 -0
  14. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  15. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
  16. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  17. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +0 -0
  18. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  19. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  20. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  21. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/0 +0 -0
  22. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/.zarray +1 -0
  23. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/0 +0 -0
  24. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/.zarray +1 -0
  25. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/0 +0 -0
  26. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray +1 -0
  27. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 +0 -0
  28. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  29. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 +0 -0
  30. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  31. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 +0 -0
  32. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  33. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 +0 -0
  34. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/.zarray +1 -0
  35. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/0 +0 -0
  36. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  37. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
  38. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  39. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/0 +0 -0
  40. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  41. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/0 +0 -0
  42. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray +1 -0
  43. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 +0 -0
  44. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  45. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 +0 -0
  46. checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/.zarray +1 -0
  47. checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 +3 -0
  48. checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1 +3 -0
  49. checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/.zarray +1 -0
  50. checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/.zarray +1 -0
.gitattributes CHANGED
@@ -358,3 +358,206 @@ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.0 f
358
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
359
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
360
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
359
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
360
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
361
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
362
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
363
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
364
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
365
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
366
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
367
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
368
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
369
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
370
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
371
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
372
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_23.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
373
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
374
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
375
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
376
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
377
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
378
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
379
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
380
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
381
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
382
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
383
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
384
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
385
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
386
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
387
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
388
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
389
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
390
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
391
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
392
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
393
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
394
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
395
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
396
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
397
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
398
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
399
+ savedmodels/musiccoca_mv212f_cpu_compat/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
400
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
401
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
402
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
403
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
404
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
405
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
406
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
407
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
408
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
409
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
410
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
411
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
412
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
413
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
414
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
415
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
416
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
417
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
418
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
419
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
420
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
421
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
422
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
423
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
424
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
425
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
426
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
427
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
428
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_22.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
429
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
430
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
431
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
432
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
433
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_17.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
434
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
435
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
436
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
437
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
438
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
439
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
440
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
441
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
442
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
443
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
444
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
445
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
446
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
447
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
448
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
449
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
450
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
451
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
452
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
453
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
454
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
455
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
456
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
457
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
458
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_3.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
459
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
460
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
461
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
462
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
463
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
464
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
465
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
466
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
467
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
468
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
469
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
470
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
471
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
472
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
473
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
474
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
475
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
476
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
477
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
478
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
479
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
480
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
481
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
482
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
483
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
484
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
485
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
486
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
487
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
488
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
489
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
490
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
491
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
492
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
493
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
494
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
495
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
496
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
497
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
498
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
499
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
500
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
501
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
502
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
503
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
504
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
505
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
506
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
507
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
508
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
509
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
510
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
511
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
512
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
513
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
514
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
515
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
516
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
517
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
518
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
519
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
520
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
521
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
522
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
523
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
524
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
525
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
526
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
527
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
528
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
529
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
530
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
531
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
532
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
533
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
534
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
535
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
536
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_2.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
537
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
538
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
539
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_11.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
540
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
541
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
542
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
543
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
544
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_21.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
545
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
546
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
547
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
548
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
549
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
550
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_7.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
551
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_16.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
552
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
553
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
554
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
555
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
556
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
557
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_8.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
558
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
559
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_9.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
560
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
561
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
562
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
563
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.depth_decoder.depth_layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.66 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_14.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.71 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_16.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.7 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.7 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_17.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.7 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_19.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.83 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_4.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.8 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.79 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder_norm.scale.v/0 ADDED
Binary file (3.64 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.encoder_norm.scale.v/0 ADDED
Binary file (3.72 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.76 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.77 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.79 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.72 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.72 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.75 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.75 kB). View file
 
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_0.mlp.wo.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1408,1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[2816,1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f6d340e0cc12327578b10d75a916f45a6de50e342831977a6185d0bd0e8e36
3
+ size 5377294
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_0.kernel/0.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4c46a8dfcd3dad89de58f12f9453dd75647caaadf6b3321bd4646d1be334b3
3
+ size 5378400
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wo.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1408,1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[2816,1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.self_attention.key.kernel/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024,512],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024,1024],"zarr_format":2}