TF-Keras
chrisdonahue-goog committed on
Commit
5f121ac
·
verified ·
1 Parent(s): d940c77

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +104 -0
  2. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  3. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  4. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  5. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  6. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  7. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
  8. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  9. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  10. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  11. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0 +0 -0
  12. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  13. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/0 +0 -0
  14. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  15. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  16. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  17. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
  18. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  19. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_mlp_layer_norm.scale.v/0 +0 -0
  20. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  21. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  22. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  23. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/0 +0 -0
  24. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  25. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  26. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  27. checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/0 +0 -0
  28. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +1 -0
  29. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +0 -0
  30. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray +1 -0
  31. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 +0 -0
  32. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  33. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 +0 -0
  34. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  35. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 +0 -0
  36. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray +1 -0
  37. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 +0 -0
  38. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  39. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/0 +0 -0
  40. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/.zarray +1 -0
  41. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/0 +0 -0
  42. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/.zarray +1 -0
  43. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/0 +0 -0
  44. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  45. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
  46. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray +1 -0
  47. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 +0 -0
  48. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 +0 -0
  49. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray +1 -0
  50. checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 +0 -0
.gitattributes CHANGED
@@ -254,3 +254,107 @@ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layer
254
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
255
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
256
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
255
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
256
  checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
257
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_17.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
258
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_12.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
259
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
260
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
261
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
262
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
263
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_5.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
264
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
265
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
266
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
267
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
268
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
269
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
270
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
271
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
272
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
273
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
274
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
275
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
276
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
277
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
278
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
279
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
280
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_18.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
281
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
282
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
283
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
284
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
285
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
286
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
287
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_7.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
288
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
289
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_9.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
290
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
291
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
292
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
293
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
294
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_4.self_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
295
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
296
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
297
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.self_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
298
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
299
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
300
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
301
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
302
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
303
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_2.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
304
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
305
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
306
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_3.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
307
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
308
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
309
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
310
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.encoder_decoder_attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
311
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
312
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_13.self_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
313
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_12.attention.out.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
314
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.out.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
315
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
316
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_4.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
317
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
318
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
319
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
320
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
321
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_10.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
322
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
323
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
324
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_19.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
325
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
326
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
327
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
328
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
329
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
330
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
331
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
332
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_18.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
333
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_20.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
334
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
335
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
336
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
337
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
338
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_19.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
339
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
340
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_13.attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
341
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
342
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_16.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
343
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_14.mlp.wo.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
344
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_0.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
345
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
346
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.value.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
347
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_6.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
348
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
349
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_5.attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
350
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_0.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
351
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_14.mlp.wi_0.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
352
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.depth_decoder.depth_layers_1.mlp.wi_1.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
353
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_10.self_attention.value.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
354
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_0.mlp.wo.kernel/1.0 filter=lfs diff=lfs merge=lfs -text
355
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_2.encoder_decoder_attention.key.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
356
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_6.attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
357
+ checkpoints/llm_large_x3047_c1860k/target.encoder.layers_1.mlp.wi_1.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
358
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
359
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_15.encoder_decoder_attention.key.kernel/0.0 filter=lfs diff=lfs merge=lfs -text
360
+ checkpoints/llm_large_x3047_c1860k/target.decoder.decoder.temporal_decoder.layers_1.self_attention.query.kernel/0.1 filter=lfs diff=lfs merge=lfs -text
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.87 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.82 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (3.84 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_12.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_15.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_18.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.72 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (3.82 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.79 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_6.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.8 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_7.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.79 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_8.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (3.75 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.decoder.decoder.temporal_decoder.layers_9.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.72 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.75 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.77 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.76 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.74 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (3.76 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.76 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.73 kB). View file
 
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1024],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1024],"zarr_format":2}
checkpoints/llm_large_x3047_c1860k/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (3.76 kB). View file