diff --git a/checkpoint_metadata.json b/checkpoint_metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd32b2ef68a34e178de5dce34487418ea3e2e510
--- /dev/null
+++ b/checkpoint_metadata.json
@@ -0,0 +1,9 @@
+{
+  "dp": 64,
+  "metas": {
+    "consumed_train_samples": 11776000,
+    "last_train_step": 11500
+  },
+  "tp": 1,
+  "version": "1.2"
+}
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23ba1e82facbc116ebdc13bb8fd58385cd40c99e
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,144 @@
+checkpoints:
+  checkpoint_interval: 500
+  checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
+  checkpoints_path_is_shared_file_system: false
+  resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
+  save_initial_state: true
+data:
+  dataset:
+    dataloader_type: single
+    dataset_max_tokens: null
+    dataset_weights: null
+    datasets:
+    - bits_per_token: 16
+      filename_pattern: .*\.ds$
+      folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-/
+      original_folder: null
+      seed: 6
+      shuffle: true
+      skip_tokens: 0
+    pad_samples_to_global_batch_size: false
+    skip_in_stream: true
+  num_loading_workers: 0
+  seed: 6
+experiment_logger:
+  tensorboard_logger:
+    push_to_hub_interval: 300
+    repo_id: craffel/commav0p1-ablations
+    repo_public: false
+    tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations
+  wandb_logger: null
+general:
+  benchmark_csv_path: null
+  consumed_train_samples: 11776000
+  ignore_sanity_checks: true
+  project: commav0p1-ablations
+  run: commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
+  seed: 42
+  step: 11500
+kill_switch_path: null
+lighteval:
+  batch_size: 16
+  checkpoints_path: null
+  generation: null
+  logging:
+    hub_repo_details: null
+    hub_repo_results: null
+    hub_repo_tensorboard: craffel/commav0p1-ablations
+    local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
+    push_details_to_hub: false
+    push_results_to_hub: false
+    push_results_to_tensorboard: true
+    tensorboard_metric_prefix: e
+  parallelism:
+    dp: 8
+    expert_parallel_size: 1
+    pp: 1
+    pp_engine: 1f1b
+    tp: 1
+    tp_linear_async_communication: false
+    tp_mode: ALL_REDUCE
+  slurm_script_dir: /fsx/craffel/train/eval-scripts
+  slurm_template: /fsx/craffel/run_eval.slurm.jinja
+  tasks:
+    custom_tasks: brrr.lighteval.evaluation_tasks
+    dataset_loading_processes: 8
+    max_samples: 1000
+    multichoice_continuations_start_space: null
+    no_multichoice_continuations_start_space: null
+    num_fewshot_seeds: null
+    tasks: early-signal
+  wandb: null
+logging:
+  iteration_step_info_interval: 1
+  log_level: info
+  log_level_replica: info
+model:
+  ddp_bucket_cap_mb: 25
+  dtype: bfloat16
+  init_method:
+    std: 0.02
+  make_vocab_size_divisible_by: 1
+  model_config:
+    bos_token_id: 1
+    eos_token_id: 2
+    hidden_act: silu
+    hidden_size: 2048
+    initializer_range: 0.02
+    intermediate_size: 8192
+    is_llama_config: true
+    max_position_embeddings: 2048
+    num_attention_heads: 32
+    num_hidden_layers: 24
+    num_key_value_heads: 32
+    pad_token_id: null
+    pretraining_tp: 1
+    rms_norm_eps: 1.0e-05
+    rope_scaling: null
+    tie_word_embeddings: true
+    use_cache: true
+    vocab_size: 50272
+optimizer:
+  accumulate_grad_in_fp32: true
+  adam_beta1: 0.9
+  adam_beta2: 0.95
+  adam_eps: 1.0e-08
+  clip_grad: 1.0
+  learning_rate_scheduler:
+    learning_rate: 0.0003
+    lr_decay_starting_step: null
+    lr_decay_steps: null
+    lr_decay_style: cosine
+    lr_warmup_steps: 500
+    lr_warmup_style: linear
+    min_decay_lr: 3.0e-05
+  torch_adam_is_fused: true
+  weight_decay: 0.1
+  zero_stage: 0
+parallelism:
+  dp: 64
+  expert_parallel_size: 1
+  pp: 1
+  pp_engine: 1f1b
+  tp: 1
+  tp_linear_async_communication: true
+  tp_mode: REDUCE_SCATTER
+profiler: null
+s3_upload:
+  remove_after_upload: true
+  s5cmd_concurrency: 5
+  s5cmd_numworkers: 16
+  s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd
+  upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
+tokenizer:
+  tokenizer_max_length: null
+  tokenizer_name_or_path: gpt2
+  tokenizer_revision: null
+tokens:
+  batch_accumulation_per_replica: 4
+  limit_test_batches: 0
+  limit_val_batches: 0
+  micro_batch_size: 4
+  sequence_length: 2048
+  train_steps: 166893
+  val_check_interval: 100
diff --git a/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..54741bf6206e502dbc83da47f0453336701b0e8b
--- /dev/null
+++ b/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f595f843328e4dbfbfe4bb03702ee99aa945848aa992cdb9f381cafb7752f226
+size 8388848
diff --git a/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1c1dc187f9ccfdc9958909232771a0f326873611
--- /dev/null
+++ b/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c6865cb7f19fa47c0357a78a30379fccd92cf9c6029fad264952cfc34b901d
+size 25166176
diff --git a/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f633f17b2d1f677cdd4ace4d61430faf1dfed7d7
--- /dev/null
+++ b/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23e2b0d83742b9f670fbb0987fafda40a18ab097160a765307e9c760f29a3ebb
+size 4192
diff --git a/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e3d137e1fd45c8219fcad14e2ea80390f3f69c8d
--- /dev/null
+++ b/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:547c3af525a293f758643eb51858e5ed85beacaca4403eb7f3c46d2cb1af180f
+size 33554672
diff --git a/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b09be9ad169cef0caf67f6d088ca4b27af03ff4
--- /dev/null
+++ b/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ce4ca52dcd754a5b7be9853ab60d55b54b6745005176872cc978765112bed7b
+size 67109160
diff --git a/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ec52372e13f8a2b4e63cd8af6e60080ccea77cce
--- /dev/null
+++ b/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f789059786919335721746905b2fed6983b972cd976adfc65a0b63b5af9e3f9
+size 4192
diff --git a/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30cd0de8d5875af11c1e4addc31c430527f02895
--- /dev/null
+++ b/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:857b31f40d13617a9afa0119ad2493bb9581252de91d95a0b0cc38baa3c2ed5b
+size 8388848
diff --git a/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8998287f9a1150f1f4495e6b364895760a74c89d
--- /dev/null
+++ b/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16ea0dbd8bf6a1b0d8268884e18fe51f2d2e008c7a25237e19cf7366cbd9851d
+size 25166176
diff --git a/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ee31bfe56654eaca5f41230d6514d00690056ae1
--- /dev/null
+++ b/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09208cc005d4079fdaf9eb5f60027316757fcd2e952f2b8119f1fbf62e9a9ef7
+size 4192
diff --git a/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9be4c0b80f058f2004e5ba5808b6673ec965bfcb
--- /dev/null
+++ b/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a079c60b58cc02e979f530490d1244f4c12d2cdb575b62734583f4186cb1d04
+size 33554672
diff --git a/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f252d5e8d0391ca60ca12dbc8606bb8604c303b2
--- /dev/null
+++ b/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:345cdcd95a92037a84cdada460c1b549763495770e58e7d17f1ba05f88cc9d3f
+size 67109160
diff --git a/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a7f16a22032707af86a697399e3bcb00e6caaf4d
--- /dev/null
+++ b/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e73698f3aab361c9f71a5cdb6e6d20b3c9a264d26e367946d701cb5a59ba943
+size 4192
diff --git a/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b76ce919cbdad6755f9af01fe0f8c60ac8d3220
--- /dev/null
+++ b/model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9c786e1cdf095dead8d577e450a916ee186f094c18810b176637309fbf528a6
+size 8388848
diff --git a/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b04f801517bd713dae7886a4d90941e8ee128e29
--- /dev/null
+++ b/model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd249d1d16f0e112e0fbe42f465d2e0f839671fcb3cfc01d300a084e4b5def83
+size 25166176
diff --git a/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c095b5f8977ae53d59bf4f5bd0702078cca87a88
--- /dev/null
+++ b/model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c7511e6536720d0ab495e3f6e9e014228a2b0992b9d905fb261339845cf623c
+size 4192
diff --git a/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c0d0dff88a9456194aad15ead8f41fba750d2a4e
--- /dev/null
+++ b/model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0381c3a4bebbb052d2e94a03bfaf77b366773a504da76bbc76e5fca3a1ca29cc
+size 33554672
diff --git a/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..717a9a6d93ad95575e06b1ae77510ab02359d333
--- /dev/null
+++ b/model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3543891896f068c6067808c320ff65ddccc851d01fd82ebb6b7ca258e8f6cce
+size 67109160
diff --git a/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7f6d7f29801207bd8438197d13a254c5d653b983
--- /dev/null
+++ b/model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d35ec64afc5c9c35640ad05a9ed653f1b726d38ccee8bcd1382fb16ec03de8
+size 4192
diff --git a/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fcb1beeb1ac7f4007a9904fe437900ee53fef1e3
--- /dev/null
+++ b/model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac80ad0b0858a3cd628fbe411bf8e25eac602a5cb143983aa25c540ddfb7471f
+size 8388848
diff --git a/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..df39a3972ed275b29589b37b4083e2ae23fae6b1
--- /dev/null
+++ b/model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31adda5f22fa301b272589f5077b9ef4c72997e3726a6cc365bd0a1f8a75c17f
+size 25166176
diff --git a/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..23189ff5f50ce4dd348095207066abc936b7f0f9
--- /dev/null
+++ b/model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cda020b7d376de3e3318e90773d43d07e65a4f0bb25dedb5f7e7f373001912d2
+size 4192
diff --git a/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..937e7c73d09398362cf8e2f9085a96dfbbb7947c
--- /dev/null
+++ b/model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03bb08bc15845088fb71bf35e0fbd47e047a846110a5dd2b38d374fa6de2e9b3
+size 33554672
diff --git a/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef46e26f2adeea65bd31fd605d5283c3be5b0b7c
--- /dev/null
+++ b/model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:657d742c73746fb8be4ee69826641f6d2dc4a682428dcf72b5316d21d74323f6
+size 67109160
diff --git a/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aebced8bc93f5261385caadf8d4071332a62956b
--- /dev/null
+++ b/model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:909e56fdf401265785d845168cb0dc200c7af771d0f57b0c7357fa3ca199afed
+size 4192
diff --git a/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..76e660c9e33f12eadd5b8e3a221c67a83d5d96ea
--- /dev/null
+++ b/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:330d7d6710fff1c67078bfd47e3e66fd4c57edcbc00d9cfc387213869849b8d7
+size 8388848
diff --git a/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01bf757f92d79c44e340802163501aa5fc9e9f82
--- /dev/null
+++ b/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:244a293c15e9f62d6602091d955d8e0ba835abbe59be6b8a99bc959e87073ac3
+size 25166176
diff --git a/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f577a90e71f331c67e7e0f908f5de5b69ac7135
--- /dev/null
+++ b/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:741db7520d4105a068fe664b8c685938160d02c019a0f3f539f235b0f8afa9dd
+size 4192
diff --git a/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d832b86fad9107440505c7a88d29bbbe500ea4c
--- /dev/null
+++ b/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4854fff099477f5a89b9a6f90388dc72291970623c6c50430702faa52ae54b81
+size 33554672
diff --git a/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6a4333c5bc7e2f763a42769b57469a0da3a23997
--- /dev/null
+++ b/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4da6220b7c9da37be4d0fd11d215bd60fedfef20133ba07309a896b731ebfd38
+size 67109160
diff --git a/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fcdd864b6787689f3da2e097e895894266bb3e85
--- /dev/null
+++ b/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec19650183c6723bafec772e7753ff82b854f0ba504473cd26d38f314d007286
+size 4192
diff --git a/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..68c9ca78b969a3cc8e724692ce249f032c1c1867
--- /dev/null
+++ b/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebe2c38452c95c4fd7513eaf391126fc2ba8eb8ec4d3977b9630a2869caf8f07
+size 8388848
diff --git a/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d01d8e9aaf726f7903862c35e6b7540baa2cfd20
--- /dev/null
+++ b/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69df2cf77996f34b6818b40abfe827e9abf3d13c33c145b4168b5410401afcdc
+size 25166176
diff --git a/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b863dbe136b740de403ab800321c28610d4e82a
--- /dev/null
+++ b/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e16bb44d659cbfc89bb76143adc90f3d002ad6d2e433f06ce1fb3b536358a568
+size 4192
diff --git a/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2d7cc708d3cbe9e3a7ea189aeb94bc5291d7c409
--- /dev/null
+++ b/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c5e2d80309962ddab74c797b5fe38d96bab3517e537f8e3137ac9076b52f579
+size 33554672
diff --git a/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8e0a5cc245bc96182cb4fcb07ee1f1d792761fce
--- /dev/null
+++ b/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1662d032158c41c5b3580253f32b3895f33e06dbd49be3e1fdcc606531134f92
+size 67109160
diff --git a/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8cc2b14c7d98613d6f0cd98186a2f9d6c2e779b3
--- /dev/null
+++ b/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c157081b4232b3688029601d8a3701da500c452ccde7e109a8aefc4f099e53
+size 4192
diff --git a/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7335b33763026885e3c1c6973f76b1bb91ccf5dc
--- /dev/null
+++ b/model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d0d2f5b186b8a6eb7db79a68d917233d4dc30c84505361d6afe35998d84b89f
+size 8388848
diff --git a/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9531f796a83f435ca53ea0ecb338cd68171feba3
--- /dev/null
+++ b/model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4742ccc61006f1bfb2876219f5df3b42c1a5346053b8b0332dd9ec1f812626fe
+size 25166176
diff --git a/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..78726007227ccb8482f7f2872e67b28ea93e5bd9
--- /dev/null
+++ b/model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eec94d8153c9550c0726821b4ee11077639f2ced03cd4082a6ed94c2389cc6b
+size 4192
diff --git a/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..12243fbf97842dbd7566ea18a979bd45a4a87973
--- /dev/null
+++ b/model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c224ac691fb31507663bbeee7b32e969c8e836e6571ae249c01e6240151f2ce
+size 33554672
diff --git a/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0c1aad4ac1a6a93fc671f47456f640ea1e63188
--- /dev/null
+++ b/model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d4deba2b45b7e1f162c31d962a618bf39cfedc64e6179cb025afe220e1c3d48
+size 67109160
diff --git a/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..139307ebdd760ef5986330a73a3259cad4997ee7
--- /dev/null
+++ b/model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13bfbc176224d4f4b850d74682fb1dc5436ee08cdda021cc829a272a2314fd1f
+size 4192
diff --git a/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..05eb52487b254e4f165fc761457af347d39de142
--- /dev/null
+++ b/model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebed027c57964e1f8dc1e5fb82fd52304b0a3de85a7ef68eb4c269ee81c1941c
+size 8388848
diff --git a/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d4b3fd362b5476a25d3ed68f23bfbb7b5b2a64ed
--- /dev/null
+++ b/model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ccebed5a572e93346a2f96b2b78ea94ae20a57956b13b7cac4c40eb10a9f660
+size 25166176
diff --git a/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..13e962020265e7fba66e3c7ab388376387e87d28
--- /dev/null
+++ b/model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9005570002611a550a2f2230e8173bde314413699e302d6280fde4ec93e8976
+size 4192
diff --git a/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4aa51001e19e6a015557d63b322f4e6cb70e5ee0
--- /dev/null
+++ b/model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b6ab383f8bb91a95cb60375b40d9fd89ad0447a6026c99a89bf669f23397b6a
+size 33554672
diff --git a/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9700ad8c9d5560eacc3907fe020928c728d6a7cd
--- /dev/null
+++ b/model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48b52d3a8c390c6bc92708c94a111e7ea456b8a650d2807e50cadf6f72605f5c
+size 67109160
diff --git a/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c6e9ddf035244c6cf5d1b5d358ad0e2178a96f64
--- /dev/null
+++ b/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9667691612c019d12bc11b3fb9707122937aaa75a66f6ebd67fd6e3a411032ea
+size 4192
diff --git a/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..76067bad60f7eca9e29d79c24f980b182a535d2a
--- /dev/null
+++ b/model/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07a2f794f4b72fcec1e865c0ef61dfd9cb827e45f17b6c0b7a932523640f8480
+size 8388848
diff --git a/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5eb8c123a22d2a63b7ec88c77bd79cdef68a983
--- /dev/null
+++ b/model/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:220071382bcd7fda0e8fbecf78891c8a027724efdc949a4680ccdad1b7216ca7
+size 25166176
diff --git a/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01db762942f26b281a2346da719e12af95dfa17b
--- /dev/null
+++ b/model/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d56e7bd5aa19440e6b2487a0ef6139a6257331cc6b2592b3c2e65964fe048fb7
+size 4192
diff --git a/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b145bf15e0cbc7a4094fc28e8e2801e593953460
--- /dev/null
+++ b/model/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa710a308e7e366072cdae823b0858cd969ef95c510bc6bba8b8bf6917877c91
+size 33554672
diff --git a/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1286972fcfc6a76dd46194fd45c0cbeb759b753d
--- /dev/null
+++ b/model/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89aded88eb3c8064bd0aad793d38c05f0d8fa9db771e7173687838143aaf103c
+size 67109160
diff --git a/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..27e52609975d18c7ce7c58b5d275ef7f2f0640e8
--- /dev/null
+++ b/model/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a4ba146fb165bf013886ab4a16609769c1647ed26b9ee351ddb1d09de8b68af
+size 4192
diff --git a/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ff33a3b57acf08bab2ebf3484a0999372332dbd2
--- /dev/null
+++ b/model/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d04db95d37491e1b95887b90f9d3af041eade12cb77c6c62c23ed44cd69e9c64
+size 8388848
diff --git a/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ab669d81c67da8669d96a0fb06441c73096bac8a
--- /dev/null
+++ b/model/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:977b36d635d5a14c6accbc1991eac9a7150ca6c10b0aba787fe68900ad48410a
+size 25166176
diff --git a/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..873f738f52691672b7d49e7562d777628d592695
--- /dev/null
+++ b/model/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a874af5f075fde980e9b228f061f5080f162113ea6d763c974e2d4febd9ae351
+size 4192
diff --git a/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8251c02dc79e40e49f5998d0bf86f7a482b559d3
--- /dev/null
+++ b/model/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deadcddaacebe1a3693e1ba5ec951d9b2a1b7c5ca753fb6b132b346383ac6bb2
+size 33554672
diff --git a/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fda9aa0ef273c1e50b49fedd2befac486b617f38
--- /dev/null
+++ b/model/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:503f7f0e34409ee2d5a55f19344bf2dabe0e43f54fef2bf400c0aef8c45328bf
+size 67109160
diff --git a/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..af1651b5a9752a8e094a4bb2b6dca43a514c9858
--- /dev/null
+++ b/model/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6deb5e52cf8606bff73ac4c807dfbfd5d04bfc2e347015088b92541f83083d2
+size 4192
diff --git a/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43a3e0bbe4cb14ef26a6445b39c2d5d1f2cb6604
--- /dev/null
+++ b/model/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95419c69ee5cbb5efdb3a0d5797345e146d605c5bb59009d5be8b3e96fedba3e
+size 8388848
diff --git a/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5dd83185a8086cb768c6c00762f4adabb78daf1a
--- /dev/null
+++ b/model/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2c9e64e4a3d27e9cccb85939c451cd7ff368c5d094713ccf4951fa23e1b806f
+size 25166176
diff --git a/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ded3b71c78d429ecb1ab44e86763d81d67881e2b
--- /dev/null
+++ b/model/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96a0ab5ee2e646b66e9e50be214f21513a61f2128a00c3421a8de405881f7fe9
+size 4192
diff --git a/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8c38fd7d0df80368848d25f6d13cb5942eee770c
--- /dev/null
+++ b/model/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0be49cbb9491e04d88dc3c2d4e47bce35bd6a8485f2f8c2bae4d1e36131d949
+size 33554672
diff --git a/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8401254b8875fb4aa4396debad5465a3bec1fe3
--- /dev/null
+++ b/model/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:413378a0524e799b154237c7441d569f6ad66691a2df8e5e36a594b6d7e0c7fe
+size 67109160
diff --git a/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d089e1f89a92ce7023cc890bbb8603aa4687ba3
--- /dev/null
+++ b/model/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:250ec3a04662f2b8ba81e82adf1817fcca72116c32e7094dc2482bdb066e59ad
+size 4192
diff --git a/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..19a4ab533d782e8a3505558307a2de4fc73a384f
--- /dev/null
+++ b/model/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe65ce230c76945b2cbb911bf2a56522e3e367bbc6e5e499ef197b55a926112c
+size 8388848
diff --git a/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c485aa61b2bd04118111c13e1f0f290d0f8ddea2
--- /dev/null
+++ b/model/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fc3ac815ed66b0ce99ff8523d6a38ec4e0259d8ce52380b2a39800730d424e1
+size 25166176
diff --git a/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0e44b5c19e796975a788bf3743b866675559f44c
--- /dev/null
+++ b/model/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fd4a383e0f984bc7a24c321861b9301d222d0f79deb9cdbd821bfd52c5d01bc
+size 4192
diff --git a/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42507fd56f4bdbae094ac1aaf26facc4f6703dbd
--- /dev/null
+++ b/model/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fc9ec26b05c1235890030f2ba8c4f79f4ddccc1eb849be7d7c8c2cb841313d6
+size 33554672
diff --git a/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89044f91658bb9aba8309d6be5bd5c82160461bd
--- /dev/null
+++ b/model/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df2d2d80a9e7522d150e14d24f6b11553f7dfe27d45429b28028993ea65124be
+size 67109160
diff --git a/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a24b2a2e7ceac0aa548528e8a7d76f8892b73fa7
--- /dev/null
+++ b/model/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84feac0fd53c2cbcd824cc8cae4400ae02a423595d7d9136bba448f2983bb8e5
+size 4192
diff --git a/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..436b253645d6e12c26547e79fa3b4cb3fd62ce33
--- /dev/null
+++ b/model/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd8737dd366aa39ad12999c1b81f0d2c5e2eb0995ab83f9587a4825678ada5a3
+size 8388848
diff --git a/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2c6ea0422f500ece93a3d33b728d4a844422b6e5
--- /dev/null
+++ b/model/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23ed9c25381ec2e6615712c9a58ec7798c49f505b9f4f45871794eeaee268666
+size 25166176
diff --git a/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..73e5dcd253cf6d3dba71306206b5fc03fdf377ed
--- /dev/null
+++ b/model/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:606ff112814f6fd9fbec67934fb327d344569e30fabd5c8385e947d719825eca
+size 4192
diff --git a/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42d7c418f6c8651c128a66ca5ff912b06677aea9
--- /dev/null
+++ b/model/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39d22f55ffcec0ff9e3f1b608bb7100e5260fb976ec26196185e1282c9f6ce69
+size 33554672
diff --git a/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..19836cd2d17ae7df24ad74e3ece5a3afc8859163
--- /dev/null
+++ b/model/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f42189332df4091deab8e45e51455a4877876152e48efbd7f59c6f1a71630213
+size 67109160
diff --git a/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5718edaa917310d09ded46b5f8a523a4356e4328
--- /dev/null
+++ b/model/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb2c39e58fc639a4e242aed34354e6158cdf2172e80b0d2eca8654a6b91bbc28
+size 4192
diff --git a/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8fae4fffe3333396ffadcde9a31d2994b3f86c95
--- /dev/null
+++ b/model/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:500b8f0d964d4ecc2b5eca46419e59f63b3011a7960e10200eed05c9295a7f30
+size 8388848
diff --git a/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a13725a9dc0fba79843b85045389a7d9bfbec40
--- /dev/null
+++ b/model/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f76150e8aa17241b35bdbeece62136bcaa789b5eb44e5a697779f0883db6a0b3
+size 25166176
diff --git a/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b35121d5d7d9c5dc744c125afacdfe5df68721a3
--- /dev/null
+++ b/model/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944c4380de845bbd2128b2a120b2353fa27b5b4f04347ac3db6632d7aaee48d4
+size 4192
diff --git a/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8522ef7813dac696cd84b116429631d45b4778a
--- /dev/null
+++ b/model/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a59ec7800e2ad6e5b795643800cef155d0b367acd45001595dd56f7622d3e9e4
+size 33554672
diff --git a/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95efef14449f9ba1fc218afeca7b9f8f5236c603
--- /dev/null
+++ b/model/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4c608acaba475c80245524ea2796faabe50c839ce3bbe6024ecc8f4cb449b6
+size 67109160
diff --git a/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7f6236c4a22ebd92f6871ce6d67781defb9a2084
--- /dev/null
+++ b/model/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:927d8e7964d7c37f3449917fa59ad5bb378703eaab8044860bf9569eb9717623
+size 4192
diff --git a/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2d04c805eab0a554bd6f0957b16686558b370f38
--- /dev/null
+++ b/model/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb7046cf9d2c631dbae0dc777c340c7fa930ccb6cef25a67615df72136a557d
+size 8388848
diff --git a/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a5a1c2e96bebbf220de3c955b1b636848d283409
--- /dev/null
+++ b/model/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a283ec50ec7d654ac8801c96830326be77a3a32e847c8b217505dcf163e77e
+size 25166176
diff --git a/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8eab54e87c8191fe26f039a672ebce39a588906f
--- /dev/null
+++ b/model/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78d1d37dc8c8daffcde697ee0f30a8da7da2a55dafa8b3900dba1afd9a791b41
+size 4192
diff --git a/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e87a5b4bf3c6700ff4cd4c29559d4e4463146ee2
--- /dev/null
+++ b/model/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:038fd530a8c69273afdde36b671de789ba7f10c98425ddcfc60541c069b0aa2a
+size 33554672
diff --git a/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3190ea687b47077e046c2a54056a8e77340996d5
--- /dev/null
+++ b/model/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db9c025bd532538551966c77cc9b71a1d03aeda716bb9e7d63296cae878fa37f
+size 67109160
diff --git a/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9668eb34db590717e4438d6837ee48c583de97b
--- /dev/null
+++ b/model/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:273dc1411618ad2b78204add5d8a7830dcd3f41ce5f2c56091832395fd7ff3a2
+size 4192
diff --git a/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..208196291b8d6c5a6924dbb611aece1459b7426c
--- /dev/null
+++ b/model/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e73f4772ce000a3eb98fb8c7c985c9f83b99ff9cb5e62f43f6fd981b9c287cd
+size 8388848
diff --git a/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..af8ef3036cc77ed3f67e1b8ba2dcfb769044ecee
--- /dev/null
+++ b/model/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72a9da0742ee8fae940112f0dbd389af96e57cdcf2d97933e496a005bca8554f
+size 25166176
diff --git a/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..04465481b5155b72e6ce1afe74e4ad89a8546512
--- /dev/null
+++ b/model/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da53fc03fb7b09e94daecc5fb2b5c5cb9aa83c7d614b356deb797443c9a16164
+size 4192
diff --git a/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc4a9e457324507a7ee6f02aab5a4e1176d2ba8b
--- /dev/null
+++ b/model/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1bd43c2481513287fc4ceaeb049430468190ba99ff4a8c736414e47fab66a72
+size 33554672
diff --git a/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..658da598536c813a57abbd8bde0a27d903d3de65
--- /dev/null
+++ b/model/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8c2ac625be611559ab964c7b3996a0ab28b1bc527a368a3f48d213ca5ce6626
+size 67109160
diff --git a/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2115e2aa02a2e6f6614fa30bf398ecc3defb2cf6
--- /dev/null
+++ b/model/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92d15e1c7672b26ba85483f90a2f6d67256cfd603c41fa67730250853e768cb5
+size 4192
diff --git a/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb559166afb6b2de3bc63b598cbe8a64999325ee
--- /dev/null
+++ b/model/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1a7aa67cc36969a02c996e609ca264ad5cc4c15080450801006a1de9bf9595
+size 8388848
diff --git a/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9a9885bb5b6c0b8685c6ed5591ea3b59614a63e0
--- /dev/null
+++ b/model/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e619b42b5c3358ab114520bf4066740cee1f514ef482b01954f25178e54c3f5
+size 25166176
diff --git a/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7bb3cc89ec4ae6985fb01ad04a07d8150b3b7518
--- /dev/null
+++ b/model/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db7d0733f8176b03bd2e41d333e6892e66b0b3fbf51b28927b73abf9a0c857d5
+size 4192
diff --git a/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2fbd828cb027444244f1f62eb1f01e1df68058b1
--- /dev/null
+++ b/model/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc038ed1949c4d0332f9433d76054020c2a996a577b24356a28f8bef710417cb
+size 33554672
diff --git a/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3aa0d167cbcf5129039622adc7abb2749d0b952c
--- /dev/null
+++ b/model/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e8d1015ef64650313cbb08b7c5f5389ce0f0de3d06f1c8ea8578bc3e1713490
+size 67109160
diff --git a/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c6846ecabde658ac3fe5e6758d3cf1a97c0edc1f
--- /dev/null
+++ b/model/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53f20bf8f9e3e8ed3e2c32ab478b695241e9ba297d4626a2dc66adae0e25b99c
+size 4192
diff --git a/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bfd3b3567576826a39699cedcf95a9c7886d8671
--- /dev/null
+++ b/model/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cfe35c8df9ebc5fd70f98f73cf42422eaec9533176daa26842bd8eea7209a64
+size 8388848
diff --git a/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..026ba19895b9446467566b2e228a06e165898869
--- /dev/null
+++ b/model/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fc5eb42a69e4951ae62398845dfe1c5630264286e6bb3d00dfad13b81a08e74
+size 25166176
diff --git a/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..faaf469d3fb5848e08813945cc7192306bce5304
--- /dev/null
+++ b/model/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb66c7fed3bc0c12816252b50723319ab7412f090153405a305b13e1cfb0ba0
+size 4192
diff --git a/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7914769b68c5230a7ee56618091b48b6de1b1ab0
--- /dev/null
+++ b/model/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5960cd51767d40fc328477cd105176a432653ce5753457b93d877a826f38aefe
+size 33554672
diff --git a/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..902ccb10cd8de56905b54d1b8139e33bc64bb3ba
--- /dev/null
+++ b/model/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea4e8885a2290b7c6d4bc0ea3bfd98038bf30da4728546dd057c2cc2fed1dee
+size 67109160
diff --git a/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..75d631c7464a9ad5d180fd99a6d8a3408a1595d0
--- /dev/null
+++ b/model/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70638322897a6a04b879a72c5581ee5dd3fa952f846c9b41632b930d94b4a9dd
+size 4192
diff --git a/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..84d1d8e1992505ca50f4c6e3ee03ffac5de8d73b
--- /dev/null
+++ b/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce0017d2a49c9025d73e2b6c201434b0fc2815fd99dccec517f2caeda251f0d6
+size 8388848
diff --git a/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4c4fd0adae8be3a077b866fb9ab8d7f1ac7f28e1
--- /dev/null
+++ b/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab0295a44561f39c0fe350eb970430dea0910addd5356dcb0a589e51e34582f0
+size 25166176
diff --git a/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa45639c68525d3d960a6a6078f80d65cc9df486
--- /dev/null
+++ b/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b1654de97bd14fc18f00bb20bc01681c8441113eec4e017ffe36180ea15f616
+size 4192
diff --git a/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..67551f6afabfb9d2980a8b0880ed11bf3104f17f
--- /dev/null
+++ b/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e300948c71dd02cf3ec5320d8cf49aa31eb12df161e61c75d05bcd1d69277eed
+size 33554672
diff --git a/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..40f2ac72f25dcbf1c43d8eb79ce29c29cf9ca9f0
--- /dev/null
+++ b/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d41b9f80603ad35cfe644ec2cb6804b1fff2a4d1d2366076d6ef93f09be91d63
+size 67109160
diff --git a/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ac945a00afb3a381b5fbfd6eb7960763ee59c03c
--- /dev/null
+++ b/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:447790d41d0c6baab712541022ca15d3f123fff5f3ac17bc550bb4ec54960663
+size 4192
diff --git a/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2199d8c8e1ad3cd5d46931337b1d35b412bae442
--- /dev/null
+++ b/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe35a01ccfd8c68f0fa98366462eed45d95e7bc9683661d3fdbf79922a753cef
+size 8388848
diff --git a/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1b0332a9853fd00678620d92f91b484f446e3870
--- /dev/null
+++ b/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8a1ce19b3fa2b34880cf875b7b6d9c41cce7af23f4723330900aac96b8af449
+size 25166176
diff --git a/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..154605196f0293752134dde52d4723de32747045
--- /dev/null
+++ b/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2da6c2f73cb589c1739494fcb786024591263073ac11ed3303a990600e3fb07c
+size 4192
diff --git a/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..21f23b2f139839a5e480fcd10e9b26ca0a1cb09d
--- /dev/null
+++ b/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bb332b827b61727694d8055cd40987f902004295c437f396a97d524008831b9
+size 33554672
diff --git a/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9a4f12cd35b4c844a42f8d58e321e44239e4c2f1
--- /dev/null
+++ b/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96593e70123034a7bf9fad7c8a1ed97fdb595de4da8d32fff72f0b4a2a3180f9
+size 67109160
diff --git a/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6a1d983c73f9387bcdc77f9fc995e25567b38f14
--- /dev/null
+++ b/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f565042e6099164ead4880dd4b098f7b761edb9ca335bc12fff5269665a72a3f
+size 4192
diff --git a/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d2071d24bd694676e7708c7a4c027c4ded8899f
--- /dev/null
+++ b/model/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:824826648c8897a86c5244ec97e69f8cc4bdb06e99ed08d9c555d2b337b66831
+size 8388848
diff --git a/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30d850a6b6aa59120adc20ae565d4dec78a000b6
--- /dev/null
+++ b/model/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81279f483c792139d66a6e9b06220d882209b72ccf1e20328e5055264cae1a7e
+size 25166176
diff --git a/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fea09e1f59e8f320c6b2ca482cb25df88b9e31fd
--- /dev/null
+++ b/model/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc97dda4dec794f39ea3bb3cbca49afd65b8e89d85d271bf2d0911c73c2755c
+size 4192
diff --git a/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e1e7d5cd8f5585102e297ed8c3f432fd91d59f7b
--- /dev/null
+++ b/model/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0f9426a7183ce245a51c7125f61f38cc38172aab93363408e4bf82aae21ab2a
+size 33554672
diff --git a/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1f8e69ea664338f6b468262841515650c70fc60
--- /dev/null
+++ b/model/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c62dcfb6cb05f98506ce25f5dd2d06fc77f988941bc4e5f37f0d021a43324859
+size 67109160
diff --git a/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1a374abc50c974d0f30510c85492a48ed1f6b8d1
--- /dev/null
+++ b/model/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61598430f1ab567a473baf8a215eb8ceccefd206633c97dc9db8c6d56346be54
+size 4192
diff --git a/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d132299131d8776e03ba9dceb80b7ffecfb3775
--- /dev/null
+++ b/model/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42c807bd3ecd5bd75a157591c920e6bb425abd60eccf0f777747cc330e53479a
+size 8388848
diff --git a/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..07770528254df8f96d5943c4c10102bf8b5a03b9
--- /dev/null
+++ b/model/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34a449d8f3b71bba9736f6791d29c5c2a5b55c5ead282a32ffa06abcb3b8fe36
+size 25166176
diff --git a/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b25e53ab3d74d393de8b76f9cd713c24327c5237
--- /dev/null
+++ b/model/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:334e740951bb01ca75ebfb5b3f2e5d2034901acabfa489cc4f85bfd0cc9b276f
+size 4192
diff --git a/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6de460b40981c092a25be70f659768d708606f90
--- /dev/null
+++ b/model/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d0af26b85133390c7b8bc43b3eca9f8fe92ef70481acd4d6d085682e844a89a
+size 33554672
diff --git a/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9dcb64b06ee5e8e055fb1982ab95a8cf98ebeffc
--- /dev/null
+++ b/model/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3861b18aabaf51b567d441075b5131a3e249f89375b0a820e82b8752515dee1
+size 67109160
diff --git a/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f181b226091fa8fa1072faf75f2dce348a73abbd
--- /dev/null
+++ b/model/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0574c2702d2325abfd9b1648c881262bb4c34b6bedb90c1555ae4f91b3562bd5
+size 4192
diff --git a/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3e6fd888a2125b30417b949e1e67fafe4197f47a
--- /dev/null
+++ b/model/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65cfa8cd4074e3017481ce2d5aa33322b0254b02edb967a6d47f07b966afbea1
+size 8388848
diff --git a/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6cb15523a98bee30bbd383dc8dd3215e18fb61d6
--- /dev/null
+++ b/model/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c09d44112b44197bc9a44a9636e9da52587be8be70ebd3695aeb197eebe3187
+size 25166176
diff --git a/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7c7d2cbfd6d8282a0bb43ed0439b7109c990e62d
--- /dev/null
+++ b/model/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48e50e11dfc3aac32c1f28b7324a347a72b48259c18d0b2fb56ca9e142dea666
+size 4192
diff --git a/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2e58ed2a1599c496a0cf3693c4b9068584d85135
--- /dev/null
+++ b/model/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c6fa3957f9d6c5417cf1cd010a8c75459dee819ffd2ba0c1e06061bfe1922b2
+size 33554672
diff --git a/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b27a10834d4963f97323161bde08c7931b0573b
--- /dev/null
+++ b/model/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de0439dd7e089340d712551e98f597bd48ed5d6efb446e78541860c063a8594c
+size 67109160
diff --git a/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa82ba815a94e879d0bce087133bfd4c4f3c92ee
--- /dev/null
+++ b/model/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abeb4ecbb38460ce9bddd66f4010c5841a2150159d06229bd2ba7d2d9d0b4a88
+size 4192
diff --git a/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..52311dd4f5fde5ce8d0365b6735dba1e88569916
--- /dev/null
+++ b/model/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c65aa27df13ec95f1ba8194c00b22e510c921908697ee8d076bb473b7f90fe5
+size 8388848
diff --git a/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1cfcbda51314e67f7b80a65a6d0bfbcdfc327e58
--- /dev/null
+++ b/model/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f7549ae1df2db5c5daf523d66962eaee147b211e2c0e3d73eaebd7a1a5be800
+size 25166176
diff --git a/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e063bcd95ffbb98ff7839ab07f814193bead17e4
--- /dev/null
+++ b/model/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74066b45476e231db6c56c72a6c4ee2b9ed41a7fd99ac6a7fbb3e06c000ff98f
+size 4192
diff --git a/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4f3bb9526ab647bf2fe71c94309d42d3b87fcb88
--- /dev/null
+++ b/model/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d7c6986fac7d6f0176e84fd3bd3ee8657f967f7aa936cd70967a98114f1de6a
+size 33554672
diff --git a/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2d3dbcd0b30e063d7ab3c872353527e232096bc7
--- /dev/null
+++ b/model/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7914c118847ad488314299dbb0c575a148ee54f5a5c1f6c12e7aee4c46b81c2
+size 67109160
diff --git a/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f92f6dc5d24e78d81f179de590fa40795bcb42c
--- /dev/null
+++ b/model/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4eb37011419a0b399a27fd59784a2b26fbb2213e478208258fc46ccf069d630
+size 4192
diff --git a/model/model/final_layer_norm/pp_block/model_weight.safetensors b/model/model/final_layer_norm/pp_block/model_weight.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ee1b050dbd1b75fd18ea3afd3edc75a3a41c6bd
--- /dev/null
+++ b/model/model/final_layer_norm/pp_block/model_weight.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:737ca10e080afa2bde7a35a01f2fff6de1faabe10b1e46ef54d5d7365f6967c0
+size 4192
diff --git a/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a139c347d8c93c3727cda486e8978e1cef783eec
--- /dev/null
+++ b/model/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf1fb347ac754ae6f5f70a6230dbedf7beea68e43f489ad34c1ff4661628cef0
+size 205914352
diff --git a/model_config.json b/model_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5
--- /dev/null
+++ b/model_config.json
@@ -0,0 +1 @@
+{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272}
\ No newline at end of file