diff --git a/arxiv_papers/checkpoint_metadata.json b/arxiv_papers/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/arxiv_papers/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/arxiv_papers/config.yaml b/arxiv_papers/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bfbc63321fa30dab4c6f2de37255ad015ecbd09 --- /dev/null +++ b/arxiv_papers/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredarxiv_papers-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/arxiv_papers/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d566c04860f0ce71c9147c686ad5e2084bb9a19 --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d83ef891ea48ba66573e4cce8802a221d311b359e006af069bec3501a0b0d8 +size 8388848 diff --git a/arxiv_papers/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81a6ed09955451d264e00cbc7e4e855744d3222d --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c8af6c6a9859e869b48c7ec07b1ffb31d73bb4f51de223045ce811d14533cb +size 25166176 diff --git a/arxiv_papers/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c48d68c7ffa569c82b0ad9c33137ebdb8f4ffcf2 --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93bcc323404a0d28579d429f2c897cf857daa05a1536d9df5b48f14edc2217b8 +size 4192 diff --git a/arxiv_papers/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c353e561b3c712c613f57f7f348b5ba93918994d --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96957e1959d1814f779e6298a5bbcaa39a3674e79e19ce9211dd81d632c427a +size 33554672 diff --git a/arxiv_papers/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31e39db2e29fb58efcbcc6548b0441f1fb849640 --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8237f0e476c2342e14dbf6b7bb05ae7a1a584a976e3ecdef25a57b730b939ab0 +size 67109160 diff --git a/arxiv_papers/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39858ad7603a17c33d3fe0d7851e9fe1dd771896 --- /dev/null +++ b/arxiv_papers/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055a7da36b180de0cd80f0c32bda26381173573d645ecbfda384776b55e68c21 +size 4192 diff --git a/arxiv_papers/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14adef90f96c7b30a10cf7e97c8f858bbc96eb1e --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8189ed7ae25d2471a6f04e2a0466c8bf0773bf453561ab3afdc280c3a8144d +size 8388848 diff --git a/arxiv_papers/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..198cb84eac8f905de256c3909a813eda819ada84 --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980917c055e9681e06ea4dd249da917e28a0336a6cedd9f8b0747268a9d133a2 +size 25166176 diff --git a/arxiv_papers/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2490908404dc82135866dfa5d51f8db9b701b381 --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4174ab76a3a81521f2962558e01c4a7535f1d48fdcbdbc325a96200c066dab47 +size 4192 diff --git a/arxiv_papers/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eac5005e7f5312193480df92c162915cb113a6d6 --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47496c4f91bba54327b3fb27274701ecb91966f13986bb1e27927c2b6cf2e483 +size 33554672 diff --git a/arxiv_papers/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd09b74c3d7d8034998d994072fa3964468e178a --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6264a3e6dc9c6f05b47a37fd4297fb2cb0720bf392e4d58231831d48297579 +size 67109160 diff --git a/arxiv_papers/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..115a83f249702d3d39889a231aadcaddd701b821 --- /dev/null +++ b/arxiv_papers/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0333fcc4da7a02bf3ed1b8a52601b3798ea2df0ca8a58fba1f93554ceed0c00 +size 4192 diff --git a/arxiv_papers/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d5bdc27510564f5188bb3927c574a88d5b4ffa6 --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d2d7df20778b009164674f76b6178b461c328d4ed04bede15f35b742d9d797 +size 8388848 diff --git a/arxiv_papers/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b9911a4b2abc1f82fc5bf164bb8c93df8b9c686 --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05898d3f7854f9b466f2ab75b580de88a50202636622097645d3e667fb4a45f9 +size 25166176 diff --git a/arxiv_papers/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9478aba773471b85c371eee54d2dbd8122db7832 --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7dcd44fab01b7aad5a21a1ac4c28553c91ac3ae360aadb01a4c284ac7f6c0a +size 4192 diff --git a/arxiv_papers/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..414b3ee3d6608309e9f7fab1163081d80086210b --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b03b0322b643c697edc1dbd0b020190264f2614439e165b9e784507a94c8ee +size 33554672 diff --git a/arxiv_papers/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..971664c21564e8afde53ea9fd113d333bbbd039a --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290f71feb11dd01b57e170e75d594067f40cf59746b22e819e56bab2738df394 +size 67109160 diff --git a/arxiv_papers/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebe4697a93e3bf57b228d1dcf1499979500ae669 --- /dev/null +++ b/arxiv_papers/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcaa60a3b03cbf1f8fc9c78429a4f3d4455021198a1074cac73d6a55e5778ba3 +size 4192 diff --git a/arxiv_papers/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78acb10fc432b10f7d787a66855a79593717fcd7 --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1cd9e9276953c817668c9cb64a60b51e905cb8af7a993e999d7e93c59d64a66 +size 8388848 diff --git a/arxiv_papers/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc6a2cee6a65e80ff5660d8fa13c142e15eaa8f9 --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d226ec81c772785886795c1659d4685b0fc6058e29107945a576123329741e9 +size 25166176 diff --git a/arxiv_papers/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d535f3eec58b7f14c44ff6322c5f17c9c57cebd6 --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628daa47fb0caec0dd0b3eb435815af7315bc66a846ad6892eddb847fc0f725e +size 4192 diff --git a/arxiv_papers/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71b79e390a69694a1eda612a4d13270a315b4b2d --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a3ce9143b1c9765c4c5f9dffc641de6a5213f6bdffebfeeb31e42a22475c97 +size 33554672 diff --git a/arxiv_papers/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d34a6593234625a47c6de0efda2d23a16e68e9de --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1593d722651fd5b567fd6f87d69bafe6f1c4f635f0c48a156fd13859f17a26d +size 67109160 diff --git a/arxiv_papers/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcf0893380ea55876e990d68222d9581a16793e8 --- /dev/null +++ b/arxiv_papers/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3effe1931b95d6390a570c5840bd4f039157436aa6f59e1a67110d01e1ff0421 +size 4192 diff --git a/arxiv_papers/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39cf8189a2b12267a2176d6394106332cb466846 --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0821a089c8e422d4780c7416103c3c8a800c6ec4a558e779e0dedc7e19a94d05 +size 8388848 diff --git a/arxiv_papers/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c7492c937fe2a65295a9117ee6b7267b649b42f --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afd4705e7ffea40f53d24932d688f06a7534f3b386145e076f732330030335f +size 25166176 diff --git a/arxiv_papers/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b4b31d6fa08c9890fa284c83abd8eb7c75573d1 --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0a399ae2c62944fb5bf4e8907ac674674df7bae5b2a264ab23ddf252f4911c +size 4192 diff --git a/arxiv_papers/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b8d32908de87ddf5b584f3097b6e105a01ff69f --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2fcdd0bcf96aaa2a7e55472ab10488acff6fb6768e619473eb9c7e71f185cc +size 33554672 diff --git a/arxiv_papers/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bbd2990a1bd72a89c551bb3a68b171bfa711e50 --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47548fd48a98477a3b1a15e76cbed1a9041c03a3dada253737df4cc07c38f168 +size 67109160 diff --git a/arxiv_papers/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57501c35f1b49a2e697c990ce6b0a574faf81b9b --- /dev/null +++ b/arxiv_papers/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4702911bc67577212f2b0911c95fff717d6175a2671451145a960c818980c81 +size 4192 diff --git a/arxiv_papers/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f1e2ec913f9215cd3407330ef5f5770bd73c24b --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd448ae1c26abd57462009715fc979792ad50e62ad7fcb103f6ae5bdfba5092 +size 8388848 diff --git a/arxiv_papers/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..920ac33b758360a0a50095ff07a3751564873ed6 --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1368596dec823114975b8e29361dc04a283b3827a294ddbcc59b6cd225a513 +size 25166176 diff --git a/arxiv_papers/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1d6d46a0cbb091be76528fe3d44680224137de4 --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821695b511e08007a28eeb5aaa3b7eff6aee838ca9e054f5255f662371d2a219 +size 4192 diff --git a/arxiv_papers/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d69b7e274121b66df05ec75b2aeb0ab4e4a2d8ad --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa1e03deac41391e87bf6280b7004761ab9d0899be016bbe0902a235af6eaa4 +size 33554672 diff --git a/arxiv_papers/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb489a17179b1bc05ed5f27b520af014be870116 --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:863e26bfeafc90e78150f561b6a8acf40f3eb19d7118bb5379c81559db57f8e0 +size 67109160 diff --git a/arxiv_papers/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25a2724fb69658fc45a756a704615628160dc847 --- /dev/null +++ b/arxiv_papers/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1f6f0b0b7b5345d7ba8cc6d793ec937017b2497a3d98a4a7c519cc35c1153a +size 4192 diff --git a/arxiv_papers/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb86829b212ed8593d5eca32f94a8b21b02c3108 --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26a00c3825186b3633e7f7d06a003a681b89bf2b81c355f35120a80ed33d437 +size 8388848 diff --git a/arxiv_papers/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cf1ac3bd4d4000050867e0c271fb3014c09a2ff --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157182612ee0443a3caa372af097c5c01b80c83927762e6ded3b582ca4bf1e85 +size 25166176 diff --git a/arxiv_papers/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce9c3b4106f220a6074fb4c00e7b4afda7ea76ef --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c43197a5aa2648b8f617193ea635ae1bab34270deac2c904b5a66025a4f3ee +size 4192 diff --git a/arxiv_papers/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07aaa628b68e31fa4b41104ee53b0454a38a3208 --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f55762482682435ca29c270f4807610b03aca418d4a8fb444b5bbf315f8be1 +size 33554672 diff --git a/arxiv_papers/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93ae47fe865f9769d09f60156931ff3dc6809c7f --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6756a2c55358de350fb1a712a55a4d7850a7a76567532cb38b5fc5cb32e392e3 +size 67109160 diff --git a/arxiv_papers/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cfbc5fee4c72a05f70a6bc7cb2844534c6f9c58 --- /dev/null +++ b/arxiv_papers/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41e1bdf54a7f7d8016e4756abc5676b1cbfc3a2efa3cfc6f997a9ecdccb6ab5 +size 4192 diff --git a/arxiv_papers/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b563e7498fc2143309ab549565916c06e86c35ed --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686dcf4083072273359f92e637549e458c26ef8d288b48d5e3e6d3ff10df4121 +size 8388848 diff --git a/arxiv_papers/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eef6cb012d7cf026da46315b567d009dd08e0e81 --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6ecb056a320eb7b5e6473fa82c3eea181cdead46b6c80559bce7646216186b +size 25166176 diff --git a/arxiv_papers/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce1f94d378cbec01f492acff05b11c7fdf3cb82c --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc323f62a8242a15e10dbb8d27d73abad6e372a63a13af05f0dfec59476f456 +size 4192 diff --git a/arxiv_papers/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83a21a66447dde28cf8263a82b23ebc814f9f13f --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5d78fa8596785210f87d55723faeb0ab46e7dd21deb9f8e1977611a681ad0f +size 33554672 diff --git a/arxiv_papers/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2a30332ec8952bc985bd88693a5201f45575539 --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d96f01c1527e7f4d5f27b4bab6a4dd85a708aaf003527ba73ec5a23a042c507 +size 67109160 diff --git a/arxiv_papers/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cce72c9df487f2d9b8732b5718035445210fd0be --- /dev/null +++ b/arxiv_papers/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5552c575a432bf1f33e7fb34f2ea9da1a95d39b2c717422ec9699e9108d9324 +size 4192 diff --git a/arxiv_papers/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..203b057b4913d5a252bfa44b2e6a4633b912d42f --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b9fe96d029b9bc7ea5a3a233708a1a9dbdaf821f20f0f3df3f0e4ad54ddd59 +size 8388848 diff --git a/arxiv_papers/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71587c115233c3f51e203a314b9b569a0c42e02f --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80dc8ae21305f3477d3df700886b87ec816e8283c8fbe7b4669d145df7efe87f +size 25166176 diff --git a/arxiv_papers/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4ea7a6af0c7c7fe1f68b31b552e309d6b877dea --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967bc26b04e6fb41e6a4961866feb4f0930e0caa8f6102e58f85f47a43f7bbbc +size 4192 diff --git a/arxiv_papers/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe435fa103ec7b07cf50423d63f4b0858dc4ebc4 --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a125732854937a967dd217f406a5f7fe09557ceb4e6748cd9da53cadc5dbf5f0 +size 33554672 diff --git a/arxiv_papers/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af94e0dc64ae99141c3c7050728f53c02c20c542 --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b18eab150a5c2314887e35425cd3ab8027b74557e442744df59f2fe4d03613f +size 67109160 diff --git a/arxiv_papers/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9387d99164933297570411fa93f0e685350e91a3 --- /dev/null +++ b/arxiv_papers/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e9025e8787bb281e3ef37d07e5bd68e6f7c321a8408e5363ad36b30a0ed9b8 +size 4192 diff --git a/arxiv_papers/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da134e333b066afa055f4d3ae029d1d1aba4269e --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f83a8f1f01ceac4902d70e2210220e152afd400dd6f5a1764d949b8ec93825 +size 8388848 diff --git a/arxiv_papers/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7168bb2f70291c86c311f5c85fcf987395e45b14 --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b697c33c4e9fd626708a1d888b500e44a7b9965a59db7ae1da61aeb6895b4b3e +size 25166176 diff --git a/arxiv_papers/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f7b9f46ca4973407acf62cbd35372a2878e73ec --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3581c48695bbf646b33c6a483b5ca8bf2f8183c2d1d28f3fd26c20c385f501ba +size 4192 diff --git a/arxiv_papers/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..515e7685b74d7f887f7f470fcb0adb989ffd09d0 --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d598f3acfd5e70fff2207d1a821e67b3a8a4c53e536ad835432873af9501c0c7 +size 33554672 diff --git a/arxiv_papers/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1f0e31687b0dea92c884f7481b22bec946365f2 --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da78af40a2a7cd94f4197eb24785dc2778245e0c6342b206601b4207e1ec819b +size 67109160 diff --git a/arxiv_papers/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2de67c2f9de625a72e7d88e75b1f377bf55ce14 --- /dev/null +++ b/arxiv_papers/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabb2c7754e3c9d12b0f1ff8bbb6ad83306f53528069fed6b5f1a6de418b1fc2 +size 4192 diff --git a/arxiv_papers/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7634013a6a3384f6cb0af0e3b43c78052b4ee4dc --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce63638e746aeef640c62804b60cb4e9e37c8cba442c6c836947e0fe182a2221 +size 8388848 diff --git a/arxiv_papers/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d3b4f7817e1321749b1d526bc8b25e27a82008d --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a2fab03d3f9c8bac2bc1d435d4e2a1a54642e1eeff1fa6bc37146195ba5df9 +size 25166176 diff --git a/arxiv_papers/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63d3afdb6e7efc3ba7077987d1954283b6a6252d --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85658e118c400fcd0e01957e334627bb0d8e363cfa208e747969b2d1bf947060 +size 4192 diff --git a/arxiv_papers/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da7c47b5e4140eeeaa3d286a48157fd68378daaf --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbcd39ad6426fb7e50be3a9b755b26628d1d0869babb227eb4f39d948add825e +size 33554672 diff --git a/arxiv_papers/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2a6b00bd26934dcc1a804e84da704dbc58282f1 --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0aa1e8a6aefa85663b12f5aec7d6df9bf4916738f1ed2c596bf5fc73988bcd8 +size 67109160 diff --git a/arxiv_papers/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..faba5e4c9fe002962a37eb8a17c1f828a11f5d70 --- /dev/null +++ b/arxiv_papers/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d47fb37ef0621edccbbd3e53e7f6ac4dec1c22d2e5d8c4b32164a2441356887 +size 4192 diff --git a/arxiv_papers/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce4722299622b21aeb7e3315703126b6f671f721 --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea222a58f9ba9955e452d7fad6de741c458c303da732b61f866be44a12d056e +size 8388848 diff --git a/arxiv_papers/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6df05c6e254d4e8c2dcd3c36715b777e4da53179 --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95022d7bc13499d581243dc81dcb09a027cce1268de746a266536d81dbbe298f +size 25166176 diff --git a/arxiv_papers/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf4efb4ae1c342575cae90dcb9321c25d826809f --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799fb046747075c4cc01ed70b50bbfe422c2ba5470bb28de92f225e1cb0e211a +size 4192 diff --git a/arxiv_papers/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fe23aa124b38b37302c423b22fa51962c44d555 --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796707fe47ba144c4a6acc8aa20240794d509b198b0b86681f60e1ebe8404595 +size 33554672 diff --git a/arxiv_papers/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cdbc63808f48c80e9317fc0ccb79d537cd689be --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11391e78bde5e44252ebf6e9f905bb52369beb07989fdb2ddcd8205426880ff +size 67109160 diff --git a/arxiv_papers/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ee6ab7f0efcda81742d9c1daefa9828fa737cb7 --- /dev/null +++ b/arxiv_papers/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb39c19e8882a1307a87a9ee6c1e617be8dfe47ddcc15d1d0143469fc200554b +size 4192 diff --git a/arxiv_papers/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d41a25a604244951650779a627513c0b46041b1 --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0524457cca1ea1ddcd65148e171e35147694e05b9e2b7e6d306013a34c240de +size 8388848 diff --git a/arxiv_papers/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..443aa30337ce40f3d3bd7c389808a3c19f6f1928 --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07958db93b6f8ba998049f78235fb8ceade904dbb209c5cc02ff0f8e6555f484 +size 25166176 diff --git a/arxiv_papers/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4946ee3f5804aa63130e9a14b3a856e5feb0572d --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32e480c3f2e0775600d214eae3deaca5f39f852afcd34e7a455e0bec37faaca +size 4192 diff --git a/arxiv_papers/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7295a91b7dad9c33cc499b9b2307a76856d8d83 --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0682ae2f2ee3445a3de945563717dcf6080c6e2ad434156af5a339c079549756 +size 33554672 diff --git a/arxiv_papers/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8aaa8953681341fb308aba320d9fb5c7e35fddb2 --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be5168076c0939e9b99b0def5a6f977b014879629aca116459f3553d842ab5d +size 67109160 diff --git a/arxiv_papers/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a46fae1e14bd3cecb7442ea4727d61b77fb7c93 --- /dev/null +++ b/arxiv_papers/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e30d3d3a32015321f4c09ab6d3cb1295d04cd82b63270634766d37a3c2fed1 +size 4192 diff --git a/arxiv_papers/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..810daf064a93ac98b16d3b563d12febab7cc80b5 --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14619f259cdcfc6fbc5d3bcf605c09a3a434ff47e3f5ae83a4829fb8a7559020 +size 8388848 diff --git a/arxiv_papers/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ef002eafc0bfbe820c7a4db2eea3eb1f5e1a9da --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7adaf61e8e40c49d303df3faf7798f6cbab001dec86bf8031a9b7f065d4923 +size 25166176 diff --git a/arxiv_papers/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb2de929ec1bb2464ff5c1b72f1369022bc304e7 --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf24731080a09779ad2cb7dcb35e717f56c07f6acd95960b061713bc83282d8 +size 4192 diff --git a/arxiv_papers/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e93c7727240fafa3ba559adb3244f4a93bfc63d8 --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99671a1fb2c9a97d0f1d23245e89093a2aeebbe8b4d515709288add65408811 +size 33554672 diff --git a/arxiv_papers/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fbccbc97d3b6fd474f3cc7fff3a775c78adb014 --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10939078bcaef5ba1a56e7e76bab00c1f9cfbc4a12d09e37ce162285c768963d +size 67109160 diff --git a/arxiv_papers/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..694778766e00dda69301883c9ecf655c9f651290 --- /dev/null +++ b/arxiv_papers/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbcdd748272fa1f122140749810e8665bb27f6f36930a821161d410649150924 +size 4192 diff --git a/arxiv_papers/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2f0acc8a70d16c1c5ff20ecfc5a1d702ff4d806 --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848d2f14a7c9f07aaa0a95cc61d868b6f986d4e63e26bc4fed27b1655a465384 +size 8388848 diff --git a/arxiv_papers/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4dfd3366fb6c00a4cd3a784676e28db57827c64a --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e63e44cabc22553642101dbc61c315e6fd574e19058a3bffa9bdf8519c789cd +size 25166176 diff --git a/arxiv_papers/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9efe12256f5d3b3eddf94367f55fe666e9a67b37 --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469fdc6db450a2805976aa807907b0a3fc6b30338086264c5df17d8143bcea23 +size 4192 diff --git a/arxiv_papers/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5eaec25f769cf77e7079c9e0d58566e01b7dfa77 --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce041c8a063c950028be00a055b8606afc48c8c7da98028f9c7a2a1931b9bc61 +size 33554672 diff --git a/arxiv_papers/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e50590e213f616e508e9d3fb7eebc86f93c74b4b --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e762fe11afb4aadf25f34f9b2da26d69bceca88090aef1f9a4c0005b3fe246 +size 67109160 diff --git a/arxiv_papers/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1824b51c9df890f1fe4d51c3fa48111d746e2d39 --- /dev/null +++ b/arxiv_papers/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19e2f3317590e7f96891f962c0af9ee10e1df19f06772f6d3b35603b0df4ec5 +size 4192 diff --git a/arxiv_papers/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3db68ac1e05dd20fcf0092b24bf3d00ef92e21e6 --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9345f022155b750408191a15818a0c0f9c7c117080319c8737f2db5f5fec6c +size 8388848 diff --git a/arxiv_papers/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0a5f0d5fef39fb81e33e29cca0306a9cba680a8 --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49869d2eef77a4e0f68961a4d47ba5e1eeed3bbf4df31d63e0340b5006d20bdf +size 25166176 diff --git a/arxiv_papers/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1932d0fc5aba6d9c2ca63a794036ec970ce1309 --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14c38274ef525b3bd3f06ab8e39494cc71e1be1c20a5830eb92bedf4148568a +size 4192 diff --git a/arxiv_papers/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1271f63eecaba0ced695dd0a5844195e52801e8a --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd17125a327f732d2f4189003013b4a054ebc56914fde95c8167cba6c7d0aaf +size 33554672 diff --git a/arxiv_papers/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72b89f5ee76db50068dac3afe4a7d29e23235bcd --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c4a6cf6521924de04308b694bb06d096354e672f8bd75128733ad7547c1e2f +size 67109160 diff --git a/arxiv_papers/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e13555609d68b83bf2172a5ad97150dbdf77776 --- /dev/null +++ b/arxiv_papers/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47da4c244cf2c99e36ea56f8f069c44868173965694426f37ca6cd79c721c82 +size 4192 diff --git a/arxiv_papers/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..719f917e503de2bc8cf28c42b6c2c6b17762094e --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c7b291842c3bf6476f3e2ec4a724fbb6192cb4841381d7b5859fb4592e7edb +size 8388848 diff --git a/arxiv_papers/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85574ea9de237765b015c06c4a68aeec7fbb68ac --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c206a34772c018ac1ab11bdbeb47e5ce8a24e0a186eb853dc3c20aa8d718cd07 +size 25166176 diff --git a/arxiv_papers/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbf4e89d9ec0f4ec59515abf91d1ae9d6d178751 --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55aeae004b90c47bde6ef9c3e88c259c7657b29ee6d579a5438eeac33e2c262 +size 4192 diff --git a/arxiv_papers/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b40351236e56be24c7a55913d37fb437590b05b --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094334764935e083c03902b65150ad323227b6847cbdb5d080cb087e30479388 +size 33554672 diff --git a/arxiv_papers/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17970aeffa3638f6ec462d4599b98d4d9cb79a0b --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5acd156dde2ccb719d3a638f69bcd5d7bcd36eb8df7e97488e7e06db15cce20c +size 67109160 diff --git a/arxiv_papers/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8e3789ffd905f6f6b3e85ff82c8a1ffcdd8a253 --- /dev/null +++ b/arxiv_papers/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea45e0d864c6d912d3c07cc7f0710b7cc31ec9fab42b2f3451dcc98a876d2e2 +size 4192 diff --git a/arxiv_papers/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c63a0dc6794c7ec7d723cb0b776db9c8ea5fe31 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1174339c3c2e972154d68d877557b482544bca7d5a0081ed7862e5ff70784c2 +size 8388848 diff --git a/arxiv_papers/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1061f4144f65b5acaeaed78a13e823db2281b776 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f076f006011d905ead1f5c4dcbfe99a4a28163065c07d7c9d2450a75771d7885 +size 25166176 diff --git a/arxiv_papers/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bef4fdbbae109c2bb794a0ed43d8a5ea1740a39 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bb9e6c15fcda0c5b40e3404b0df3bd6308cc2428a2762de2d915d33d2b469b +size 4192 diff --git a/arxiv_papers/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac6621042337671ed02f1f996d567fb8f42b69d3 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af8ab3d84df19d67ed7b2e241959495ebd00cd75ece362687094ef8a314a132 +size 33554672 diff --git a/arxiv_papers/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cc6a9d2b17b893016ab6064e71221c6e4509138 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7769f08d374bc94d3c797882c32b354387eece8816bf98be12244c2b146405e0 +size 67109160 diff --git a/arxiv_papers/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..726dfbc1de6d0036d9b0a23a324fc03a71934563 --- /dev/null +++ b/arxiv_papers/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7931255d08f864418e522fd77427e7a4e0754b9ae699d1952b7c7a70bd41e1bc +size 4192 diff --git a/arxiv_papers/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9636ce579eeaf1741f76ce87db38209f08d90222 --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16dce3bcb6f15a509a300f384f04fc5be287b1cab60ef6967dbd8702772bf5de +size 8388848 diff --git a/arxiv_papers/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..211981d75fe557f76da1f5c2c042042d6b9f6edf --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9265e76a43075d671da983c7aeceadea19647e52c218bfaf2fae97dbbff70e6 +size 25166176 diff --git a/arxiv_papers/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..541c8e3b96377fba3597611a7e18872b097d1c90 --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9bdd9a273badee9447129aeb1f49032e3e4fa3f54ae81ab8d1644b0151218a0 +size 4192 diff --git a/arxiv_papers/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..117bf91db14692aa1562252d83acffe42cfa4eae --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321ab8a791950c58e7a2fa4fed000d6f05c050b188f82dd9a0c9e68287bcca3d +size 33554672 diff --git a/arxiv_papers/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f0f3b2d1de1cf5cf56af93ef8298d2f364164a1 --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5986dfbefea465f410ed080eec3829bd49809b7bebf9f2a1fe9ed94bb110b75a +size 67109160 diff --git a/arxiv_papers/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a6a5fb49674a2088e413873803df91057800b7b --- /dev/null +++ b/arxiv_papers/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0d15a6d53faad0e6d16d2c6c37509bca9cc09c613620ac87dc29567f22d686 +size 4192 diff --git a/arxiv_papers/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cb283a68d3b425537c83230664b27c3f2ec9685 --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d46aa959fa385a6d37af30f10f34d70674a3c0b3c6c94237fe067320ed2281 +size 8388848 diff --git a/arxiv_papers/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21aea7c58de46449e680a873e32f75ec257f64e1 --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253291bd4d06a33d27c86e3c18d0f9a56198f8be761fa3a1c3b1ae742f173ff8 +size 25166176 diff --git a/arxiv_papers/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e7647f85415efa6492826568de6d352eb82f71b --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ebdb808e6df34ac2f1d5eaba6df049b53f6f52b412974dc7de9a2dad9b9ad5 +size 4192 diff --git a/arxiv_papers/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94df65b85ed7f8843b18171845ff90134d6bc370 --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3431535582c4b54304d933d6e26f9180758726b72c6658fe6459d23bdcb027f9 +size 33554672 diff --git a/arxiv_papers/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d479fb4107eb01530235a1a93d31003462ddc3c --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79ceb78589baf1f455001de0f4414ab247fbb012ca8e82c9fcf8678240f6327 +size 67109160 diff --git a/arxiv_papers/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1308997a180a82d3a7560ffb4a89071254766976 --- /dev/null +++ b/arxiv_papers/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fcc2ce5646a1f670a2c23245bf190609bf144da350d065762ce2c4ba19a01f2 +size 4192 diff --git a/arxiv_papers/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..500ece54ad459c7c7f379be90af5b2de03bbc358 --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6784d916a71b1047c5911b6483c1ded137f23af4ded7fef78a33fca9bb4206f +size 8388848 diff --git a/arxiv_papers/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..597648623f51f19ed2d40db7fec41f09b939b277 --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c0468759cb80883341d683e4b107bd922546d1692ee0ec654656f36126b4ff +size 25166176 diff --git a/arxiv_papers/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0abe43e1c4389c9450b8b66d977b6cf17160cdb1 --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4628c48cd37d39419b609809f5450b2c45306ec1dd6efb836c21f45a5712687a +size 4192 diff --git a/arxiv_papers/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0d6bbeec3879685eec7b1676cb9c3e0e2cc926c --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e878f30e69ad51a1f0425d2824c0d970752245f0697c669e6557d14fa8eb4a +size 33554672 diff --git a/arxiv_papers/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..482a3c81c42e71f88c7bc1ff713170ef7bb7f94c --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc15325e3a365ad3ae472f6be975179a12ebdc11d2d780772d2a1cfca15d2f6d +size 67109160 diff --git a/arxiv_papers/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae713f4f469d60fa1ff5db3b1ec67df591b7cc11 --- /dev/null +++ b/arxiv_papers/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13ffdc6c4268767d9b6b3d2024f45f969188bda9d6d8c648d6e8cd6433d4562 +size 4192 diff --git a/arxiv_papers/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bb9c6ece223ee08823531915bf83c3666583769 --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeae76a9d2cc4bc50a4b649c447292244087d17b9737c9ee58c28c01b233f1c9 +size 8388848 diff --git a/arxiv_papers/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..983d31d464ed01d663cc32e0c46f31c6ab5fb440 --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b86884324fc3c2d0d995443dbd084ce1c7ea6fa2f3a658ecfa13416825315ae2 +size 25166176 diff --git a/arxiv_papers/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d730c53cadd8a0ce2abd14817c993eb231f4b823 --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f522c0d75dfa684cee9de8ce9a8f3b1afff4e4f620575e2cf3413b3481e57cda +size 4192 diff --git a/arxiv_papers/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fab52a760eeba4a789bcc1aaa1e143563851dc12 --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b9e7044db9da5bfd8274941371a451ed9febbbf63efcc41019dc8e9ba965b3 +size 33554672 diff --git a/arxiv_papers/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..961c1dd613ade128a4cce4c38cf1d5ca8a20726c --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420fbc15dfc7724807c92c4e70fe8d850fd5535988e117dc58a57dc2fc0ff4d3 +size 67109160 diff --git a/arxiv_papers/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b280481e7d0c9789b70ce184aa4c62ba309163fb --- /dev/null +++ b/arxiv_papers/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ebc069cf0270b4df58cda91640be6620024bd920d2a59c58aa0d30df2db34e +size 4192 diff --git a/arxiv_papers/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fce2740fe2ebd26afddc8067494f1c0ccf49fbb6 --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18cc8978ed371b3c109c76d54bd4ff7d457ad705b1b49831cf7b5df217f6ad09 +size 8388848 diff --git a/arxiv_papers/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80396d975de46cc5d15451aee8a85ecb84c29435 --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b57ae7c3e17c46d7c392cf51f747f3b9f0e6fa9932dd901317defbfad55f49 +size 25166176 diff --git a/arxiv_papers/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad20a05866227c286e145cc53231eda06253b6e0 --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f37a1fa707b2f4a09aa0508119c7ea7eae079df2a29cc98c17a65a632bf7ec57 +size 4192 diff --git a/arxiv_papers/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a488894c15bc606869a63065da979954caff155e --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecfd5c52861ee14871c5a7121f04dbb3d03ee9289bf81efd97dded9157b97d3 +size 33554672 diff --git a/arxiv_papers/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6346db615322d78aadd02dd57b36e0d2b053161a --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b95d7124ea245232842ba5669eb68015e400b0d861e7930779266dbc7c68c2 +size 67109160 diff --git a/arxiv_papers/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9d72ddb7943775269e68badf69aaeb95aeeab2d --- /dev/null +++ b/arxiv_papers/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a6515831b8e3a54ad43ee846cbee6232e8c3a72fcfc2468ec210c85e250b12 +size 4192 diff --git a/arxiv_papers/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bd8131e31b93e42954f9e065b09924d72ce8ceb --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e10d80a9de00733a70668a15dc877740c11df51b69ed062b1f9e7a55cfb13379 +size 8388848 diff --git a/arxiv_papers/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6811e75479a5b8d181508c7759852b8ab3b88f1a --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9030712188cfaf3179074ce22d2661f579f76e83fbe27d8e504b1f5eca5f0d92 +size 25166176 diff --git a/arxiv_papers/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e946514e99354e6f52bb6abfacc02a7810936c05 --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae10d6a6547eef45711b7a418197d1158aa81a0f59a7cb13fbe2afa9b6de1f9e +size 4192 diff --git a/arxiv_papers/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5af1268b23bb271b89ba1652db6eda701f98a217 --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96db0dcda82000e8589be71db185ee5aac5d52811eca66fc66b5ea891c7d7425 +size 33554672 diff --git a/arxiv_papers/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd6cbbf05210bcc1a989327f2f9ff6dcbd6a8290 --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68aed84681ae01e756a2923bf1bc3fa4b226798f3479fcddddf659fd66a3f993 +size 67109160 diff --git a/arxiv_papers/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/arxiv_papers/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24d321f5eed7c98c3ca3a3599eda422a44ae904a --- /dev/null +++ b/arxiv_papers/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21cb91da3eece0441556162566831edd1dd8db9edae74bdd75aa1f4b696db9c2 +size 4192 diff --git a/arxiv_papers/model/final_layer_norm/pp_block/model_weight.safetensors b/arxiv_papers/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de5e1d2b1eb1b290b07f9c141c7993ea2aafb6f8 --- /dev/null +++ b/arxiv_papers/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7221d566c6500f5880930f998ca88d53e15bd4130b1b0f9d7a11addb94e2c841 +size 4192 diff --git a/arxiv_papers/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/arxiv_papers/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5192376ffd8379545544f64fef36aca6438a1393 --- /dev/null +++ b/arxiv_papers/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245601b406edeb83fdb4f9a3f31c6823fe854b0822a300f4b1c8d0a8efe1ccdf +size 205914352 diff --git a/arxiv_papers/model_config.json b/arxiv_papers/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/arxiv_papers/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/biodiversity_heritage_library/checkpoint_metadata.json b/biodiversity_heritage_library/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/biodiversity_heritage_library/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/biodiversity_heritage_library/config.yaml b/biodiversity_heritage_library/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d33bcf3090e72cba52467c362758fd3d5f6f310 --- /dev/null +++ b/biodiversity_heritage_library/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredbiodiversity_heritage_library-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6e766356eb045dce3c6201000a7af75fc288f2f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc4c6067e5d4777800f65a598177759e8bd2842aa4ac41ad1c7393dfdf26b600 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c970728a364ef6c5f16b6a2267b403fc164d498 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd349ed91c5b4951ea8459d93b069d4dc52e1bf10f11418026d83969b7b038b +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98013848d85a4efbc739f3ccd4bd5b9b0bca0be1 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c2dbd6675fdfc60729394ffbefba8d4e0d723755034361a6095371d41d92b5 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f5b336b346e9eec4691c4d8c01fe5ca7d63334c --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ead3b4670794d22f02dc655fc0181ad95894d882e328c4b98924d9535b01d98 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f68f7ae36daefc5ee428e5eb6ab0a43984d1482e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd872346cd0de8cb545658568663bc757df5defb93e7f15ef007dca6580b7779 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38846b2184a4ed3934e99c6743e8515b701b4aab --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc49bda7e6cbf49ece7e0abfc52c8ce960cf059b954f18c2d729f78e7c73e617 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8684c4017f1a672c33958a1e0bf228a22e95e90e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1484c59f83817b9e6e6702218f3e5b746f35983b99745ab2737fc7154c4af061 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..040d78212653b6465f063e7b764d80468874359a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b04a47348bc2b295178229811e804b183cb700f886f626b7f71fd4507f57f48 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dcae7b173587e09a0e4df28d299cb4761f6c037 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e856633e03b6b1b578832673db1ad2f54064fac63e776e4e2dd28c4c1deba7b1 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85760adf3a7ce856af1133c68dd1d25d6a352816 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4afbf2fa902c012e6ebcc92b0182a6c8ae768f9f06b6ec211656c553e239c6 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8daf0a7f8e47e18d08bf4b0243aa74e75a38685a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7196b0c1c9951b6a2dee75b97f453b7447c2218555872fe0fd84d91e6d77b7e5 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94940973cda18e80c25e501dac4cc41eb745945b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f914ad3ddde263ee96a062186cdececddb0df01c056073a65fc345a26b5d81ae +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e766091e119e4e7bfa77db8e6874621df5d905a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:410ed511077e32a6174123cce14233a6ce2b8f30a4c6f855e68bccbef24a5e0c +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ce3156674955be46dbf72e1832950927959f56b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f90ecad9b2f56d99a16f1b7c02542f4c625062f27b03befae5e97ef456c341f +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb69a9be711a3bec0624d4ecec230e30797d3d33 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4e408e819b7509859e8735ee866609562a66447d22ff52691e63cfde75a03a +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7edf40592f848b78e0a15490399d3c990a5e3f4 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff89bb2b05dbc6f0819c77ee2d3cded6ed9e2ddcf0700858666971cf71916b1 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb099d2f00d5419e559cb8ac5e697ab40b464090 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e2133b636b7478be8c4a4e313fcad765ddc9117f74c2fcdffe924ff315a743 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ae4443291374dea9e758da2f79ea30b9fc4e412 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1649f40cecab8c22d7530eb8549c5d5ac51abf278c54dd7074c88199e1299a +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..367ca627c61a2ec46da9930489ebe0acfecf8def --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38502f66817b3f1833c59e7f8b51f63d92a533a533737223d73e62ea34ee1d32 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df1ab83026ca7eaca67184b5abfd3d46df89f8f3 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43a11c465483a64327187e520716d1b33a4d1532a44920db3cee00e0f107ea3 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..978b77e85c890141bb3e9604894a60373ec5214d --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd5d7006dca11fadb294c19850c17b10e8cdb04e386b61aebc94a339dc72778 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ceabba884d2ab9e7748407a0a53593e31f75f6f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c4d9d1a0871d2a8ca3a7298e574562e8ffc53d9d83da9c8aa651a6e0db4141 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..397be4a4b24f1bed81ff31c0b10549f6c8b8dcf7 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f8458843bf0bebde502dbf3e5404aff800560fb75a6ed385e83362613e911f +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e77f494f0402964a1d77e907a88ee19dd3e08350 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6185583bc4414097dbb4a21506b92612e08b3eae6db6e224e8e5288f4bc66c04 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..986210a4f469041bec39240a7c2c13c97431acf1 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab7b5ef293d06821eb9a02b8c95b5f8107b02076206e85dc34cb29ede3cc29f +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33a091c5514da0ac84074b45e6c4bf15ba8fc67b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b3afd9c24b834948d714c4287a906f613dced7450b358d50929ff8349760a2 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ae0df72e3ab0f6e51fbeb6499d370cb9d591062 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0467ad38b2f37f3b30e6094f1b285bd75d8fb735cee246f31aa32ddcf4cc5d84 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..509e67be5dfb87e16e07ef0ec8fd099ce606c7ce --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57e8569ebfcb97c5e3754723b929fa5b8841fd13c564000161e33d0f2136b37 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7e0512a85a70adb5d4d7b1b60f8fc3d000cdaae --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e1e9e40351888275d9a0db927c37ed068d4d46abb212f7e79dcb61ed3b2eae +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98d651065c2fec1131df2e63c6a5dbac0262c9ad --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e4b210144b7cd78b5c3fa254c01edee9fe6c2be1d04153121731e99c392d77 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6cf669d316014a3df0323615ccdac0f46fcc52e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ec30f3d6979a18e29730802329c219104e49e94f11e98112188dec9aab1876 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1afcc81b114b181918c9685b50b1b5e033b41703 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d6ecb3894f986a4c8da53c3ac2ff0a4af5f9c98011569129294325d496c579 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fa7d1d15b63c08e8473e1d0d181c45b4d20d02b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42867240b93737eeee2d55df74c9231d378e0ad03303776b82e7505bb9c55e3 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbfbaefb6dab8326c397cea795ad54e96f71ff1a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6020b41263dbf06ac9d9431942854546bdf0dcd83e93bcda1aa0caf26b01f7b +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ef27ad436752b8a53757b309468f4f1d9acd817 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378e70bc979b7cd28fc6639cd58f989e05f853df9195d3342f7b347ae5888166 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b15d39614f0c20aadfa1f2c604d23abc8beb6dcd --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9edbdc890a8aa5b7075db25325906702a522255e038312c5910b92e3205ec03 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c58ab043fb7d2f54b084c46f4ce301c994627eb --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867b09ddd539d86a3b66e3bff1c8a67d1ebaa5c5d6bce6c9d00a20e6b085a502 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce306079ec164bec8508df1d7f1b27a1e42df0ff --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f957ac050cef4a455cd04395b7199d63cf7c320b50103167f2b8ffa99d5f7ca9 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77c7700a0b35de743974e52c1f9f050038c1258d --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540747c35ad3c754df5e6cde217fbd7fc9752ca3355042d4105e3d5d1bd69797 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47582d12f8e079403adc67bc2f6db77befd1d429 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67153089569ea1ce4587be573aed568adb15c52f3f3abce28c5985ce5c03e1a0 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c198f678e95b97351e4cdfb0b3885c6c50406da8 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9779a34e13287e0f8cfebacc9d20612a8cba431a6f700660c7cde00e6c293f45 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49d298bf3b86a05e6a83b56d592553a5591c7d57 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1582ee660c7c969692a247d1c56e1fba82e83a25ab4e3fb89d99fa627409c01 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d575b4aeb731093293a031332592fd4560935900 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1f50941048ba537bd5bdab8373767a5feebcb12b204f0b9458a004248b7336 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d8e8792b0f748ebf8d011e7d3e5fdc3caf5af4f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016d37084ca46d5adacae53b610b534602e035ca395fef29c945329dfbf0b857 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41744bb51de3004ac44a392382cc20c47c546608 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b2068c1caa06bb2a1e29330b5eea087199a439594a2e736c42046e60e4b87cb +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e1af61342494ede805b70479fd3695c3104145d --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd35ff85cf0025c0e7035f3732a22d4de206f5627a37b2ee972c0aad173325ca +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e46759ac3f3dfcfa6339bb1daf162557483fd43 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49131357a5ac6496fc53b4819e369d5d83adf6bc715e325059ed0cf0b712cdc2 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f7c65f5f2b6008aa74503eb1484dbd97f8d6c0c --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16410623141a6cef3f4c8c9f94447201072eb0b679862d6974aed5a423d6290d +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af602568b0598d805ce7ba950cc50146f7118bb9 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14698bc64df856e6ff0b9e073a0a544a8ed766336a79768933a126067d431435 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ab967b34ddc5d9103dc471d3bd13711779c9426 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdcdca542e8f9a210d5b8e8bf85b42e04899cba49329e31af810b72d77fc15f8 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26bad9ab7a7d17d404c25e79a67c551aff820897 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c3064a1183fb7dac5e53fd1f6f677807553d4be47593cea7d71ea3e8baf520 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..032d7c237cad6a24b6cb76c61071e5bd484bacf4 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d652101a0a77f235abe5cb7f7540877f2ae8e8fb8e1209fca5193c915d31e866 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..decbaf92f14d23bb7b7fe9f8f7b3d6669eb55d97 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5340bab698e5a3f1b90a68c6d34774d1f2edf83d0e7b8e8dda5dd566d9ac31ef +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a8607eaba8b1831d65f81897ecef2cfd7a1b160 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248f0bd6120288a8199647eb1a8a9335112b7cc39bdcac61282886f80249538d +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa88db7f07f3a96dc1372fdb7ee8873864a83c61 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d181f6d6d09a79226530bb78d7fbc69da1c1ab77fbb94eda749dbc8736d72ab +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6905935b5275ededb907dc8237a0242b16e7bda --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f52f849b7b7a78d217a96eef83525c26a923e96dcb3a1e89884f01d9017931 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45c91150b262ae34906c97728a8177acda81c26e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7f19e0a2471d4078bfd1eeb2e85a4a730108aacb04913df5f9fbabf0033f0a +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2da0802979774f83bb367a386471091c4b25abd --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0496a24c52bcf3b3a7ee6fc12904a7a0458de2f7d0849281a3dfc6d09492aa +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8117352357535ac25165e060385a99e955d9848 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdaa3d79f370e80f06d7c8f42a338e8593b8acf061963a5e8142b8e56038a48d +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5653b98a80b15a02f847265bca641f762925a819 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0068d359ec6d5082c6d3461f810f6f4814a47e16020f6a757b860563d11c8b1e +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88a504262758a182bafee34ab220c5158548495f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5206e1fb06a40509383cd526e58cacbacedf41cd3f126e7c819069e1a068a89d +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c46237e3b45448d7662461c5cf185056270bf1e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8512a16bca75871f9fb4e27c1c981e9919a5fdb45a12f845c4701a23fdb6da +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea9e407726604558973b8d77173279cf20040ef4 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df34808924eead92d8d5c4d9f814dceed43bcf89e09ce829a51d64d7ca3ee24 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4579c0587dce20a015ea82a86779735f23f4830 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9febb6f4a42f8603248380aea026f9aee01114c0af95982bd046127e64780100 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..908471b8484dd4f7dad0ff7239e25ba8f7636f98 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714a038fe1ef415913c9e15c9b923a09aff2ad600884b3f00c52f28c1b8ca6c4 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df205b626b08beafe8a8953d8b284514afd45302 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54378e60f5138ff4b6f86c5f66cda40eff9f48db79f253d4042c500ff9fb0dd0 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..056881c3693dbded6176564f45391a89e48bc735 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f509930de5ddd34c1e99fe069827697c9d8a303082ef945f66e40417d1a53e7e +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c560143adba75ac4a96c1db0f63d7796785484f7 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf86dc79e80740dc241a7048c265ecd51ae4085f9c38dfe7f73d3847f617d8c +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17e9678a174e0132b0495d3527c77dea32f58be3 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e560a15a08351d36fad5570d77e02fed45bf24d6b8232b36a23775be9c39a10 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81b476eca9da4ea204b6306e7c16c2f9fbf6e36d --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d699d43c677a943b7a1475f9744303bb4431c4ad0b5193966e7b91e0260438 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf0463a90325b0f1044028910ba3a8163c1dc83c --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e4d77fbb9fbded9c842919103b796c6f4b12ad390c9fb6a880ad76eef30d4a +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c43ffb09710f4f53bb0742cbacec552309963844 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33caaf8857b4c65495caf5fcae0b8fbbf06340b749eff0dac644424f9bc1a0d +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27edb4bec6a433b02845d2cd57163f38dff9392c --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa68fe93d31c9d12a79f8c7b0ca831d2a3d3544ea2b57d6e6f2786ff086ceb8c +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c9cfc8f941555b470aa2e0d10b7d5f85c26d7ca --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1555665ff80f14bac0593c277287cf5e176ea5cfc05b0ebbfb21dfb7efb72c36 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a9eb0cf92e5dfe168704a9463037ac9f8c67202 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b819abb47c6b0d4774e5f43ec924411e26e6c355b34ef159713156d3033e96bf +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ad7ccc935cd241fd41019985e4cd52d86e5e012 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fe5b3f0ab3207a03245a46d62f89a9ede791a41524e501d44b117f9c1319d9 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4abd557b08e6d9e8822a88551e34ab27ac75b759 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3b3d59b515c3066fbe6839116b655c3b6c858771cc569c6c81275dddc2584f +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03d48dddfa26dbc61c388632e31ed0d4678b3c9b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298af8a77f0d13c4fee27d5b55d9524e79b6b1a64fd166f97577a668bee36287 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b0ee5dc6a8eb09936bc652d2a67d9c87bb25b6a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f7bfab34fe9a9fa57afa938d2a619c18a3d4ecc673af08aad1815d7da020c8 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e2cbf0e8ddce904ddc7803590f6fdce557d1a62 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7957ef0d091be3e01dd6c4c8db6afa1f9d467d98926462f7b04ab759960fc8 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be8a011b49737017cfbf27fbf668925d86d21ba7 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a538bdb670eccf408727445603cd95b26bafbf266838e7dca7dbb568d8dd346b +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..033cfb4b099751261b26d74406237bf3b2f90988 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99e3b7c130c6a11c2c45e80baca53b4a2708c539cfaa582bcddbac15748c668 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70583ad6058f602ef8bbde0aca67f1d3f4e81bf6 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32143cfefa138ef942664d0dfcac04deba2c14af50bcf0f757d4f197f2f38267 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24c92d41d0ee92e4dee3cd72b5a1917fd56a1171 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88767a684c030a61e9d44db1b73d1eac7d695a736b1e906552ad525c57191798 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dca1b8867a1648119fbf83bdf6f37e39b0525204 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c56e5dd33cd4aef0cd6fe8230b2e36d390f9bb23ceebbe8e65ca66325e282e2 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cef114b4cdd2cadcd20a08ba75215c7a5bb9ac0e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103c61e1a761e4f8ca7a550cd95f22e32235d2c1c4db32ec679ee47e066d9987 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e96cf9f2e2035e778c2be6f2963025c621dbff90 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813a93e6d5874115f1681351e32418796a14626b6487bea3caec954760f0ca33 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59fe19717b077e39b97cc2bfa4da4462730ddccc --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d014f7592047157119128f29009c56d7f0f3629bc5c7a56e01e47a0031c72e +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2ecd9dfd8c0f00db25fd807c2eea0ac7dea2aed --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86c79af7c1a77768c2187f3739d330b3c0177f7defe37b7ce7cb4f8393d9d74 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58f83f478a8c8bc9a5ab73e4daa273196652f788 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cb03b26d421487ab780126a33bb773a8322ebcb802fb40852037a61539b695 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1cd4e8e1ae70e2b1336757923c8e111ee235962 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3fcb4c192267a8d1dcee95c48e37b70b088ada960307c21d4375e14c0bd4bf +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a86766ee401136517559e58a39bf13d73de192b5 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb6111690ee634b46698ef0baffd35afc62fa4a139db0daf494a4554b69255f +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48ee9cc71606866ee9221436ca25c7ef924fa7f8 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94273decb14c7ceb7e3abe878881228efd94f9e2acd2ddacdf98379755d7fddb +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..271fc3c7bfffa4efbda4ffc5ce2d384c6fb5770d --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e225e22d47fab7a1576c80f02e9c44c39b2c7618b49ef797300b7550f8939c4f +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7cfb2e900a35bdc2df1d01f725d9617eeceeb76 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350d853103b9da050eb72140f0ddda43f7376e626f92a30883a6453301ba5752 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82fb7105eaadc472aefa34017a97b898f0235e9f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc0ac87e3aa7c44675bcb5368012221c9fdb7cabcf0bc2d02251a1a8be9ad30 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84febafdb40c41e85943c8e7f8f2662bbbf6a1bd --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3d6935f97342442897da010dfb22deccb0d93073e78213ea2217331f4db8bf +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20b5af32246881cc47a659d21502bed42d3a00fa --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6272876c9fc7d4849c210b621c41375447a883af78d86ff3dd776530de938029 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1144e5c194b45c295d2c3e5ab78fabafae53c122 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036929d4a1fe8a7a1ea9ab4d6e2ed00b0aa2f2429b15b272073fc19e4a16e5c9 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b74c6b028b1354e4d513861bb821f2a3287e01b1 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d28ad083a746d48e240fed23fcb733b2ef98c57dcb37391375855fb63450d0e +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6153185f175b9c2d391681086e833aee5b708318 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ab28f0f3cfbf7cada0a86a820a398f48d145ee46c034e08690adfd499e1cd7 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29a7ea87db5da807732ce6356146d16ef2b0281b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55636dfc9e1b90e6fb9fc2310b8f6e2cf85dffeda3308d7b0afba1b8a2d0252f +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d4fcf3c7c9f8cab01d72671d5ec32e95604c79a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090db3d50ced6e006e4e0849a6836d6afc0fdabb6679c70b3313de8cffc1a498 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7fb650f263d4386650bfe3c4e1988a501b01c6e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406e9bbb9f504257c5c84621165796e8c72bcddf7c134f3f42d2ed4ffeca0466 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3023310ecf091fb72b0a6cab41cd74f4ee69426b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34107c296383cc7010b29126ee652c6af096a90a5ba8e492613ff759a9b5bf57 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..216f05104b28886bca0fa45a4654c208a821ee78 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a777fa2e4d79a1605aaa10e6114f0cf5de61138d3ff3318496f20d293870e60b +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90babda60a4449f91561f164154e6b580189e344 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f905ddb2e64580eac0889f99c093269741ff753e863f99420b477b48bfb7788 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c4a84bb029cb44359e6a00e477369d86f1f2537 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:061a70946393827483bfe39672fba6539e47bba3204f75e951111523cb151c96 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3372ae9b40e7208c8af9bd5a20a0d9fbcdd70f66 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802c4d1e211b3d03560b3ff8c80bb9759996778e27b2ee49dcc83a75ec86b0eb +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f6b6386f50f3cd0b0ffb8fceb9b24a6b25ced01 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3cea1d189515c0fe8da9b6599b408474f6f96b8f586c049bacaf90868b01d86 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35cdb795bce08cebedb427605d5402c6188e7809 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303b3d996ffb4ce7136981a680b2396358a75900fdf6c0e25fd8e70cfdf84904 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7693764b7884e46b2dc72dafcc81fc2e9c86ba01 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:626c4ae16c886285b4939df744ba256fe7897b94ecff1aab83fcb35ea0fa8ac7 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39325fdf994285d6bbda094283c57579588a0d93 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1783aea5e11226940b921ee65f5bb5032b1bfe722111848e952be1e5ff3513b4 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba0265c94aab3a441b9d3ac6bbb906c0be41a319 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2005d25beb17dc85aa43da2aec65aae4be974a94b1bb0d9e2b8a78cf7b542e48 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8db9c5ab054ee4b04d803a9384af5ad138a09074 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d8ab8ba2d726737664c41d4d1cb642b95f8562fcc4f48771010b9fd08d4cf9 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fabd2a036559ec337d346e4e947425fb09406afa --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbef057bd0007de5bb69d5853566b37b9ff99f20175f45a39ecdbbbcc0ea175c +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b30e72a5af128041d4d7ba461f2fd9643aecfdbe --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1108633cc270c8bf5b872dd2a13396b6dc3ed573fb0c733022a5a5f6e80ec9 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c82c929ec8cef4bd11f11b34ca3349c8db3d59b9 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f403ab96079c8303d7d066b8434d2bfcb51278fbb3860ea5193edfd396f3f0c4 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2fb9cceedf0154fa41d1c87fbfbc2dea29e199a --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d5b599fe1e6b4756008b10ed2f39982e387c7b8d1c044d07d3111f420b6a6c1 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e252c47470134d65e64e51e47139caed928d3151 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d349bf200ba76222fbe79d15957f627499c68e502bc2fc98ea8954bf1a1298 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..344b358c05fdf1322bf9889ca20fc468026f92e3 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e3a7c3e47c084a1ba470b09f9ef905687156c6076e72dddf55723751e62c10 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..470b97ac5a3f0a7456053e437063734c1b683c54 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442be13b4759da8b7559ba57db38bab7edfd03fe16a5c1ae7541e3570ab0bf8b +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c97b2580ace12ca59ad2a338b25db9dec0d2d69 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf0a6bfdd508ccc4d7c12114f636972b59afa79eafaca00e3c350c201e05e66 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15fd2538da35654ca6016ead9f809d122c0071ed --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add2cea3c272ead59cbc8e2b43386fbaa09dc979b7e515ac73d2981767478c99 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e188e88b717425ac547605c1c1da90d1d817b1f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7880c3a6513c612be738bdc5bb53f67ab522754978d339139a9e395cf0daa6d4 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb365868e62a8410af99614b30cca6d46f1a35b6 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60735bd49536116b016728e9fbab410be00e904cbb91383d5ce3f3f9e979fcf5 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7c39978661f4542cdaa8a344220566cfffdcd92 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b78f9ad679c41031733d35b20521555cbe4417d240e7834f99d09c0b194052 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..addf9a6864b3ea10efed2ccd4fdb672d99c8fd59 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288aef5b888a8749fde55a6ae6346b2d39133d973da08784117331b660375de4 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66a808c436aebe4a0a717b619b181344aea7633b --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1d76e06623af55499076a1aecc1f44eb6a12929410e8c4cd5273e9131ccf94 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..540c74346f88a4d9bd9f7080547afb48200cd0e6 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbdeea2c1711ca16e988418734eedf77efc64f5aa15067dd7c38a962632878ee +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ad9cb8ab6490917865aace0216bb3dba43ffd02 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc83298845f39308df843abfcf5e96b9b4503972ac426ea869347ea5ac588ec1 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98ae1a2b3ae026303875c6ca0f810a08e3312896 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430b8158656b3ac43c3c783420d69dcfcecd6c1fbc43b07888d05064ee46a037 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dcc63a552869ee7f2a4fdd4657b29b369272e6e --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0037f98d20caa9841cb775b2870ef798d8b251395d3b9108814b0e60b137bc30 +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bf1da11c6797a3e84b9e4852a70bed37eb19048 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1fc534e90038bd5954ec193fb772443c86b3ddd9598aba5fde66fcd858bae4f +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffb248a1ad8c67f578fe8088a8a6cbf4ae04497f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52606d713ac920086877a51513495095a5f2a5404e9696d66d493dbfc60337b4 +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7254908070de69e526f18d1c19bf43fee2a0e80f --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495e9450da97da946bd71a9f19b89c53f3c659237305d3d6623612df4947296a +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5772ca46c3f9c6170554dc2353f4bcf46a11299c --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a99fbbb233a1bdbf62015658885c93216d41a1185bb06dbb851a23006fb39a +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..249a4174d1e21e8b6ce3b320675a0858df3c4332 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df75b4e0553829ed864ed84fea7031a209a24bf35ebd228256303f2d4e6ebd6c +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42a7ca5783e23ba1d47585ac0749014ff52813f1 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b48e25e8d55ba46b9ad95386ffc950bb3a92f3cb3522cd090f270209e3a31f +size 8388848 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..909158207d8509bb8bae19280c4198a7093d9400 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e126590c05a9aa7d7e1daa3fa7cd25cdea84d3dee78222d945bcaa6df2af83 +size 25166176 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e1fc9842a1cfd0c14fae4db23609f6925fb6c43 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5b7ac13c330e4da6b2173ac8debb3e73c85280c07d79826065591c177a5d1a +size 4192 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bacf7aa7fde092f877197cfd30030a6a14e947c1 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e644656620ea93859c5358230feaf32a5afaa4416668c7d63a7608199936f06 +size 33554672 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56a201bf96347ac547bae014743869deb7e928cc --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82847e6ccf58f08a95982e9ab2f4d5ef73fbb0456cc96c2475291c3e48a5b256 +size 67109160 diff --git a/biodiversity_heritage_library/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/biodiversity_heritage_library/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77f096b42933d62ef55e2c398ad43cce3ea1dfb7 --- /dev/null +++ b/biodiversity_heritage_library/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ab1fa1947f2d75ef32e5ef2b9f9f371cd0e5f2c5d6ff77a725645d103db31e +size 4192 diff --git a/biodiversity_heritage_library/model/final_layer_norm/pp_block/model_weight.safetensors b/biodiversity_heritage_library/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..516d40362e0fabf2f282c27b204b85d8ab61f510 --- /dev/null +++ b/biodiversity_heritage_library/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ba0cf5c1d6981ddc82499fa56fe55176c3c65abc93c703317eb8a1b0a19083a +size 4192 diff --git a/biodiversity_heritage_library/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/biodiversity_heritage_library/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da848e1dda9dee59c5716240b1542aa80c6f6fe3 --- /dev/null +++ b/biodiversity_heritage_library/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd3a4ec13d3640188e67cc80619cf33ef21503a63d7a052140539fb4433163c7 +size 205914352 diff --git a/biodiversity_heritage_library/model_config.json b/biodiversity_heritage_library/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/biodiversity_heritage_library/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/caselaw_access_project/checkpoint_metadata.json b/caselaw_access_project/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/caselaw_access_project/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/caselaw_access_project/config.yaml b/caselaw_access_project/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9327d6a35b3b85507b86d017d2cec7bdf00b5a60 --- /dev/null +++ b/caselaw_access_project/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcaselaw_access_project-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/caselaw_access_project/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f79ab882c4dcfa115fedda28e99d541911e6553a --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04206c1ff98c67e6acf19797cbe1f9f04802916ede9c830a537ce1f2130d096 +size 8388848 diff --git a/caselaw_access_project/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee12760c38261769739b34c8f077a10aab1bce88 --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:633dca1aa33d61c469969897785ca432ba7851d93b963de52d5e46f72052aef9 +size 25166176 diff --git a/caselaw_access_project/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3234e96d076804756ea465a716ce1c0dc5493f55 --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7077a853a84b0bed8855495b2dafdccc485e673a62604f67eb6101bebbeeb731 +size 4192 diff --git a/caselaw_access_project/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68f37725b602e2d586e71d332e4e96f76bf06020 --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadcb7ef8212a40c1027c82c8e9aea0af7f29ad37c37f78e1a9d970b8f00221e +size 33554672 diff --git a/caselaw_access_project/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee3e4f86b406b2f62c179b65aef81918e08f539d --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64d7cdffc2e4ad77b21d7de7d9679e178c7b1160cd6476c091fa1364bec6d72 +size 67109160 diff --git a/caselaw_access_project/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c341cd071df38a471efbfd420758d6853fd8e9b --- /dev/null +++ b/caselaw_access_project/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa1fa7d2ef4eb54b84226dbc77170c1b2d17cb52d2e9adb35aaecc0ee18a6a0 +size 4192 diff --git a/caselaw_access_project/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cfee82115c2015120d113ffe317923432100e6c --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a622a3b30542deab72e5e6eed1579bd58e77aa59ed8c4cf13bb78c218b8d9a +size 8388848 diff --git a/caselaw_access_project/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68ca02b199f18edc0441502af4cb9d90db90f12c --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb73dc1f422e50013615212d38839631a9fda584f307a149759d550be1c2452 +size 25166176 diff --git a/caselaw_access_project/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff99f93fe5d249777c548f520343bc71097e1290 --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3902fa5d87fcda5c5a6f35f2f2edb7049034e3834d8c317ffb0b02569f186ba6 +size 4192 diff --git a/caselaw_access_project/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f82be003c2436ff5bbd167b83467069921da6dd4 --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461629e65bfe972a5dba329964178e9a03092de2cb202fa1c030b31939264587 +size 33554672 diff --git a/caselaw_access_project/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..982e841d5c73ec27bf875feaec4bea428177ee83 --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6b966fe3dc897050bd6d3c6191bc6b8103919d791febd6663c676b4192c3fb +size 67109160 diff --git a/caselaw_access_project/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d71fc1bec5cbdf59e1d5def46621d7cdb0bebb9 --- /dev/null +++ b/caselaw_access_project/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ab66c5a4093d1b752e374c8debf62c6da1ff523cd677c8a63e9fb0ca13e8fe +size 4192 diff --git a/caselaw_access_project/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf08a529b6caf467afa4a74bb17e4fb39bd60dfb --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27aa77d530adfdd3f76d170cdd661311b3444226f00e24475fb022bf7f54b4b1 +size 8388848 diff --git a/caselaw_access_project/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8464b0e681f9f1b1e39396c9072ebd1c2e699169 --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b714d077657e4eeaef0fed20b2d3b800a75ed5a825a9ea4c6a81a6a7539373 +size 25166176 diff --git a/caselaw_access_project/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d6ab0a82a2c303d2f4fd71cf329a25ab33cacd5 --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2956a5df0025e10a02c9f0fb332312377c3c1ec173a002c8ba88763c7b60f731 +size 4192 diff --git a/caselaw_access_project/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4c423c00f1ed1f44e0e841b73f45402b96ec247 --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a034ac99da0bc9622bf5c26f218b2f3eeeb46fd718ef2854a07d3a1c7936e6f +size 33554672 diff --git a/caselaw_access_project/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88ecec0e781673b18df4471154c1228dba4c6424 --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51de7fc90f707b8f8addced129f0fe4a4b826e796aeb697eda87bd7586a8af35 +size 67109160 diff --git a/caselaw_access_project/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5607cb58dcee01fac54af30641e47ad22d40b23 --- /dev/null +++ b/caselaw_access_project/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9475dabb23e5e1575f57cc8eb99436b5947fea5333b0072da655c1142784c7 +size 4192 diff --git a/caselaw_access_project/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..434a5877fe1d1f6a0e766b6ee391b420dc2e3c03 --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c673089602c070e00a5074d6dc46b27a191e8a542f0e5a756ba8f4c178ae5e3 +size 8388848 diff --git a/caselaw_access_project/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7064ad9b3cb53915c8a4f0218e4c1f36e48ddb2e --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c27058d6148034f57732dd91261fe8d18b6f8a07522d6d3328817f8848bf9d +size 25166176 diff --git a/caselaw_access_project/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba91f0dec66d082570e031572a32458ccaab5d23 --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5974cc95cc6c5963e4c85ead56813bab1471b364cf3284e38fdfdfd1f2471a +size 4192 diff --git a/caselaw_access_project/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1c7298a3107f9542c91339a16def010c9c2cdc5 --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82de8516710bc205aa551457f5b1f5dbf85bb9fcee80f7c0573a305f4a130941 +size 33554672 diff --git a/caselaw_access_project/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62446eb80be9ca45d73ee9709db22528660bb585 --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211242ab04b4e0d313b8a8365516512399c937d45514b82d9a8fa6207b1a39b9 +size 67109160 diff --git a/caselaw_access_project/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5508c4a59c6c540e00cb5c0b85c0312c00951a9f --- /dev/null +++ b/caselaw_access_project/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e179ed9a19b15ebf70a1e15c3fe25a01d1c8430ee06e85ee8958afcaf6ebcd46 +size 4192 diff --git a/caselaw_access_project/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50fdf7406334b98faac08fbff4e8602e868ee89f --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4fb46b363b6a1a6b857dfa57ba70ccfd4ea0c21d50c588faf2e275dd3b17b8 +size 8388848 diff --git a/caselaw_access_project/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e066edff4eecc471a594d6d212ead21ff56a979 --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1f6fc8c64cd7cd6aba0905bc34a4d7f6563209941c72075483e93af7d9087d +size 25166176 diff --git a/caselaw_access_project/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdb6736504083e5f904716a667fb279ca75b9a2d --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f5c37c466341fb2592402615d30caa357b04559f54579b7f8d2905f25b69e0 +size 4192 diff --git a/caselaw_access_project/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b88ae81ed7a26557ba789b11b5aaea73ddeccfe --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c64ace11e65b4b9124e32443c1dcb18dccf9fb4cf3a8e919cbcc0e1676e9c00 +size 33554672 diff --git a/caselaw_access_project/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e8616d91f16f3686cd76405fba8c22e92b68031 --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bac9830565e6b902ebf2ff2db3b69af134b65197de2301f97e2a0737c8c7179 +size 67109160 diff --git a/caselaw_access_project/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b12c0166c1ff0f74db03d8d42948030b655ec68 --- /dev/null +++ b/caselaw_access_project/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73b7d6b6041a3dd532a05ffec7cc1f54342e6d5abfac26fc6c3978e40be9d03 +size 4192 diff --git a/caselaw_access_project/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b32c3cff1bc8c17e31f9a5ad12edefc8c5211869 --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c574821606a05d9ec5f800e18bcf7dfff6b4b4bd3b02aa922eb6a5b366848e +size 8388848 diff --git a/caselaw_access_project/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f0290f77d806b49262d03616976ea05670a6702 --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7591c6313f12ce9c0dd91515aa4038d91d67ff0a8a31e3c267c35adfa4df3e4 +size 25166176 diff --git a/caselaw_access_project/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b819b97728b6b49d6c0716bb1b20c93451e8599 --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70661fef2d98ed20ef75b4051039a19ca2222c5bf94a532c91775d827d6875fa +size 4192 diff --git a/caselaw_access_project/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eb50a38b6627f47755a8b5d66f9255e8db30a30 --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b904680daef598e67f29e52bdb2a73534ab01497537cfa307b24524c99dced +size 33554672 diff --git a/caselaw_access_project/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..888d56fc969ccf7ae556d9447b51f7153edd6ecc --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4efa0d8761e7efa85f4098e7a5c6bd78b714228d48544c11998a1559a27253 +size 67109160 diff --git a/caselaw_access_project/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7004f74dfab752714f3fc9e00c1ed9b61511126d --- /dev/null +++ b/caselaw_access_project/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4a7ebde8f2cd6041801bbf30d8d28eddc34de7bea39006525c2aa76ad6567c +size 4192 diff --git a/caselaw_access_project/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9517e135e8fe31a5d687afac9ba7fe5ffd9c5c63 --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0306e5aed6f18183e29a6b7ec905c85b73b4e4cf0ae963cb7a93ee075d1ba766 +size 8388848 diff --git a/caselaw_access_project/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..168473e3d1a953083b1ffc1086aa3289a05ae9eb --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a975ef50773e75814a8a3861fbba26235244cc721872fd959f2f73a0dfc8d59 +size 25166176 diff --git a/caselaw_access_project/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a91565a49baf2627a84cd72266f6404a58e56baf --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8832d435226853ba0a86450d47f16a301716dbaefe592daff4a716bfd358a87 +size 4192 diff --git a/caselaw_access_project/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..243b173f0d2b48cae87eeb195c4a4fd75a67d9a5 --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d474a662fb6aebb133e252ac10da3f0fdbc565f4de9cca911bf36ae0f65bbec0 +size 33554672 diff --git a/caselaw_access_project/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f205680f13dce8df7cc9020cbfc8ac9dda85cdb --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a49b8ce718aea334fa9a31cf19ec3ddfa20bb7bd157affb7d112dc6c4fbe6d6 +size 67109160 diff --git a/caselaw_access_project/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bf79ae714c62919ee179611c5347c4bef72e5ff --- /dev/null +++ b/caselaw_access_project/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61430ac57f39b1663db38726a68b14ab7bfc724b777a8532f7866fc8114c184 +size 4192 diff --git a/caselaw_access_project/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4327e49c795522402c84bf2dcbb46355a2b2d38 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e325a64653feed3c88b767b74467803d030b478ecd24a2a5bd202a24e1cb35 +size 8388848 diff --git a/caselaw_access_project/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7f3514c59b1adf70ad5b7cd369c85ff593e8fe8 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f7017156601e5ed4cabdafa07d9b4e6536f85b0830c6f85c51360aa9aa4a0c +size 25166176 diff --git a/caselaw_access_project/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e46649ac010191ce862bf6a19bf1d68eedaa7992 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7406d948321c74e0fefa17d89ed130f856f1e245ff52150d3aaf06e30c080621 +size 4192 diff --git a/caselaw_access_project/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3659e62461b88445debd7856a97fdb487e3dcea8 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522a0fd3d10aec2b2c1477de2bbbf47cadb79a4f32cf01e91f3deceec98dfc62 +size 33554672 diff --git a/caselaw_access_project/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2ec92cf6b0eee7d442e9f1e3666f9ffd738abe7 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269cf8086ef7a6a4ab28672a7e909443bb44ff3acdf0ff2649678c79a14912c0 +size 67109160 diff --git a/caselaw_access_project/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..256270c66314e66d5e8a15b08e8c4d458cba0990 --- /dev/null +++ b/caselaw_access_project/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ae139a39edd184a414aaf0e1cadabfdbfa9841874de92cbe01770c51df0fb8 +size 4192 diff --git a/caselaw_access_project/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff9bd7ce5d3fdb932b114f931c1a4976e5fb6e61 --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54e3870aeace21c6ed19b9399ede9f13a66a42604c10b0fa0510ce2fcf607d2 +size 8388848 diff --git a/caselaw_access_project/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7406829565461e2d6faa366e0895d83bdb336e02 --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6990fa5a2f54b816388976e4985845a040b8ba1c8922f230c3078cb5fb5fdb +size 25166176 diff --git a/caselaw_access_project/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e959406c2b702e4316a14dbf57975cc2222734b --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d6ad9899bb91eaeae8be35b4b9fad0fabab38e76b4231ef363c65b62bb6f87 +size 4192 diff --git a/caselaw_access_project/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa48d33f644e40322a43a74ad5d837c535d2823d --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba05e4e77908a0f9fcd99cc212349d84e5cd7795de6842a3529878d98346c92b +size 33554672 diff --git a/caselaw_access_project/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9b4a6f9afc681edcd1241c9aa09251849e0d422 --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f93411ecfc7ccb3472704e9eba0a96167319167d0632aef0304e48196e03392 +size 67109160 diff --git a/caselaw_access_project/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f0ca27bbe50e2bb2cef68d5d76fdc168721df2b --- /dev/null +++ b/caselaw_access_project/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8086acc51ccf4623c0f9b8734f84d3a5e337e5e1ae8d62db7078ce93f67711 +size 4192 diff --git a/caselaw_access_project/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9864a4a6c48d62975e87b4a88e408fd7f7366f33 --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2ecf9aea2c5e177e1309de17b7b35f38186e00dec86f2feca2b16f5cd3beb0 +size 8388848 diff --git a/caselaw_access_project/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05ec494080d04fe07451d51bb649acb95c02f426 --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe15f4e6a3914ec06d1e3711e5e6fec2f30ba2bd8dc42596706757555dbf90a8 +size 25166176 diff --git a/caselaw_access_project/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35b972cffd73d635b2620c24f4ce055af0e16ee4 --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba016fc3e6d0c09077430bcbf4fa7770062a17b3c9aacee456344651ddee9e7 +size 4192 diff --git a/caselaw_access_project/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c21015267dd60818d7b0248d4c7b5290bad40aa --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7657a22e60a1ef7a70eeea85b816b5cb2c593a9fdc713e9b4c6ca1e9dc7cdd7 +size 33554672 diff --git a/caselaw_access_project/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d311048d8079d76b0dd72e9fa0f624c1aa29be9 --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a1f170f9e162744eba1a2e284a91ab073d9f3195d19b0cc80fef85b942a9ee +size 67109160 diff --git a/caselaw_access_project/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98eba4461d819f5770e1bef06c4e4843994afe86 --- /dev/null +++ b/caselaw_access_project/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d464f6410aa06744a202039b03e69f5a64503c1c8fd173e4869832c41c38450 +size 4192 diff --git a/caselaw_access_project/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..440d3e54d0488fa7bb441ca9958e7647d70cb27d --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84506ce225ceea69c6b9992b2d5dda0baf4f77683b02e3da7ac4ed6d49a521fa +size 8388848 diff --git a/caselaw_access_project/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8682c063f9c2d8fbe1c2b2b2d0db537c77e2f083 --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abde39fead037b5fc4ac41c8e3ea53da1f20608dee1fb6ebc52e55d94f00f8df +size 25166176 diff --git a/caselaw_access_project/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bec1db7482034af0dd7e94e285c2a982d88b798 --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abda74f396fadbd2e6d979e0dea120315a170486b2ac42a5ad66f56418c12977 +size 4192 diff --git a/caselaw_access_project/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98a10824830ffc2c8db091d122b9649d9bb91370 --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751ed18dff70939533aac6e8c05b598392f582d6de294b126641426431ccaf82 +size 33554672 diff --git a/caselaw_access_project/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..538f39a5d1b31c6e96662b65ec356366da89a9b5 --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de6d24dd4e82c1bf508d60483e73c01d5d92000c2252abf398a1337396e2112 +size 67109160 diff --git a/caselaw_access_project/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eb2c912f5c71b59d60a2b39dd91bd95f6c791ec --- /dev/null +++ b/caselaw_access_project/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e726294244e46672b9d5e6f61af125e3b5bc54a47e4fee269ff8ed780e7a3f +size 4192 diff --git a/caselaw_access_project/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2460977d97e532a4d408ef22eaac297d2a655298 --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb20bf415407055d981996b65bcfa2546ef288aa5862810add5bfcacd8deb59 +size 8388848 diff --git a/caselaw_access_project/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcb39d6a857ffc40d8e94331445fd68652bbaecb --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e603210c5da620e763b59dc70bc77d1bcb27997474953be2c87ed698bbfc22c8 +size 25166176 diff --git a/caselaw_access_project/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae83ca52003175fedc7a93c241a674dec9af0109 --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c183524670f21ecbd0d60a2af223847142b39a18a34d2947cabff44e8aaf6b +size 4192 diff --git a/caselaw_access_project/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f9a00fdd01996d5a896e7b5aafb3bd8f7249ccd --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64cea7b997a17a4d011d06fbc65c8383b2d34e939f005c641e67eeb1a489c05 +size 33554672 diff --git a/caselaw_access_project/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..292800cf28650661e957d05f2f6dda2ee4ef0f1c --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d863e6921c965353504100833dfa2b77b4cbb4616f8b2bede370549f5626f2 +size 67109160 diff --git a/caselaw_access_project/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ebba0fb22b09c57079b53d250ed7dbbfbe3a8fa --- /dev/null +++ b/caselaw_access_project/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30dfd9cde42d9abc63a78b43d59d366e581c1d68294586c576852149636cf33 +size 4192 diff --git a/caselaw_access_project/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cee3289d11aff32f34bcacb5357e5b73dffc1fb4 --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270f2c8c76d13d177814f6bb5b5db27da67cf4e5e12823f83d658f3299668963 +size 8388848 diff --git a/caselaw_access_project/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7182d28b1f937e18092bc23f8d584e9125fc405 --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2339215ee70fc642fe11a4b7f9984383c880c9936b28fb8c33ac27f9f969aa20 +size 25166176 diff --git a/caselaw_access_project/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62a83acc713d6ad29a772700c1c56be735c63b8a --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4209822436cc6f37e99656e35b76604e8d0883efff0b3367224c79927a715488 +size 4192 diff --git a/caselaw_access_project/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89d5e898a6bdfd94a3f6cb857d4347f3479b5036 --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e173b33f1d60f046143a681a5ca77cab73309c05dd65c447b2aa0f6ba87d4b93 +size 33554672 diff --git a/caselaw_access_project/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8847a55852b237ea6e4f516f8c2e405a9bc9ccb --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e3e956fe8a56bc94aadce16dde2944d4d28cd55bdeda97ac2158e2266f18a8 +size 67109160 diff --git a/caselaw_access_project/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bba5202a9e5e10c761600d7f424686a161362ab --- /dev/null +++ b/caselaw_access_project/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ce97075a689f235e303bd3eb2bc19e142adebd632f35ba9b4e69d1b8f7fd7d +size 4192 diff --git a/caselaw_access_project/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bb21c99396788466fdd53547cde57c53d8852e1 --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b3569c9aa4ceab49930e8107d4b5ee6b3b09ed472493109e0323d2242afe5cf +size 8388848 diff --git a/caselaw_access_project/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61cf4a6c4035313baf24bc6dbf185122111c7831 --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b64869dd15a6485da4604ad1fbf8fca1b85075795d65c46b4dd28903eb71011 +size 25166176 diff --git a/caselaw_access_project/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1a741c5e1fe14a16d0d9fbe3b858d7d45332fbd --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19353d7af2f2e52fe5d79e0d3c5ff8ccb5386609c26d200f3ab6b6868da1fe4 +size 4192 diff --git a/caselaw_access_project/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8a1d164c83d31697ee8e1198cd8c30ed53f241d --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee73cb798f140717ad2036b1b294b9957c89defd643cbb70e74bdb75dbc725a +size 33554672 diff --git a/caselaw_access_project/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abbb01e13e45c04554891db13be97d1253fed8bd --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22f4755c595bc222636b45abb7930239f6eb5ff232e16e3013672b5ac0f02af +size 67109160 diff --git a/caselaw_access_project/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8416f7005efe23d3f451116019f4d75727f258f0 --- /dev/null +++ b/caselaw_access_project/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4aee54ce1340e83069f18d33778f4b0772ed373e84d788d47565429f2c28e4e +size 4192 diff --git a/caselaw_access_project/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..918fd4725b85a1e55f16acc77da72e0fcbf7b65d --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:740c03b6e1786379b5b4810a508de530ae5249dfe1b9259ee2166f29453fc610 +size 8388848 diff --git a/caselaw_access_project/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa5a420f3a3dbc5a28999990a6fbf2bd50e41b50 --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e13a523459979f5af6e99213044de41ff742c7cde0fb601c78799dc3abfeac0 +size 25166176 diff --git a/caselaw_access_project/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..757e403ca96a73046caca4e5c57cc80255efb28e --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9efeff97de5c68800e065435432f8aef4b8c329a25812a55e2003084c1b91e8 +size 4192 diff --git a/caselaw_access_project/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16c61469ecb751ad333c13b2e5dfcbaa7efe234f --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be02ad58ae39b198300ff05fdae3071e4ca616ee112b8df5c4ff243377d62fe +size 33554672 diff --git a/caselaw_access_project/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad84867fd59bfd429c03a8e187204f74b6905e0b --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d96ab85cfaf02153bbb7e70ab7b60be4b5006cce57e93dd235094ae1304db8 +size 67109160 diff --git a/caselaw_access_project/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffda534315cb6ea5e1531583d4ea0ed9d7b7414d --- /dev/null +++ b/caselaw_access_project/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a21b98df15db6f25d50182d152ab1bc156e7f2bafdd810261ded073e7abc6ce6 +size 4192 diff --git a/caselaw_access_project/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63211faecbb242eefeb6f62b98509fe78c4fa0be --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3bb512c07787a7a63ecc419653a6f40f6f420037a312b04a40d49240f62db6 +size 8388848 diff --git a/caselaw_access_project/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dd9639861bebd2fb6a6fd22dc2d371d49f0cb00 --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb8b9984ce705f69f62de2bcc767b192c16530eb56fd3e0b852a998b71e814e +size 25166176 diff --git a/caselaw_access_project/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e90e8d3c1f9970f92cd5cedf0f5b2d6a9f9c219 --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e841208abaa2bac04fcf16a22ee730cd80beaeaba85a87632e0e22f646b08599 +size 4192 diff --git a/caselaw_access_project/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4461d551afb7a6b788dcbc590187d944c88c7165 --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85682fe3590ce70875cf6c63296add2b197a2949015fffde91f79a3e75c335d +size 33554672 diff --git a/caselaw_access_project/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fdafaac146158697f22ede3fd99b5b46a021904 --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a238c3ee9c7219e1a2e3b485ed983fac54944e1f26b52b14c1e811e64a93a9 +size 67109160 diff --git a/caselaw_access_project/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcddc8f0e13d61986c4d29ee81060511757d3704 --- /dev/null +++ b/caselaw_access_project/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d5b5248dc353427a27731b29eccae32b6522118e411b7055a7a622fdf08c93 +size 4192 diff --git a/caselaw_access_project/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cbc2dbfdce40240fdf3920e3a2d6612313b9741 --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c2750be90ad614ab8b1f38fa10394139120e51837b73f9f33391bf532c5ba1 +size 8388848 diff --git a/caselaw_access_project/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809d379f3851e4641ee18373348a337c40f8d2aa --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9f8f53b15ff80d2aea00a274d5ef80f597f2d648a410bdbd2025bcef21d755 +size 25166176 diff --git a/caselaw_access_project/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23311aa8929b4132c34f3368540f676221191d61 --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860a2691166e2357bf6578c1f23815fb490f4cf69cd161ed22862819711a7e81 +size 4192 diff --git a/caselaw_access_project/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66ac30df0dd29f51c2b86c33dfcf3a02eac22d75 --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2135c7849208bbd493643e9de6a07ffe3adf39fb1845a338144728bbeea12a0 +size 33554672 diff --git a/caselaw_access_project/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4588a8dcbac75832bfdbbdd9aa8cc0bff43e8033 --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998d1c58a69354af419dbf4d1a683a81b305e7f1e3ab42f2f7ea92d68885d14b +size 67109160 diff --git a/caselaw_access_project/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5aa19b2a9272a482453cb591939f344ec52f3081 --- /dev/null +++ b/caselaw_access_project/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731acd1a133b59da5ddfd38929d4a80eb6525dd72324267bb827eaf981ac7331 +size 4192 diff --git a/caselaw_access_project/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e749262a43b177d7ca39993636ee555ee088cbf --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b92e7a41adb5ddb92e583c506a00580ece07f64279eb47c721526de1cdff2c +size 8388848 diff --git a/caselaw_access_project/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c1a42a8242bd48d518326ff52eecf280c419967 --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deae364c86fa3777251884306384ee6246e1a7eb791dd62028415f28238db1f5 +size 25166176 diff --git a/caselaw_access_project/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1013d44bbb98bfc7fefe926dac2d60714f69a110 --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90947ab0e6338f882114e0a9efd1916021a8347ba73a763662d50657cd49287 +size 4192 diff --git a/caselaw_access_project/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cc8cd27d2312db7700fc7d6ee29cc09bcbc945e --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce3c33d4a14e5ff26f283776d9ba9bf70ca1f493ac5440f67901f5ae2b1e6dc +size 33554672 diff --git a/caselaw_access_project/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f5b4a6bac873f5f20df76013c35253841efd91e --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6570be8890024afa18a8792668e1b937eb994fe0e13f89a299d05c8451fe2e00 +size 67109160 diff --git a/caselaw_access_project/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2d0f20f00958dadda0b8b956865ea8fbaca0588 --- /dev/null +++ b/caselaw_access_project/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e128b14058de88342236632f9608b519d3996dcd61020e520775681eb0d5c4f +size 4192 diff --git a/caselaw_access_project/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..601952ad6f76c78832768b6a935237fdba71459b --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244dbc08f848b9bee4944542735b65e2c21f0215c44024d5ffbbb03833eeebb7 +size 8388848 diff --git a/caselaw_access_project/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e7056abb3db051c810ebc4aaff8d2626fa35ce9 --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b67457e91689284df43cf9edb2516cf149dc885be294ab8d8ab3ce18e1b3fa +size 25166176 diff --git a/caselaw_access_project/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..279ecf4bbfb54d234d2f9e2293194fb2bd95fa02 --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b353fb32099ece670c8023d5fdabb7819a77405c99f45172fa3fc249a88b46 +size 4192 diff --git a/caselaw_access_project/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ade0958e89d16779250a9bc9c1c6423036879a8 --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc7072b94a805ec3ae777165cf00c01d2b93d11b1c0fe94979975ceb5d0d753 +size 33554672 diff --git a/caselaw_access_project/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bec2f3110ffb906e3cb0832f2e967b06a695ab35 --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7272235bbbb28a5b9998522bd9e90def12cf4d8bf139773f108b75c46318736 +size 67109160 diff --git a/caselaw_access_project/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18f392c90fe9045df095102476eed78094425d77 --- /dev/null +++ b/caselaw_access_project/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a053f89aa97d1a094ddbf4ae667e7e6b7e1dc64549cd79e831a510d98fe7e2 +size 4192 diff --git a/caselaw_access_project/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19b02e7fe4a7ead0f0d86af037a23ddeaaa77792 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccaf5c01cf2798ae6b7810c460f2623029dc59003550d9c983808ebd1cf27d +size 8388848 diff --git a/caselaw_access_project/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41b9df1b62563ab756a2a46525a0bb27f4aee671 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e984d9bf0a4a19ce291660de571b5a109cc190b53bdf44f4a6477aec3756a7f3 +size 25166176 diff --git a/caselaw_access_project/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04905339b22b7da081fe0c8b81d444f6563883a1 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45bcb7d83bfe37cb9de97fd717f1d811ef7c2e8552aaed86a2be8ecfab8683e +size 4192 diff --git a/caselaw_access_project/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd2640f950a63ce080ba03aa0d491f90b0c87573 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c6387f2e8cab8874161d20f5bc6769b75edda17392938ee1e364d8ff3ed3ab +size 33554672 diff --git a/caselaw_access_project/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90e6806827a2410dd2c39c47a4b3f65c4645cb44 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb65dcbbceb627bb84f1109caf7a133e4c80bd2d9aafec9fdc7bd34dcaaebde +size 67109160 diff --git a/caselaw_access_project/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bb5f747f0cba01a2c1f7ab85bc779b3d130ccc2 --- /dev/null +++ b/caselaw_access_project/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebde4755f7dad3ccf6fb38bb0b6d81672694af566b9647d99f8a94dca8648a6a +size 4192 diff --git a/caselaw_access_project/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3a51a33425e54feb4e3be0c334583767449ba29 --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd6ff0eea959bf08953abd95f2195391e21c40f025369677eb70cd3c6d92232 +size 8388848 diff --git a/caselaw_access_project/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02015e0d6b9f3e35e20cebf37e2e735ab2ad2abb --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79398b206729720b5a7a4d6b377622dedb6ac144b1ca3579484884cfe904a804 +size 25166176 diff --git a/caselaw_access_project/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e02ddbae4997ca34998744938962cf35e0e2bf6 --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1481af80fd16f0eccb2c628fa62fb3c5eb68b3f5c44b93c79c2b099cb18b5544 +size 4192 diff --git a/caselaw_access_project/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5924cd694ca94d3ad7fbca79a87478bd7dfb4719 --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08dc0e36ea6e8ecaf40e6414a9313d751dcee69b0c2ac072939cc0c5d6ce0ce2 +size 33554672 diff --git a/caselaw_access_project/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b577af84c58e37d83e7697cab3b5220ee07ed66 --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da4da5c05862975fca9675209bae962910569eb3c2f6f74b1e91a7732f6c1b6 +size 67109160 diff --git a/caselaw_access_project/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de63fed9e2270a2ad661f205f48f1f78c99f9b1e --- /dev/null +++ b/caselaw_access_project/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7214e88a4476742ed8632392b3c5c824e3d5267931195952b2221f61435bee +size 4192 diff --git a/caselaw_access_project/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f678ba940ed692ae9e005e711261535c6e1d849 --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f57aeb90b2f0eb156b36221f8412a72f601246a58408b093e4b474b659635f +size 8388848 diff --git a/caselaw_access_project/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..646afc9ae31a47e96e8f501664be5de010196842 --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a848a0860ac00c45cc9aa73002923643500e94e11412ac2f05f3a67d748e8c55 +size 25166176 diff --git a/caselaw_access_project/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..294648e29cae587db9054c2dd0f4b9ffcf4062ce --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445266394aa64663d6ba5cfe6b4b8a9fef52da0f06a0624e2ebddd1dea90efc4 +size 4192 diff --git a/caselaw_access_project/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0616278b1d163e77fb67080d727c21dd27df9e92 --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e095dd2d5d14d90c4ee43a4319b9717c6d5dab4021dfac373900fb88f9046131 +size 33554672 diff --git a/caselaw_access_project/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2f27c79c64c559e20d7f2dae3d1aa66650fd87d --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:438e585523de0015862b0a1a63abeac910bf64363dd8a891bf174275f9ff1530 +size 67109160 diff --git a/caselaw_access_project/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd271a8788680bdb5a10e89113875b5243f6f59f --- /dev/null +++ b/caselaw_access_project/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9ea678b3a720825e3b0e0e3ac5b3167239c4abcbc59650842eb23c4a01a3f8 +size 4192 diff --git a/caselaw_access_project/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89ae62704d013f4b5ed4b4483b88ca0b4c03ceb7 --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b2f6b0d10c6709f5b70752235f23bce4b8f5d06ba4fe5bb82ecb39d86c2bfc +size 8388848 diff --git a/caselaw_access_project/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3a938d2b13ba3d336bed384fab3c797dd65abc3 --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdaa2d8c765303b4f28640de5116f24f681918979f6f9956eca21e40437a5539 +size 25166176 diff --git a/caselaw_access_project/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04a880d51589df2721f2ac3c54759355f9b50be8 --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27f14ea54eaa30a9fdcf376b7cd316b28e7d8dbffcb2b3c3a1a2720c01676bd +size 4192 diff --git a/caselaw_access_project/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e99cbbc38f939cee1e56b888b296683a1a5cfa43 --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cddb56df24e3062fd37b0666837c9d2dbec5dc56ac45118f6857f2b57333c82 +size 33554672 diff --git a/caselaw_access_project/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f67ff381099e37318899897d767582a7ee1e03ec --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e55f6ac10019dd2b83637a073d81773808dfaab145f3f7c540285ee93bdeb6 +size 67109160 diff --git a/caselaw_access_project/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2953d43bceee53854f742109020b8c345d262fe --- /dev/null +++ b/caselaw_access_project/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389744941f8ecbecd5df48a1ddd3b5661b43c36eaa05f68b45b1aa54cac739cd +size 4192 diff --git a/caselaw_access_project/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3f81a51973f9bbe97a1b38d9083d5bfcf9fb69b --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4d724707af55bb4bcc8a44f0eb9c2518e36e33aeac7648aab78ebce25fc467 +size 8388848 diff --git a/caselaw_access_project/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b10c38df4eceb3dcc2d088207bcb5d0c0902564 --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643b29076aa2196c01b8b0ef0517a296005754edf3a59bb40bd79ca0bf489910 +size 25166176 diff --git a/caselaw_access_project/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5eac16cd2cb22e6c3367827e25792ba2ae589561 --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef422e86ca7754f64ebe914f5ab9bc4359a15680642ddd432581d826d97bbe8 +size 4192 diff --git a/caselaw_access_project/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f8b35b5f22a239ce7cade60b1596e7b88129df7 --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d75af34ac9d12d4a5fbbf91d18ad942adc7e6c5c78cb5496ff74482b765dbd +size 33554672 diff --git a/caselaw_access_project/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abea9c741eb69b674822cbec7ac2b6a74afdc0e3 --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9dd5da512389b4185c1618663cee60fc22dbb7956378b72993e70b3f331eafb +size 67109160 diff --git a/caselaw_access_project/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/caselaw_access_project/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84504396318fe27f754bc92e6f496a38d72512d1 --- /dev/null +++ b/caselaw_access_project/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f63048103bec47d1da485f7afcf2fc705dfb9df75ca774922d3789559c52e3 +size 4192 diff --git a/caselaw_access_project/model/final_layer_norm/pp_block/model_weight.safetensors b/caselaw_access_project/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..653931995bf043dd79587a1ba941609034691cc3 --- /dev/null +++ b/caselaw_access_project/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a80e0234a077ffd11720bfa1126c8909434ad06b9b984f7e5efbece5b12e6c +size 4192 diff --git a/caselaw_access_project/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/caselaw_access_project/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f69e059c8f24ebb0935e1b60b63c9393cba2a55 --- /dev/null +++ b/caselaw_access_project/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092efe03d07d45e4e8ecafef24ecc9e99a4d45564ced845aac0244e353e2e3f1 +size 205914352 diff --git a/caselaw_access_project/model_config.json b/caselaw_access_project/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/caselaw_access_project/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/cccc/checkpoint_metadata.json b/cccc/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/cccc/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/cccc/config.yaml b/cccc/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b09f66f4c08bfb287a6363aedade14ff7e68c810 --- /dev/null +++ b/cccc/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredcccc-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/cccc/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5d61d5c58d2cd2407e25124305d6e2c4d263de7 --- /dev/null +++ b/cccc/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61f6f0c174234033bca361ae80198428f756c1dc0f86b64e47f3dc702a56acf +size 8388848 diff --git a/cccc/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00fca9747e3d07ec8ff4e7861e9902a2385aecc2 --- /dev/null +++ b/cccc/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2123b9f001744f6fb51e440f942d474886f5353cec77b1ccd0112672c8dcf815 +size 25166176 diff --git a/cccc/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e11b035ebb5ee2fe05b5ca51bccbfd566e4162c8 --- /dev/null +++ b/cccc/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8647741bda4d4c6cf568f070b55f84c611fe4f93713c15569e0b649dda5ac5 +size 4192 diff --git a/cccc/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15e79be598e2cacfd8beaa428d0c458e0ffcc44f --- /dev/null +++ b/cccc/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457c5ece6e5e1fe7722e207ffa1cd0ddbf0a8050ea979bb858ac67d840f90061 +size 33554672 diff --git a/cccc/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81c967f97ad7e0b0cd5108c0065b38cf80f35fad --- /dev/null +++ b/cccc/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c041386a10bf255f7b86c0c36ca6d6e28158ef05ecf47fe8e91995873c8f709 +size 67109160 diff --git a/cccc/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b81314d28c6d8c5565ad1ec04a15c70486f1d59 --- /dev/null +++ b/cccc/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77434cf0a7bc386ebd37d423ade2c56c4f3b774c6d3c3747d5900d43f1c1941 +size 4192 diff --git a/cccc/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d45fba3200f3529dd4cd505e77c3e1a2cbf52db --- /dev/null +++ b/cccc/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3cfa567e20f94d810a0d56fac6f4dbbe47be81ce177da159b9efd689264828 +size 8388848 diff --git a/cccc/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9b1fdf95530b619329e22379d946630d030ee59 --- /dev/null +++ b/cccc/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa0725f683329670809146afa22cfd6917086967a345ebc7848ce4b169f7327 +size 25166176 diff --git a/cccc/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fe7b1e1f4b1b9c9299cb2d8222501d43a1c6fd7 --- /dev/null +++ b/cccc/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2006e9a7143f7a202267a08c64c522ba4cd65e1967c16a844f45f10ea1ed26 +size 4192 diff --git a/cccc/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22284a9ec4fb73f26e27f3e9914de7dfe07cc3aa --- /dev/null +++ b/cccc/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12c0556fad330831fbb8c3a74a4989c6465df869ce00bfe760e4c29f76853c4 +size 33554672 diff --git a/cccc/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1584cfc4c38c43a1a40cdbb935bf45723816defa --- /dev/null +++ b/cccc/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5a1bc49c5f8f5efe06e0fbf1ce4ac1bd4144a32bba428109f4e3fe2e2b76fc +size 67109160 diff --git a/cccc/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c102c03a12eaa4d54d7ef2a22062e79205daf9b1 --- /dev/null +++ b/cccc/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8b79cc40a1a46b3f149a9e39338a16dc689a5ac4be102fa824de18573477ce +size 4192 diff --git a/cccc/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19292dac833a49a33c39aa0f799d09118574cecc --- /dev/null +++ b/cccc/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba6a0635a5571b5aff794b08a6f295108f75b0b47ec033fe9cea7ab1cf12ba1 +size 8388848 diff --git a/cccc/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9be5686f57a61ad775501f533fcaebb9f557e7e4 --- /dev/null +++ b/cccc/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e607b0c73fef45925a4b4dcfc752f73c4ddfc7c9d9f2bc8644582d004b784837 +size 25166176 diff --git a/cccc/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a2108cadff1dff93f801349da456d0b91ec6bdc --- /dev/null +++ b/cccc/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b6f8a08e35d2321fcacd153c3ef423fa61581f835c9ce37dd4571ca3f01a6b +size 4192 diff --git a/cccc/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a08e3ebfc2d29a0eb348045d1eb8af7308275e0 --- /dev/null +++ b/cccc/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44fc08c6f3903e9afcc0bc8e0c5d311402fd218e34b37a5bd9e870912bb451b9 +size 33554672 diff --git a/cccc/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3aba359cf30d99f338dead412e807111c18c2a91 --- /dev/null +++ b/cccc/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad926e01cfddc951d462856c1c9a5f6afa44343009676d63f478ae723d289d88 +size 67109160 diff --git a/cccc/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6844ddc1e73eda43c560643fa6401ef86ea7ccc --- /dev/null +++ b/cccc/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ace58ba2d9357750b0ee67067491c63ced833d02508193e675d30c8395348d5 +size 4192 diff --git a/cccc/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be073da278c67b45ee814c675a49676de926bde4 --- /dev/null +++ b/cccc/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af83ab8aefb2291947088284d0ade009b1831925312574be7b9e7e77a91be501 +size 8388848 diff --git a/cccc/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1dc372bd67717f991be7b0641ba89b316d9b023 --- /dev/null +++ b/cccc/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6763f3cfc33a6e9a552d6a54fff038afeda757e28404f02489437a2cd0fba138 +size 25166176 diff --git a/cccc/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b897208af5b56a4f2b22d1cedbc688bf5b61d383 --- /dev/null +++ b/cccc/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea82160efac0ec9dbdddcc94eceb3669e7c8256b48f6adcd370f58cc16fac2dd +size 4192 diff --git a/cccc/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d0fd242eea7ca0c68f1311983ec1ca002cc667f --- /dev/null +++ b/cccc/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6cc9dcfdececdc946b13920c9b6ae977d1d37d1220e955c35b37e79049584f +size 33554672 diff --git a/cccc/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b683135b45673e907d4dd587a685d8e23be3fbf2 --- /dev/null +++ b/cccc/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40f7b139aa8903e523ba704a2b798c9ac654c761adb010368e5bb0cffa38f63 +size 67109160 diff --git a/cccc/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc5934db70e4c09737cdecf44e8770071f4a832d --- /dev/null +++ b/cccc/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c582b66786e6b11c1ada9276d82ecb7efd2f286a2727bd7bf346f9b9adccbdb1 +size 4192 diff --git a/cccc/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..020d8bb231c51686485df8dab06dc4462163293c --- /dev/null +++ b/cccc/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5926897b994c7f4de9b23bc6ed669760fa03f96fd1ab9768ab4ad525ad4c34 +size 8388848 diff --git a/cccc/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaa7eb34b6c76a1351f70cdb25377d5418c328a3 --- /dev/null +++ b/cccc/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b2bac2606afee234af5ecda568ffaa2130a84f07f87985e1dac2053561b02bc +size 25166176 diff --git a/cccc/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c3086dc0562932f414d9e61edf20d581172fe6a --- /dev/null +++ b/cccc/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473edec1b97aea35f9d132469b51c3c38a8448e7c267372fe3b8643c14a4fb82 +size 4192 diff --git a/cccc/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08a3ede2076e61a21ebe4b09593f9cc4f3b524bb --- /dev/null +++ b/cccc/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49120b81b044ae05c7a6fe88e0a0a309f74cbcedf8ebbcd6ae0430a8e93f57c +size 33554672 diff --git a/cccc/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..531fdbd40dfcd00966fdca068b6474380d689b35 --- /dev/null +++ b/cccc/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d13fc7ea26db2fe34920be0457ba1d79de629895574e7c795e272660bc272c2 +size 67109160 diff --git a/cccc/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fde49c2a8d72d4ce23ae6651688083609e9f2601 --- /dev/null +++ b/cccc/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587b22efb4a005dfd2b991466793928cd5350f0202ac7fa9b9ff133181c22683 +size 4192 diff --git a/cccc/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5815234f6778356038bd3cad24b259e246ca0004 --- /dev/null +++ b/cccc/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7569b04e035f6d7960dd9adda5f3f241f381c7f7a6c9eb0327b9816e490ae5 +size 8388848 diff --git a/cccc/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5187d5de654b7afa603f1ecde793cf78f95255cf --- /dev/null +++ b/cccc/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3924502d37ddb999d326580c436c745324caf165954924b82cbcc4033aa2baa1 +size 25166176 diff --git a/cccc/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8414779c065e19b0646829fd029373d21f8d39bd --- /dev/null +++ b/cccc/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6f5f4034b83edd3939516d07e82f14fe7f61efd02449948081d5641bff312b +size 4192 diff --git a/cccc/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49244cfc8d4abbd1eab8d75fcdcfea9bef151ba --- /dev/null +++ b/cccc/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73118fa00507e80d49cfa02f40735ce7e02a186453f2ce213bd907d137e6e58 +size 33554672 diff --git a/cccc/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5736d8defa2adfb8e015182578dbb1d039cd6f3b --- /dev/null +++ b/cccc/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c042273e56db8b3ee04e5abab27a32196e3b1c4d83dfc0512c2f75a7b759ca3e +size 67109160 diff --git a/cccc/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3be8bbd95cc738c29d5dd4650bd7447fa6d600b --- /dev/null +++ b/cccc/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956bd0f7f54706b65b61ff5478e5ab2625824998f7d466e61fd60bf09450006e +size 4192 diff --git a/cccc/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32687544efa678b8e487e59148d17564b73182e5 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7b349c42d4ad183b0fd3e0d54e82bf8965c9266486f375da08f3775a45c4d5 +size 8388848 diff --git a/cccc/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15c6e20cd3babcb50fa591bf4212b76daeaa2f12 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1aa70bddd4731f670a8abd273ca975eb77c293a91de718b4a4652c21f159e6 +size 25166176 diff --git a/cccc/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..740e91883bf8ca25bf06fd4e7b486a7c45572152 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ccaf2ad91adbe380b59cebcc305e3c6698ace3c730dd8646371929e136c738a +size 4192 diff --git a/cccc/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95f4c3c6b2a957de1d4cba5b9dbfaf54bfd95720 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:489fafa809bf9ac971d3b7c618b5d51ee1a8a2b566a99a57a836a80c2c96e7e7 +size 33554672 diff --git a/cccc/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24b05bd50cb5eec33bab8d8e7a0e38257b6e31e7 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c9b600fa55fe6783827c3d3c78a35f786b58ce271a626303666aaac06dc7e06 +size 67109160 diff --git a/cccc/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dda004eb5d78b9fb1aae5a1aa6328fc83081ee6 --- /dev/null +++ b/cccc/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124afdb51b06ca330387cc1dde748b98a029caef23503dc4a3fd85e82acedcaa +size 4192 diff --git a/cccc/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..001a6c70e2a15cb29f9428712461f6d73615e1d5 --- /dev/null +++ b/cccc/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55edc53d498928e0341bc51a9d683d5a486c770f7822a29529756744a6bb775 +size 8388848 diff --git a/cccc/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..495d6a983f919372b8ae282cdbfa924fb3f8815a --- /dev/null +++ b/cccc/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8611304b2b248866c4f65b4bc092a1dc657d79e98c5eef0d96e6158fd0077207 +size 25166176 diff --git a/cccc/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ee755012266387e02f9604161bc4562e4c0df9e --- /dev/null +++ b/cccc/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ded698d0f5d3eae56b1271ee4e5064494a966a75713e77c6a25c2d66482a02 +size 4192 diff --git a/cccc/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02bab985f98c034b812fa690a37b7614cdf16214 --- /dev/null +++ b/cccc/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab36b4f5370ae11946833e3ff3648fa09b6bdfe9cf8f40de9d79d9bf71c3586 +size 33554672 diff --git a/cccc/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28401dd690c3f0e1578a7d4f1ba72da86b466433 --- /dev/null +++ b/cccc/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4667a2e1fa1097cdb7e95fd6c686b4e8405b511d20ede15161abc4d65aef33bb +size 67109160 diff --git a/cccc/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9f29514c946043084940232851ef0a772359efe --- /dev/null +++ b/cccc/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53decf18a9fba44d4136ada978587daf9b7b4c74439d6c33feb98d603e396bab +size 4192 diff --git a/cccc/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50fb0d8e5c75b6e9f4f39dd1d867d5f0e3c85196 --- /dev/null +++ b/cccc/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fb3fcae9572b8385c5cc901857175dec15da89c618f3993e2ca233753e4a7e +size 8388848 diff --git a/cccc/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ae692c5de0853703d0b2680bc5ca535c0221320 --- /dev/null +++ b/cccc/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f726fc8dac03d22c22c3da6c1832362a066a71e7e1351ead981f17ab99f0fca +size 25166176 diff --git a/cccc/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34fffb350a166f06eb777113fab7d780119646b2 --- /dev/null +++ b/cccc/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8731c0d51258d0e76dd09d4b2c54f6c7916eed9e09e785e36ae5671a2ffde536 +size 4192 diff --git a/cccc/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2de99ed479e6098ec5dfbcaae53b067119310a26 --- /dev/null +++ b/cccc/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4930741fa1e2e66ddfba90ef16883df6b8a96fd4b86b2dd2030d03f9b4fe7339 +size 33554672 diff --git a/cccc/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60c313904b9a7ff7a0c1d68821e1fab5142e417c --- /dev/null +++ b/cccc/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e3776a56980034fba25260fc788c6c53c15f0656361744ec2d9d1b21a43f80 +size 67109160 diff --git a/cccc/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cb898da76654f23c85c783ddc2735a00adab07d --- /dev/null +++ b/cccc/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e9685850e8010cf67470eaaaf62313fe0b39d457d3da4795d3aacd800a0b06 +size 4192 diff --git a/cccc/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0907834895d88852bbfc7bcab6d9f0dade4ec36 --- /dev/null +++ b/cccc/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a83f572f9149e3a7a6fe697b2007570eea8bf23f26813848d691b222292688 +size 8388848 diff --git a/cccc/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb3ec73a55fcefa853e2fdb3f12f7f9f8b410b73 --- /dev/null +++ b/cccc/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a88c5e33abe8864b93411982e530400b7f92bf08ebcb9fd813b8353df729930 +size 25166176 diff --git a/cccc/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e2c1eda5da6d7a691fcf656332920748ca15c6f --- /dev/null +++ b/cccc/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f491740826f2958fb1cd3d08484316a77fdeda8985352f1b0620c5ca3575c4 +size 4192 diff --git a/cccc/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fb4de00441923efccb54eb5cee37e5cc59188fc --- /dev/null +++ b/cccc/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d34c07e6f4a1d9b32e561775d1450b61b5e3d3d099528265fd81c4a9e5d1669 +size 33554672 diff --git a/cccc/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32aafcf5cf8fa0fd6711b09707e4180ee2fd19ba --- /dev/null +++ b/cccc/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf352a88a7d3e691b9981269836717095aad77a46bc9fb7bafff825632644c6 +size 67109160 diff --git a/cccc/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2344ba85f1d4544ffae0cc358f4622d335aa4745 --- /dev/null +++ b/cccc/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b764a077cffe718e9fffb1639c30faf63eaa85c6f7f112cb769773e1b294a51 +size 4192 diff --git a/cccc/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c17dcfef78169f7781275a76333f46283c3fabf --- /dev/null +++ b/cccc/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b8f88b232187fc733b8b9662c383e6771588f35cea167c7e40a6314968fd7c +size 8388848 diff --git a/cccc/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8bfe0a0431c4c701a1a9737d690f99342766536 --- /dev/null +++ b/cccc/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0b737a88abdbbf6febe9988051fc6d973c3f9f589907c977cc286d4a4ea9ac +size 25166176 diff --git a/cccc/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8b28b1b5824f507de64068ef6f0d82758589616 --- /dev/null +++ b/cccc/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c02859c8954eed23a1fcafc593f8e452fffc873a2e170f1f043cb9705b1be1 +size 4192 diff --git a/cccc/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcc4e7887112cf0c1c832c7f2c16d229a4e25a86 --- /dev/null +++ b/cccc/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627757af667eb2d1c90f25fdd6139019f5ce3a93ecc0a815d722209ab239f349 +size 33554672 diff --git a/cccc/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52047f42f0fe7090a9c0f18a9f92d3f57afa44f4 --- /dev/null +++ b/cccc/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82cf33d882a817bb261473a8d8b49f51e99a98d76935d40c2d9752f5c23d36f +size 67109160 diff --git a/cccc/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bae8d82e301174b8cd4191d8aeb8fc0b89f1299f --- /dev/null +++ b/cccc/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebc0c939562584919afa551e87fb8edeb3f63438cd70a6e7189c0359cfa2da3 +size 4192 diff --git a/cccc/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbbe0df4fd8f2d2e64b99e0224a1191f6f9b08d3 --- /dev/null +++ b/cccc/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b342571c7e8982362fda0026b16c919ce1ba232365690975332df33df068b2f +size 8388848 diff --git a/cccc/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a78af00d688db9eff193c70a62b9f59af1be0e49 --- /dev/null +++ b/cccc/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b8a5305a5af43e75f2eb06985cc58cb70c606f81b598bea3a2056f21d06a80 +size 25166176 diff --git a/cccc/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b40e771f072c95f5456559d1bcdbbcbdd455099 --- /dev/null +++ b/cccc/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0434c33d274483f37ad269e7920f5a8abfcd30d1852f239b921855b98dc50d6 +size 4192 diff --git a/cccc/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc525841218b8f82d2013a26b3d52196ed642f0d --- /dev/null +++ b/cccc/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f359dc2924a3d7170c485971328f0e687ca3384e2f00f277b0500594df4680 +size 33554672 diff --git a/cccc/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca16dc6844e4b2a72ff571fe36e54d0fdd5f2c44 --- /dev/null +++ b/cccc/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76e1ac655c27a547b2e570a8606132d716bb4930faa6fbd16e1adfdb332f3f9 +size 67109160 diff --git a/cccc/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c974cfbfb41c0d862fe78e16a4809a9733e123fa --- /dev/null +++ b/cccc/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59648eff2d09ea9817bdeeeaf34ffc9a10b03a828f26eba747c22f1d08f10adf +size 4192 diff --git a/cccc/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39eb67ab5cfe44ee079a1592afad3ded84040ce4 --- /dev/null +++ b/cccc/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c27cd972cfb2fdab4497f28b25736be2630779b84bbea3af4ecb3cfeb3f6ea2 +size 8388848 diff --git a/cccc/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afdcd2d82205e86ab0e70b81072720370fd3b3ee --- /dev/null +++ b/cccc/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89181e5638f26d69cd2409f2c04cae78ec05b9251323270ecc13ec0e1a5d5de2 +size 25166176 diff --git a/cccc/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c7656ba7d34268d68c18dbd04849a9650bfcd80 --- /dev/null +++ b/cccc/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ac803eeba3e0e0273eb10fe4263632c33eb08f8306c0707a46fa6c7cc97d59 +size 4192 diff --git a/cccc/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e2de28213f5993c0ca8cc53993072dfe4885717 --- /dev/null +++ b/cccc/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e280b8e78d363829639f29a203ad25c816fc3209a21829acfbf956bd71580e +size 33554672 diff --git a/cccc/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdf61d44c02f63b1fa3e37a38e0c2cdff74ef947 --- /dev/null +++ b/cccc/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215c6f2b1559ac96c2d660b8dfa206b187e1f9627d908094524406b8e289c539 +size 67109160 diff --git a/cccc/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c92717dd82fcc88fe41f3587d1101bf895cee5b9 --- /dev/null +++ b/cccc/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e033b0396b335fd6e1edf0536d194720e3ef9947de4a18b8498ab3bf18c4a143 +size 4192 diff --git a/cccc/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edb4a9eed59ab7e90d4b184647732a0bcb0f5f55 --- /dev/null +++ b/cccc/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0cbb0ecf73a22e11f846609eafa0f5bca1f85877c680561b5f8e9dc59702f4 +size 8388848 diff --git a/cccc/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a743091748a65e579e42609cd8cc60ea95bb38da --- /dev/null +++ b/cccc/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6745194be662eb0f0fd6d3a98ccefd31c6455274fd4bc35c06b09d0dd9cc922 +size 25166176 diff --git a/cccc/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be062be6e55d729e801051cdba45d722dc40d6d6 --- /dev/null +++ b/cccc/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b42d76a47b6bf4e89119b6201fafb23690167bc04b7593253ab91fc359428ec +size 4192 diff --git a/cccc/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0edc21ded871b1ce00231ff0a3abda265f5a8040 --- /dev/null +++ b/cccc/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161330c24929ec9f2d0c787718277dc8f0f5407ddf24ae0c5441496afc192461 +size 33554672 diff --git a/cccc/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..151c8d7e074266af0fa708e4b30378dfe97fa758 --- /dev/null +++ b/cccc/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a371b58cffdf942e66424a27a709b7ca56576ba7ef5486e3d2809d51e91b85b1 +size 67109160 diff --git a/cccc/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49b8b34251f0c21bc4d8f988bab5609366a5da85 --- /dev/null +++ b/cccc/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beea0871798c0f50fad5e39da913c6de01af3a1340ad83f99f5e783d42fd282 +size 4192 diff --git a/cccc/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..288ab1f5aa6dffec277e6095b4a6be4cd53601f9 --- /dev/null +++ b/cccc/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a52a802ddce0b93bff081892ea8586c3b7af6d52729df40592a9276da3bb70 +size 8388848 diff --git a/cccc/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4276c2e82a5a74d7d0ec5b38dd632672ce4d4856 --- /dev/null +++ b/cccc/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c036ae9589029c23e7383c706e9c1ee7fc15d781a1e752687c9e3eba0642cd +size 25166176 diff --git a/cccc/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92082028ec306692d071ac5ece7bd6ab9a2d01a2 --- /dev/null +++ b/cccc/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8a35ac4b1b31a903319a57f3de28f1d51d906d791085767a7298848444e2e7 +size 4192 diff --git a/cccc/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c09a4919862ade92a7e35aaca5766d08ffb3138 --- /dev/null +++ b/cccc/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc6ae951335bf2ee860537a341c622e2aac03db635efba7829fee3ebbd27211 +size 33554672 diff --git a/cccc/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9519d639cea3b8b65f9617551746ebb6165c9b21 --- /dev/null +++ b/cccc/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfadbcbae90f2a7d3f05dc8936807999d5a9bbe1b55902b878fae0d150e2baa9 +size 67109160 diff --git a/cccc/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..253631e603451e5975862716ec8ce5e3215cf0ff --- /dev/null +++ b/cccc/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74856be45b00eda2f320eaea9477ea85eced5f8904247da4797df98f35418df +size 4192 diff --git a/cccc/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12873e3814adbe72672b15365129f11c3875a376 --- /dev/null +++ b/cccc/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c918644bc5ee03250e1d3d9fd4b9ed95df811f24f9f0f3b1a46d4076d0b22f +size 8388848 diff --git a/cccc/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..187fb709749b9d66e5c933cd9a0431d0ccca138f --- /dev/null +++ b/cccc/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5f7a54da9e9961ab968ad9fcabff9b4cf1bb5cb1bd6ababcc5beecbb01ea8b +size 25166176 diff --git a/cccc/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6188ee6c9aed96b03a2a0db61e1c18042c4a159e --- /dev/null +++ b/cccc/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27047e28323c1e9e6b1eb4b09a4ce1dd81bfaa430e234996de5391063367b51 +size 4192 diff --git a/cccc/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9fb0a2b9603ce1ec5a7810f442f4424a6393143 --- /dev/null +++ b/cccc/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a6d2b79693659e8e5d22020d854a3d28221e60c85895d710bb25879da434a7 +size 33554672 diff --git a/cccc/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e330e1a4ff87b419b9fc4ac0a0ecfd46aafa2a0 --- /dev/null +++ b/cccc/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d7ea6021b00d339789dfdb5600079ce38d6584e6f66c6d8859096d6c23a217 +size 67109160 diff --git a/cccc/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2449710c50e067acfbbeb2ea346bacdf5f702c2d --- /dev/null +++ b/cccc/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86d63cdc91472b61bf69437381deeeb69c02f7a528ca297cb2001b47eb0d2f3 +size 4192 diff --git a/cccc/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..769ddce204f3add8eff74f3e97a498e47805825c --- /dev/null +++ b/cccc/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72ba07aa7b517d60781451dfc1c0ea1afdc91af040bcd3cccb316c7412cef94 +size 8388848 diff --git a/cccc/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd58c90252efb8099ef9c31589f14604e10aa8e2 --- /dev/null +++ b/cccc/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aaa42e1c744c5d64c2216cdd7343813d61f5756a3c91df1536e471716517446 +size 25166176 diff --git a/cccc/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..743a336a1e50095f7e6c6ba752d3b6e7a8c64afd --- /dev/null +++ b/cccc/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e45eef7677ea478ee83091d04115ac507075add555c8f618018a0569544ec0b +size 4192 diff --git a/cccc/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9979990005e87996fedeb93e6281efcb9ffe24ba --- /dev/null +++ b/cccc/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a69d024d23468964aabc1ad6b8f4b7c9d5fe343549f252e715716f9e48e11ad +size 33554672 diff --git a/cccc/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31f6fb7a0282e0c7cfb2774a4c8a7bdf53ffd519 --- /dev/null +++ b/cccc/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb5a53a4c2f69df5e487ccb1a1feddd2146c242bcc8119fd23889b959ec8432 +size 67109160 diff --git a/cccc/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3a2674a1fd5f0add8572378c1be7f76701ae0c6 --- /dev/null +++ b/cccc/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:071eb7651d464d965c7d6ec07a000004ceefd9a623e7dd6cdeffdeb7f3f45837 +size 4192 diff --git a/cccc/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bf3510ca20b5854809d7805b706681603af3caf --- /dev/null +++ b/cccc/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c2c5e8f615c08e989325d8693373a51cb66bb5145241271cebe2be6d19731f +size 8388848 diff --git a/cccc/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab5c6ad0a79ba18ef4613241018546e55338468b --- /dev/null +++ b/cccc/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888c1608306d632c3ed36e909d2ac13b1d541cbfdbdd511eda8330f8a4bc139b +size 25166176 diff --git a/cccc/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50f0bee77a2fbd1ce1e11d7058677d63828a0e78 --- /dev/null +++ b/cccc/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3c7ce8d937213bdf37815e24420d01a1f634404e140b1554f8ff704f393340 +size 4192 diff --git a/cccc/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ba15c2eaf497b834ec40ced0eddbadba05271f9 --- /dev/null +++ b/cccc/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:390643891e18a10a2660ed36e1dfca1cba8e55f6b27379b1f2062e45fcfbc5b0 +size 33554672 diff --git a/cccc/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e29fc75cadd22fd93572bba878a7d5f44cd894d6 --- /dev/null +++ b/cccc/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0e383e834371f9f7602af17d0801840dec293dcc00984a469ee0af8dd9a67a +size 67109160 diff --git a/cccc/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0357bc5a197a428b3642b1c3733ea5cd5bc64293 --- /dev/null +++ b/cccc/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814c42f3b7ea809b0e420ee5845a1c98b30e7e1c5c92e31b631554429f8a9482 +size 4192 diff --git a/cccc/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2975a049bf5f1b8d578939680b1eeb587184439 --- /dev/null +++ b/cccc/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b433d814bf88829b4f3731e623d1db392ab55c5518f0e2079da354986e3be1a +size 8388848 diff --git a/cccc/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2da62de18b205897e8e6113e02758c982fcf6e79 --- /dev/null +++ b/cccc/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c294a76918c22488302e33f6e0e623103a225502b8364f65177ddda630e93d8 +size 25166176 diff --git a/cccc/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f989e0d7827a012c49e1f9d7ba431dfa72b1e738 --- /dev/null +++ b/cccc/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdb9aa4cb8ec1de042b3ca3d4f9ac3083985427e78e3bb4cc36fa51a0c73397 +size 4192 diff --git a/cccc/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6aa55895c2ba2e044cd6497a96424fae5452d357 --- /dev/null +++ b/cccc/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0313f1684433f9309cc846308e62af7adc4087f7b377e036ea1a12bdcf2c5dbd +size 33554672 diff --git a/cccc/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b8b32e91e4239a3542490c4911736adc2957abf --- /dev/null +++ b/cccc/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df64c50cb7cc105fce873d42a410dcd7b153d568aaebbc1af09101b5990eeef +size 67109160 diff --git a/cccc/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e9422197b32274b895b586c13ceb5d9d01cb23a --- /dev/null +++ b/cccc/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0733ed8c8f09d886d07fa0d30201699277a83f4b2e9a608556f8950578bf0238 +size 4192 diff --git a/cccc/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83783426a6feb44d98e01625e97d4d8bfd5e6ce9 --- /dev/null +++ b/cccc/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d6601fc3c93d8eed7e4c5f84b145ae496943811b975bfb4d32fb47fe3cb089 +size 8388848 diff --git a/cccc/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89cbe267350e75c2e3a41c5d3e9723a803194850 --- /dev/null +++ b/cccc/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c902922c379ed6ea70ab40e4b5ac3da9700f6e17bcb0d5eafbb9c43345f05325 +size 25166176 diff --git a/cccc/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb9c9e3391fd60b6a3339a981ab5a52c6f2f23fc --- /dev/null +++ b/cccc/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9910e5f2b0332418705a5d23e8540ecb985b35197ab5c2b987ee6c452d0069 +size 4192 diff --git a/cccc/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a02d87e47bb224b8290f4d1c244f5537e0f4eded --- /dev/null +++ b/cccc/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae0623c6fbd9ee5add3126566c843a61a5663bdf41749da56296e3e9cb73bb4 +size 33554672 diff --git a/cccc/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f40b7338b258cea7c7576974b3e7d966a73dc40 --- /dev/null +++ b/cccc/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ba2c6bf60cd71515f316d21c80776432639dce69014f9fd2ed137c949e89ce +size 67109160 diff --git a/cccc/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90e26581d5f2bc2e1517636e560bb21217c8428a --- /dev/null +++ b/cccc/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1330e441e97eb3ea10e75abcb723aad3ee647cff0efb262bcae56b0b50152e2 +size 4192 diff --git a/cccc/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d72f15f66a36e79d0b51beeffa0c1e44457d5ab7 --- /dev/null +++ b/cccc/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abfb26ce42cf2efd55833fe618ebada1dc6de9f44fcb4fe060e810e9ad10fd7 +size 8388848 diff --git a/cccc/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8d9da28630b1e14662bd9f0b4397969ae850e7b --- /dev/null +++ b/cccc/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16965b5686a0ce7a527a5e2069ad3aa50e0903d49283e9ac87a084b6266081a7 +size 25166176 diff --git a/cccc/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82e0266dce44b4c45874d578068477d68dddea59 --- /dev/null +++ b/cccc/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ab17ac8bd237742b13b0f78790a36816046506dca875569fbf60982fcd54ea +size 4192 diff --git a/cccc/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49a2f959fd315b00cc419672b3f0eee79de6337b --- /dev/null +++ b/cccc/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba96cbda2febd68802e40f90aff32f5a10e38c8758d1f05262ea4418edc6daf +size 33554672 diff --git a/cccc/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d80d4c0426be364fc2643d584552f3269fdcd985 --- /dev/null +++ b/cccc/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdd4e80031c375b1ff667abb78bd67be373ede138c13503eab53e6072da5af9 +size 67109160 diff --git a/cccc/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..168e1046e2e09ae1d2eaeb54af5cabba03319695 --- /dev/null +++ b/cccc/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee17f58f12f519ec783a6d24b35b146817396db467cc7685f2643594e042ddd +size 4192 diff --git a/cccc/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e29e832936450206ed3e52177ee49a70f925dc1 --- /dev/null +++ b/cccc/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4e371a447389699585305c650f40c1e0e97e5c084c12360dc28e63a69e0d09 +size 8388848 diff --git a/cccc/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f13c2c442bcbe6a5c0524ea213b49c5da89e676 --- /dev/null +++ b/cccc/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4342fd306d92d0e219c4ed3a003577368e5854b24431ceb3392009313c996599 +size 25166176 diff --git a/cccc/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c25f71ad9752c83cd501fdca57250991df3dab23 --- /dev/null +++ b/cccc/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:785b60c890bbf132e2e39dec45d8643865d9971a2f5c52cff53bdc0c0e09825a +size 4192 diff --git a/cccc/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbfee23d12dd482cfb0c59eb1950d7adfe0a639f --- /dev/null +++ b/cccc/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf285e04b4783a206c30ad8a5f6c9c304cf3e5978a05a022235406c15c222844 +size 33554672 diff --git a/cccc/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4173cc8697b9400e566740680ccb5d000e1ae540 --- /dev/null +++ b/cccc/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b6bdedce94486c0e438b49bd10b577d6e7730a039c0754394b0bc647c63ff2 +size 67109160 diff --git a/cccc/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..776689b38a3653aecaf12d9eab3ab0b12a22b011 --- /dev/null +++ b/cccc/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c1201d173faf0f113490f8af241a378cd9dddb4f25f723a3940e93b8bb8eb +size 4192 diff --git a/cccc/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8cefb1bb6ac024d52110516f174b08728b7d312 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57bac19a4de88349ed26d1cd28ea581ab11b44202f8ca323be9ce6790ca881e +size 8388848 diff --git a/cccc/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83c792880bc4dbe7598ee91c9802dc602dee81a9 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78b82f9c863ffabd16e9cc8f4faf35dd2148ccef8588c40441a3514c06076adf +size 25166176 diff --git a/cccc/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c7465fb86161f4392a179c18b86bd0f0d6957f3 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c901dce4c6d54d43e9d644b7f2f497ece6a6b84c3f2e81e37e8c14fbf031dc +size 4192 diff --git a/cccc/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d050720401b78683326b7b1210cfd73f8f32b52 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a2424df451aa18361a2e04727c8380413fbb17ad486cd2fc41ea9672a1e2dd +size 33554672 diff --git a/cccc/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ac20385d6a42b74288423af283f16d728da45c0 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc3b107b89729c897b37188f4f49364bbc22402b660e1985b9575f3f0a585ea +size 67109160 diff --git a/cccc/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c851d12cf746d0cd5cff10531c4e97f7247397f4 --- /dev/null +++ b/cccc/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df950b30695c6cf9f716dc9967cab4222a930b7f7e11546e657c90a6898a8a8f +size 4192 diff --git a/cccc/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f2dd5f0903ffefe859712d778cda169375d895c --- /dev/null +++ b/cccc/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bdbc54f413ee01f72216df93337ea8af2596673643b9eae0bc02a9151bc3d1 +size 8388848 diff --git a/cccc/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f7ce5514c74d6a946590ab20d8533233d8dc8c3 --- /dev/null +++ b/cccc/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08986bb164fd5e05164c1e9ef88bb8d59a232a1352b6dbb09f80280cec79090 +size 25166176 diff --git a/cccc/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/cccc/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d61b40fba6772815cc2bc7c9437340f1e98ba94a --- /dev/null +++ b/cccc/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774b574b8d94c47600f56cebfcc53ab072ec73a6b000fa65e7f76e5c4faaae0b +size 4192 diff --git a/cccc/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e947ed1f81d7841a9013e1323e728743c05486cb --- /dev/null +++ b/cccc/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7aadbf9438890ef151c4284b4c1886bf69f56a300623ab3fd222cf8af74509 +size 33554672 diff --git a/cccc/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e8c4f48d7342298fa31451bff519ee2fb1952e2 --- /dev/null +++ b/cccc/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eeca5185da60b40a282fffb94051726e9c40c5c85558d266cf146c71ca1cd53 +size 67109160 diff --git a/cccc/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/cccc/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d855ba6ffb454b1c2339cc466220c6ad2fe0679 --- /dev/null +++ b/cccc/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a43d0acd4fbee1d52cc980844f9ba380122203ac25b9b635577abd949d04d8 +size 4192 diff --git a/cccc/model/final_layer_norm/pp_block/model_weight.safetensors b/cccc/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4111d4af93b2b95c9f87804d22881c398f3f3a00 --- /dev/null +++ b/cccc/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac0b65e9621040969414c0665ac855502199a1b863e8e3d8009baea6c09a0dc +size 4192 diff --git a/cccc/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/cccc/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..980262beb5048ebcef715d2211f08413dd03e96d --- /dev/null +++ b/cccc/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54bfa5c20bf60684e9772584ea4bd7aedf50470bb934ab1a6f2fc612cbb17876 +size 205914352 diff --git a/cccc/model_config.json b/cccc/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/cccc/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/library_of_congress/checkpoint_metadata.json b/library_of_congress/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/library_of_congress/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/library_of_congress/config.yaml b/library_of_congress/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..393f663328a4d0176c33518a486e01f083a6c5af --- /dev/null +++ b/library_of_congress/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredlibrary_of_congress-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/library_of_congress/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5cb7c7121bb53575a81c3dcfd6c659de5733162 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca637418b804214bb0775f4d90d333fce550a0ef38dc39225fce576a53a507e +size 8388848 diff --git a/library_of_congress/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..997b8a2fcc306c1bb365a9402adc8804dfd22559 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6e25255dbcf8c9e2f761985c197148082698b4b7f48d60ecf8ac045e789134 +size 25166176 diff --git a/library_of_congress/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d1d6e62674435cf69f20685fd25db43f1032a09 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1df82249444fcb6faa4b09afd8f21e0d127f5dbf8cad7e1c74998d3670ead7b +size 4192 diff --git a/library_of_congress/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b119e56e77eff697cb781a5696466ca53c4085c1 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f8d75dda75d1bf8668b5cb49b6a13bab0faa9779b184b683fd86e0e9a3aa86 +size 33554672 diff --git a/library_of_congress/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61b7be886a558ed816bce63a7a5b4ba31d828aa2 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7332c22afb9e63a97b711c9d90344e6bae5374322e67350a0c900c103c5f3564 +size 67109160 diff --git a/library_of_congress/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d7b3081898a746f44c069d31ad28f575dd2da76 --- /dev/null +++ b/library_of_congress/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfbec84e33f4833f9f3111b486be56de0194c3b2d6ca81cd1a15c2aa95415412 +size 4192 diff --git a/library_of_congress/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb464145418ca4d61defdbe7b5ce4a710bec36db --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649cf2937910e92c5241fb45d51c3e9acb6532b959b99f1d6b687d35e6c073f3 +size 8388848 diff --git a/library_of_congress/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33277c8fd24cb8563d3f66553d607a713b28deeb --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d373d103748958b2c89046a9925bee6c7cd6786f5f88f02cb1f773b786f43404 +size 25166176 diff --git a/library_of_congress/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed34729d6b81a36519c753d3b395def7961a3ffa --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40069672aea3cc60d9e2f2d091b8f178c727c54434545c356bacb9ddc1fff1e4 +size 4192 diff --git a/library_of_congress/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..860f756b5d80fa4fdc539bd7d48bfbb08cb51f88 --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa7eb8500aa22648fad0cc42fecf532558a5e47705272c6b98ed75d38064ecb +size 33554672 diff --git a/library_of_congress/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9590fa1f516bf57d2d9a7e2e827da2a175204c56 --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a791ed7d713bb6c836d7ee73c2db9c60923b75167b80f77b54065c24271cd1d +size 67109160 diff --git a/library_of_congress/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32967838a67bacb71c7f3cdba5decf3178a4b35b --- /dev/null +++ b/library_of_congress/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcca3389a1ae12483344fcf44bb77324fa497e6e5ef3126e9664aa09bde0da39 +size 4192 diff --git a/library_of_congress/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cef272e036b567f28f1ed702e6af7dddf819c76 --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81bfeb09e235f720dd746e371958ac604938a34db2a812d71b3623e40c4fd22 +size 8388848 diff --git a/library_of_congress/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95122535e64a895752b5e93285a8f868ff125892 --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8b6a7529498bfcfd94351e9fba0db908d0743688ce92f4b9decb647c8b1f48 +size 25166176 diff --git a/library_of_congress/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f77cfaad9f8c25e10eebb5842cd0dfa2ce988a6 --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9843b287f11540667d547b144970642f8acb1d786c7b80322938f0208cf261a +size 4192 diff --git a/library_of_congress/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5186ba5af92025e2f25f14faae3c7c8aaf0e379d --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d0f10bdeeaaca8a2fd1109aff19f5871b631757d63a7c06ff573dabec5d2eb +size 33554672 diff --git a/library_of_congress/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..432b391eb00ed8c8837d1c2964f6d48470b62065 --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee74cd591718df9c52a8e4094d8805e9bb518865ae5865c2f7de34cec53d16f9 +size 67109160 diff --git a/library_of_congress/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82d2d4b39b26ba90af0b859831366bae4ddd5f37 --- /dev/null +++ b/library_of_congress/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27ba5f0b8ac103be82414f4b3fe7dec941976108cc7f00ee1fe88e80a180b51 +size 4192 diff --git a/library_of_congress/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8f67f46bcc6f4c535f36ecfe544f87078aa8bc6 --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a348f49221b1b25ad64f2d086b8ff8d37a6e29a6cd5280a6db93199a66bb8d17 +size 8388848 diff --git a/library_of_congress/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2bb00f31803f8c54688549db39baaabe9b533be --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdea78b7b4800d79227d51404da8ea1878e8c7935ea6141890ab13a42f1ecedd +size 25166176 diff --git a/library_of_congress/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae085f313ac0e4f5aa78d84a5c0e4e3191bfdd37 --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5680a9fe786b0c062824144a510b1b574bcfac821cba0e8d0bb7149d35c4f7 +size 4192 diff --git a/library_of_congress/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fa0afd9d3d2b10c1ef9c376d85c4ef9dd67db08 --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0cd589a91cc24b19dc67a13416e24426ab0add4f2d30140cd02d1b0e5a7cd45 +size 33554672 diff --git a/library_of_congress/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a10de8fcbd83ffa39ed05f5f4e931f4e47cb3db9 --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ad52a75d14df2105e16db6d1b72018bdcebfdb28835d82642c39fabdf1b729 +size 67109160 diff --git a/library_of_congress/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca69502239aee6259ab12a790fd8e8b4da28f9cd --- /dev/null +++ b/library_of_congress/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87df34bb0b5a93fb51cb25c8584a2d237ae20b6c739089bb250991a35b5e2f95 +size 4192 diff --git a/library_of_congress/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d10cc6b9c8e6badcc223007346af0308f7b2c14 --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120a00e32efb19812a71704ea8a1c02dba108d00acf4b79d45fc4d7fc22b592a +size 8388848 diff --git a/library_of_congress/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9ae1fa500dbb537894ae5a521aab24496eb7cba --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387295f5216e11817d26a2d271f319c70dcb79c6a5b43e1ec4c26f4c6400a387 +size 25166176 diff --git a/library_of_congress/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8db92bf720a95e4ac50a193dd2c32b5bc56f8702 --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d1adc9c56253b652c90e4969bb4d67671ee67c7de8e9033c836111babf5f94 +size 4192 diff --git a/library_of_congress/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..793d23f4baa5abc2bfb97a863670e03ea91f9b3d --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd4b257a65397abbbc4cacd1ba51e0cef839c3e510e31fd3d57ecc617bb5556 +size 33554672 diff --git a/library_of_congress/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..164589eae1d3425a7bc29899e7764618329456ca --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b288464676be6a19b8065eb32927d5b41ce907d535b51c81e4040cab20002a +size 67109160 diff --git a/library_of_congress/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ccc26aaf3dd81b7492dabb6d06340349c8f1306 --- /dev/null +++ b/library_of_congress/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63dfeef465c7a72e06f7220a8510e69c258c4a84931da76cc2e7cb614ddef77b +size 4192 diff --git a/library_of_congress/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c9d1cf73c08382bebd85f2aef28d181c096e91f --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbb8025c05fe03e6173e3eacf265c01105a5106abbb11303d52fdfd6f1e1abf +size 8388848 diff --git a/library_of_congress/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6b7a5d70ce53ffba8dd9faaa643088686d13aae --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286fa65ac8cba4b55c0903a3566914ad26cd13f160a3b3857c8f0354a0f29228 +size 25166176 diff --git a/library_of_congress/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e8fe097da316a7dd9b3e49495da8ec11ef078d9 --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58186276c20af4a027061587955ade94b092703acb0603e8381e737c7ab5785 +size 4192 diff --git a/library_of_congress/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ed2f47801e5bb409ce8165c04183a3f7d8f64ae --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1ed49cf5831d60b0e55b9534f0e9bd8b2eab6b8352da22e9df6a76b8f106ec +size 33554672 diff --git a/library_of_congress/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ff419b5680bbe0693b9124c737d3dc13394e319 --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbda3e330a3a566623dcbdc196a789ab4a36d284de51f8b836630c20251f519e +size 67109160 diff --git a/library_of_congress/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8f0ffc7ba4bfeddec2d7a876840f64d71b7962d --- /dev/null +++ b/library_of_congress/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8304b219f8c6110c2d84508c79cb9e773731b9e57a8a52a176a689055cc6be3 +size 4192 diff --git a/library_of_congress/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30e8751f60eb4a22d47f179b551ebf009f2f1dd9 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ded1753688cba73e9151bdb58b96772cb0ce99d35e04e7cd27322a7ac3b978 +size 8388848 diff --git a/library_of_congress/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..316d33912989c6538eb7fa13cc8795bc96c9f500 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efff6bce03a2657d11813391b1930281970db0b79415b09c322cef52be01e63 +size 25166176 diff --git a/library_of_congress/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c0b28d37b6e5f85b3c7dd3587cfa03bea325e65 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a4aee18c116c49da1ed92c64e5f583f1cd91a599c8a03025ee923a32b3851b +size 4192 diff --git a/library_of_congress/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65e25dcc4a30f075f5aac07da6904a328aee93d5 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b604670cb94254cd9734121b0c329c32f677ae55af8a3f6760b569e9ca147511 +size 33554672 diff --git a/library_of_congress/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4c31a13911e3861421f271a5baefec4b73a0275 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65df4e87bac5ca2ceb05e692bcae3a60b200bac4791a22323a9ff257c8ff23d5 +size 67109160 diff --git a/library_of_congress/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70b7d122b0d629803a29f11ea90f4003207c8018 --- /dev/null +++ b/library_of_congress/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ab6fa75e537359c5a8ec4756a149ec51b60e63da14972878b3660cafb2f77f +size 4192 diff --git a/library_of_congress/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..281d77bd7500471bd4ecf8b25e9dbc1884bd0ba4 --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d57fd96aaded939dece4d04759b3a18f2bd885b69b7f6877187c463b78f522b +size 8388848 diff --git a/library_of_congress/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..647e9ee8dba2ed587ec3d22a1f8010e16acf5726 --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29683962a1c28ba72a220da50b4799522b26ecadb3466cdd2697b735c818e2c2 +size 25166176 diff --git a/library_of_congress/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a637f246ec521f1ab03097e1aad5276ef8599f0f --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:613e49b7f776c8d7e1cf5a59fd5997a6b6c2100b4774af9f0854ef1f26b990a8 +size 4192 diff --git a/library_of_congress/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb57efca689f1aae13883e2269f62bb72b4c3e85 --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca09396bdbf49f6786698909e37d58771cc93b9bcb85640d1755fc3423b5cdd +size 33554672 diff --git a/library_of_congress/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e4ad66d91ed3e7a6260d4bd20900243b60a7ff4 --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ac2dc0d43219ddc89f89ff078aedef7312b1c386c0875e8980fb3eddfae44e +size 67109160 diff --git a/library_of_congress/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4117879907d73a1e8533ed6a7149a46f39657414 --- /dev/null +++ b/library_of_congress/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33409d30bdfd722a2031c928d56c88381c07aace215946271140de503dd2f19d +size 4192 diff --git a/library_of_congress/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffce85a46dc6d840103987afe508ea89a096eade --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6f137b1bd2530e07f539fba16f8d68978b19d796d41986645e0c86c50a3264 +size 8388848 diff --git a/library_of_congress/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a565293773912c6913974979ff37795f1e478aae --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce91456f60874ac88c54bec67551fe09fef323af49858a28c9bf306443a407fe +size 25166176 diff --git a/library_of_congress/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..289986e845a5ebb7ed2229682b926624b5940e9a --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e3ac490995a215db5ec30feecd43bae8ced2433adf83b786ef3def79624859 +size 4192 diff --git a/library_of_congress/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea3fac01519fe7b346ffcc937cf6b98e03bc042a --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb6b6678911628edad31b4f7602102ce963ca8c29e6fff5dec8940328548e78 +size 33554672 diff --git a/library_of_congress/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1754483448ba752f93e263b04fb8a81572c6add7 --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63e60416e6b6a2465d1230e2f3a226b195ebb52187a30c968860e5736057a36 +size 67109160 diff --git a/library_of_congress/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42f1a7f75473afa07cd3db15bd14d36f4d627998 --- /dev/null +++ b/library_of_congress/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124355ae180d46b72f2f65e4c2aeef2b922d9a18e86908e47f1f0d229737c9b4 +size 4192 diff --git a/library_of_congress/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4555b1218778134bafdfd6f201a0a5fab1d8b6b --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af06468ec673df1a51baf65cadafaeba8cde50fa32c6903ef1a47922932e809 +size 8388848 diff --git a/library_of_congress/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfc06e170562f16d339700717510dbb7f78fb713 --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8ae0fe7a6d8fd4ac6ff2fcf35ef898b89f9fd625c02acafb1fed948a7af9cd +size 25166176 diff --git a/library_of_congress/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24b73a47e123c5914bdac29f4d9655055b5600db --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e5eb41f4a58979251f979704ff2fec9a0d2308762b98e219f58b9e5d4758cd +size 4192 diff --git a/library_of_congress/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f80a5572bc9b9032938bccd9244c57bca0e5760 --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea833fd7fb600891ff6bd35294d9bcee39f358c1df93e24cee496df5faf1a80d +size 33554672 diff --git a/library_of_congress/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09c9a086c4c089c8e76b12b9047e7ce7cb5c8e94 --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6faabf51d894f3f67123be2fac12643d4c6537d7da8c2d57425a710affe03a +size 67109160 diff --git a/library_of_congress/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c63f764bcfb8050fa3f5f7a5a37e0ec6db79dcfd --- /dev/null +++ b/library_of_congress/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1f7f8615b66ca36b69e694c297cc9783e761adce854b205f7c457752310eee +size 4192 diff --git a/library_of_congress/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..180341a139044ac1dfb7a4671f9fddc5cf800028 --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c7a55702cd07b6c46580c9d4c52e4d9fb9152e866429ced2dcbdda99af7673 +size 8388848 diff --git a/library_of_congress/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa1c32ac7c3374141a2346a1b8f533348110737c --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2eb47310bfad1d8db05b720d46e0cc588f1a8b1d63ca32f8c0c9565cd19fd0 +size 25166176 diff --git a/library_of_congress/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99c1f3ef9e975fad9f63c2348f44b3a13ac31cce --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540389022fc2f03d64718b9bbe3ff1d342bbe1ad82f7ee09163280b39f813a4a +size 4192 diff --git a/library_of_congress/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edc2312cd2354bce09da328dfe6c4efe11cf19e1 --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0feb8f723e84532e9faeed6cd64ce537f5b8eab814d45ee8a05b1ca729625f7e +size 33554672 diff --git a/library_of_congress/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..261d6afa50885e87bcd70fdb51d090d6d5425c46 --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a73aab4918a1df94917e28f022fcc88bf96b96ca5bec9b2001f964146d44adc +size 67109160 diff --git a/library_of_congress/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b216a94828ac95eed739ece377cf9afe3c246c2 --- /dev/null +++ b/library_of_congress/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10ba303cbe7f475b9d6af1a09ce34f075de4e9afd45fe0c94890e00c6ecfa83 +size 4192 diff --git a/library_of_congress/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0dc9c591f38e356efe4230a38a53ef736129ea78 --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e171e50fb39072c78703aab330b7229bcf899fd62384ebbccaa8301b9b3b23b +size 8388848 diff --git a/library_of_congress/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92384700a098a2b91fe2a7c61d1d63d7ceddbaaa --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4a1c6329f9f4dc9ffc104047cf18b11abd79ec91df636ac6be253f997534ad +size 25166176 diff --git a/library_of_congress/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4354d8047360991a787a3093ae0d0b1d67fff91b --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd8f0836e763beb4664fa8a1697f59a9de0f300da980e02509b19c60858bfb3a +size 4192 diff --git a/library_of_congress/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2553be89c6bda147effd49d7e406b004e2b6899 --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de5d74a4208b98432c6f661f0330cc3fbd2c2207ee8cc9d113fd6b4f148ad625 +size 33554672 diff --git a/library_of_congress/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37eae55d075a1467f7001ddb2a51f1b41607ea0c --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a101a6f9fcab6768021b3aae98a6933e830b2c485a378856516409594666313 +size 67109160 diff --git a/library_of_congress/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..631c4d78ac7b916b13f8caee58640fd911d64d56 --- /dev/null +++ b/library_of_congress/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b1aa9a701c7e507edfc10541a9fab81f94598a1cd897ed204f92ab94cecdbd +size 4192 diff --git a/library_of_congress/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03d7e4ec41eef8acf7aa10a5596e19d2515c360a --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696e5e72160eaba8fb69bc401187776f2bd531a3cac571d34454eba355ea946d +size 8388848 diff --git a/library_of_congress/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e1742ae2cb2c9f62086506b061c7b058224e8a5 --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db9e266a1fc741d907f1d647bba88d5d9f74b528508249bba1862e9974ed429 +size 25166176 diff --git a/library_of_congress/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b09e131604cc8da1827818e5d7856594d5281b16 --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f62982bd56d2ff590529b8b2ae3dde69b30ea9bb6d90f913fe1229373a95644 +size 4192 diff --git a/library_of_congress/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16fb0f085635bce814974d10f071a44baf1c8a39 --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5e73ec2c22255555eded84eaa986d963c12f3caa510aac5351e0798c8d8540 +size 33554672 diff --git a/library_of_congress/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44b373a497aeb97179dc5968cff897cfb37e813a --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd8c33b7f69fc3be0d58d77a87cee41fc6ff098c15e85e27d932f5ab83f0a3d +size 67109160 diff --git a/library_of_congress/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12de4b4e9b078049ce4e5b64c410b5c14b499b58 --- /dev/null +++ b/library_of_congress/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f021b188d0904b7675cec015b59bc9316df72ed0b0493d23bc12b44d65229a62 +size 4192 diff --git a/library_of_congress/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a9933a6a24b18d105ce4f8275011a523ff00026 --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ad48d8763c34a37ba4e94c817963488fa15d9b3e4fb964ed98dcda827007ce +size 8388848 diff --git a/library_of_congress/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ef22cd9c2c88ebd7dda36649ef3e6b3a4dadaab --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61cde7d4122ead8ba224971886fc3b0e06f3e3f59bef18e877917f27a72689b +size 25166176 diff --git a/library_of_congress/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ee8843a12fbdf9308cdc6c105be45abd86b4010 --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb01351a74111947f078e391cb134c51df4f6ced459165aa384a15c68e228caa +size 4192 diff --git a/library_of_congress/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c84206137f2b88332e67641b524b38052e68f829 --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b574d8b82d41da23ee1f345419b0c7cce606a1c8f79a92d9d4e3ae91ca1a2e +size 33554672 diff --git a/library_of_congress/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71bb4375abc3c49134ad03831eea7a12bc6b9579 --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76da869c5a3695956fcdffda056841ea4657cd340397dd38b5e3e4c2b96e8f4f +size 67109160 diff --git a/library_of_congress/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95c4d29dcd3c0478dda4f79f057da957930d4c6b --- /dev/null +++ b/library_of_congress/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e608cfbdf3c35e24eb980bc3d6d0190e0d1141a7559ec566f0b04613b4a31a0f +size 4192 diff --git a/library_of_congress/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d593ed08e1f2f80f16d19727109ebe37a9242a3f --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07928b17692121bf07f10fa49d2caa22fa48fd3754e8a318160473d86d69a20d +size 8388848 diff --git a/library_of_congress/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9787b163e80364fa2dcf825de7b89dbfeb8f1149 --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651e3e260acaf8065aaf79b74fdba58176134a60f074bc85001b40e52629a003 +size 25166176 diff --git a/library_of_congress/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b0ad3cca1ea67db7f54a120a01637fb2ecf9cb5 --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ade5e081518e4d6cf8144c92fd8bfd13bb1360c715e40f0810cc49a4da378c3 +size 4192 diff --git a/library_of_congress/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..549e08aefff51c877df6bca1a9c800aabf63807c --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115a742dace2c78c104a24e0f1722254490616cf02ccf9457b56116a8bfc75b6 +size 33554672 diff --git a/library_of_congress/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56f7ba61b7acaa14988bda9665aff65321201969 --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef144736e74121e47610fbb592b32d0abea39c79f9101142f90df944c004b19a +size 67109160 diff --git a/library_of_congress/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2d5edde2d2f3e2ad18b16d5f845d3ae2b80f894 --- /dev/null +++ b/library_of_congress/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23372499fdcf1e9956e59b148656f5ffceabaa555d3fb29908bdb9ff90e1a3f6 +size 4192 diff --git a/library_of_congress/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c0464910b901b3674ff7b9beb252a24ab26d57d --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5d130d1aa6dacb273a9bb84366e372d244a4d75c147399f6f2ae864054c6fa +size 8388848 diff --git a/library_of_congress/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b6c8c8f60881842712b16889a0d864f8d539546 --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8e0dab4377ced305d7d4fa4fa15f23abb136c7a3258280824a635840e47982 +size 25166176 diff --git a/library_of_congress/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71371fe04e008fa9ad63170180164bd4b4f93244 --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b398fdce24486af7b59f4eb99b79279386dc9d672cadbcdf33ac48eb2bc977 +size 4192 diff --git a/library_of_congress/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cb80674dc7bca90e4d989419ddc9a1ac726482f --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ef52a4c6fe9d551da11bf5dd4c07defcd7b61435cb874ebfe934da536eb293 +size 33554672 diff --git a/library_of_congress/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2fc5c75d22e28b84725b4838a91f59fa27da359 --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6271b354d9ec7787c97ae1f8879532269054455d95e4d1b0ac51e1f6aabe2036 +size 67109160 diff --git a/library_of_congress/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d025f80d0646fad95961efb7f439975dd11dd13a --- /dev/null +++ b/library_of_congress/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb60017703a8888b5e99a1c2e241d4bc964a66ce06aff95684aa52e337710d36 +size 4192 diff --git a/library_of_congress/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e7b51976381684457994a0fdf07102166e15e36 --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8978b59f6ea31dfebe5b79cba3166fdbcbc563920fc8a59b4248c60151781a9f +size 8388848 diff --git a/library_of_congress/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9d4b889a9c2f88b36a15659497a68f821204366 --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99100ad498f71021a482198813044cfcc17a857a3ae491791cdd7a3a3bcd4bd5 +size 25166176 diff --git a/library_of_congress/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73539a0a58811401cfec11900d9ca45615c9f646 --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372d43853e07a597d6498c56063168a0497aaa6db289d67f67156f2eb5789805 +size 4192 diff --git a/library_of_congress/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68425e434b6234d899b3536bfec6a6cf132894fd --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6e7343a1f9098d8d83a033d68ce9c959e709aaff8142a34998e7db266cdeec +size 33554672 diff --git a/library_of_congress/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77aa34f44630f6e77f812576c2687af4a86f8e8a --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8873d7f2bf910d7dd46b4302e41d11d5539f25b7b42296c86913967f4ac18d42 +size 67109160 diff --git a/library_of_congress/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c503049692504aa31a0331c2022083b50cf3e01a --- /dev/null +++ b/library_of_congress/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94471c02dbf88f0d79ba9850a4a83faf4142555f539de059c972e7adcefc6268 +size 4192 diff --git a/library_of_congress/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79bd70e615dea95885cbb79b14e4c4cb7fdcad22 --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780c508e3e7118fc3be34355c216baa88b9be9f4d9dcab54844e1b3d7d72363d +size 8388848 diff --git a/library_of_congress/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cae579f31dcef3cbfa53bbe47d03dfe3d3cf86d2 --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bda05f1d98175e5f2666df483353bb446eec4274249a1c18bf3d70ee5490d9 +size 25166176 diff --git a/library_of_congress/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e52017da5f58310880052f53214821b54c4003c4 --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cc7904e25ffd1293f7039f6bd71bac987143b885a81a1feab74ad789414aae +size 4192 diff --git a/library_of_congress/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eb1ea5a3d66f0749805a87d05fdf1f063566b37 --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a47a5d7b7b5e539e5ae51356a03470e3eb02d52ebbc902dcbd2b5916a829184 +size 33554672 diff --git a/library_of_congress/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b9f61194bbb046abf88d56b0c1521e3e72c805e --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56654b6cecb1dbfaba59cb017ea172f4a584e3f69506fda30be9a140762f3c77 +size 67109160 diff --git a/library_of_congress/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f2e862678974e8f291007a1d9dd593dd41df2c6 --- /dev/null +++ b/library_of_congress/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b630b9e97091954651bb136d1043ba70d5635c41188515e245cb874f40c567 +size 4192 diff --git a/library_of_congress/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b44b0960fb94f26c88cd92dd81fa6a5fe8845fcf --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1e50f36b7e26bc6cacfb635fca5bc2bbcb87cb3b312820d6a082a7c2a905e1 +size 8388848 diff --git a/library_of_congress/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7277c5f4d3ab3c0aa53cf226d5defe267f13f491 --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6d7c543485217ff8205bc9d26eca100c9d168b860f1d6a022c75cfb2bcefc0 +size 25166176 diff --git a/library_of_congress/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..369eaf7420c366edb55ca718e22fa3bb7628b898 --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ff1ee83ce6eae3d823860fb82e116af0b20fb2f412bce5c148239a3dae6b6c +size 4192 diff --git a/library_of_congress/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4c651fc8e5705421a695caac2d1786d1b3fa8d0 --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6ec5e8ee3f30bd323156085ea5c8e2fd25fff4b1627f8872c703f8f8827bcd +size 33554672 diff --git a/library_of_congress/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68d99561fa874735e3651d3772c3a82e961e5f67 --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5982e7d5c2eed4c3c777ad75e38f1f23064928c38a8796e3e9bf1120357712a5 +size 67109160 diff --git a/library_of_congress/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5134540579811ef42acc10bdddeb41842fac6d1 --- /dev/null +++ b/library_of_congress/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b634d41ea2361bb8ecf304b8a5da17d6758ab8f35eaf414a123b8781283e573 +size 4192 diff --git a/library_of_congress/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e649424bdf7389fa33e26f6d71afa593bd87b46 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a92be7ed9d62e9eba603085bf858f0a6f0030cd825aff92e944d69cd6b1585b +size 8388848 diff --git a/library_of_congress/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52543b681f34b8c4c8ccb10bd19033d739f97f30 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103d426ca8c152e862ca0b6654d3740212395165e6db6019d3a3fb173beb32b8 +size 25166176 diff --git a/library_of_congress/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a9f8b0402b6bdd36b5302a8b6cc549ec47f7b05 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bb2d5022f6fe56eb952fe2889b56eb3af81dfa2173e4a5c6036d603e67bb00 +size 4192 diff --git a/library_of_congress/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7007de97f08c3ec2eb7933749c7717b2235ce7c9 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9867b0535db51aed664d5934d6cd0f58e1fc1e275c6ee76ba0ca77179013a135 +size 33554672 diff --git a/library_of_congress/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50bc39c9bd8502cd379649d20546338fb794c445 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711b19bde2354be3a85fea9dbd7781adc5eaeb9ffd171ed25ccae9daa7238a8e +size 67109160 diff --git a/library_of_congress/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c2521561f47c742c149d5c7969b2702101df173 --- /dev/null +++ b/library_of_congress/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f818038857accce5fd71575056c951d1baf37f489fba87ed1c1733a4a63321 +size 4192 diff --git a/library_of_congress/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1b416f2a50c0867db6a3a44c9d7990ce27adce0 --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf117ee4be0d9ed07bd5bd97c945e3d7383a1bcf329aa1dc2e43460114d77eb0 +size 8388848 diff --git a/library_of_congress/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9af2f32307161be4adf81064b20b20b5692b879 --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d949062d3738ad5ed69680cc5e023f1bebe6da13eaafa84e324058f5874b929 +size 25166176 diff --git a/library_of_congress/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb7f30054c037c330392928f87c83f831e9a315f --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4b61586c45e620246ff040d42f51f1ae7c159b26b786c028f83cd4940318f1 +size 4192 diff --git a/library_of_congress/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18f7d3d2283549d8a95ae04ff82dd519c3b71958 --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24174b2bdde7bf097d5e5083f82510c6ab98038b3859231bf134cfdb1c333430 +size 33554672 diff --git a/library_of_congress/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..063c6de05da7ebf3e3a559afe3bc8d1133dceeee --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344cc53a44e1e559f7ab0ba363479f0b2c782dc95b3d938ead28629fd61502b1 +size 67109160 diff --git a/library_of_congress/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a11e35774d24432d2ff67be42abe69269502818b --- /dev/null +++ b/library_of_congress/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb57fb7e72cb879e6a8d93c40a51abe4403420eceb0c387a3d5f5e31d60ac0f8 +size 4192 diff --git a/library_of_congress/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59bfa232fdbada6a790a7196f647756e4d67e7da --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c791b99cc839ffba53dbd6bec6172e871ea5d30e3eb657f12fcfdcc025ed986 +size 8388848 diff --git a/library_of_congress/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8959d224bba3d4f5dba07b127b1ee69c69d3050c --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f859820801174116d484097646a0405e2f1f912714e26bc9d919fb20a5aeb2b +size 25166176 diff --git a/library_of_congress/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d003bcd7699e1673abc3fb22a3702249460c06c3 --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c6781d03df3a4ba789eba9b77c007835f0b644e88d19cc820aa231de6add0e +size 4192 diff --git a/library_of_congress/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1be58bbf201499cc5ae7a1cd0f279a1c6a8400ab --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792aa1342f54133b0e91ec5707e08fe3cd65c82dec62efc5e1e23a2d14b61024 +size 33554672 diff --git a/library_of_congress/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d05779ed4c8f396784ce78b59194f20032d0bd7c --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2474dccdb29a6af76bb876ea09637ad91ee82f176adf675ad5275dd5d6380f42 +size 67109160 diff --git a/library_of_congress/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13196862373c3199ba48dc47bd21e8a0a05b059a --- /dev/null +++ b/library_of_congress/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43147640e7984aed53020c922a25a5912262d5a8f64126572f2c2aee765cd98e +size 4192 diff --git a/library_of_congress/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa4659141d8b451b48b06fa0d14cfe51615b2f7b --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373283c14fa1627636e46d344107c15a67af6ddc7bd3084b3eaafe8a09d026a8 +size 8388848 diff --git a/library_of_congress/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63fd572ddb051e11c7c6222d13083c0a7b222b5e --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3434f34b0268d9e2eeeb309f0f348c6cb11f0d7d8ca6924a9b57a7ed1170fca4 +size 25166176 diff --git a/library_of_congress/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97b38f7d3a7b23471551c4482602e00761fe4314 --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0469898c127f55c1127c73c96b3b95120a4eca64fe89436368cd21e857aa9c4c +size 4192 diff --git a/library_of_congress/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9348ab2dbf66b2204614db865346f023b3623ac3 --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f69b72c9b5f06802733c0b31609704eedbecab6d135a1bf5d123c1a645db13 +size 33554672 diff --git a/library_of_congress/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6531d4904c9828626a276e55f66aae0e7e473292 --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfbeab59e202aa8ce95ba7922fd92207292c1f83ff7e38851ee22f2679c5809b +size 67109160 diff --git a/library_of_congress/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f95b3e08295fa3a5519d615c82ff511910ac2f39 --- /dev/null +++ b/library_of_congress/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74796cf00ff7f4da95d4c47e4f716ce1c7a8d3d2ac2188c26f6647c07470e448 +size 4192 diff --git a/library_of_congress/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a460ca40029c7f3295646acc680f06d0f35d0677 --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd78dd1c68af9f969268bf81b70966b69bc696f0847cde9983882cc0426bcc46 +size 8388848 diff --git a/library_of_congress/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c472f33dcb682d24fda569dc8604806bd7ff6b5 --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747de56c073d26d80b424dfde31f5453e969b515e479bceb3e9239866a02632b +size 25166176 diff --git a/library_of_congress/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ab7f77d1f08b8146add9034c58a4fc17dde1b37 --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d4d67082918b947344ba6964ce44470b4a24bed7450807b2d09a750cc41bba +size 4192 diff --git a/library_of_congress/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c1ce6d53ec2b104430457086fdcec5c58ece9d9 --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:282929cce2fc5819fbdbdbe0acc7399897304ed745affc4b3e393e04b30d49a9 +size 33554672 diff --git a/library_of_congress/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6ebb4a64e9b0926df0e1bacca342f9d9f143cd9 --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f28d2dffd47dd06f516a3bb70310023f8b80056eed8d94d90973412351dc09 +size 67109160 diff --git a/library_of_congress/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/library_of_congress/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bbf828320594d4f4a4faf67ca0a85a8f6b700fb --- /dev/null +++ b/library_of_congress/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6abce3dc3044a5446f4dc7d4c82b2ed8792e06361276d6c38f2b46e688ad1eb +size 4192 diff --git a/library_of_congress/model/final_layer_norm/pp_block/model_weight.safetensors b/library_of_congress/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..004a8a916fe8f8b8b12dc1f2eb92e86b30313a04 --- /dev/null +++ b/library_of_congress/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b976bcfc75c54d4c9909783c17a8005eedd4bacc3b8ebbee534aa40b76612b11 +size 4192 diff --git a/library_of_congress/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/library_of_congress/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7fcdb3ebea56992ba2d27e31aa38f5d8e542556 --- /dev/null +++ b/library_of_congress/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cced93ebf872869defa9ff6356e52dfc3247a39b942d1ef6415fac1deabbfc2e +size 205914352 diff --git a/library_of_congress/model_config.json b/library_of_congress/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/library_of_congress/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/peS2o/checkpoint_metadata.json b/peS2o/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/peS2o/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/peS2o/config.yaml b/peS2o/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cecdcc2e96b730c19c0d1dc26368fbab958bf2f6 --- /dev/null +++ b/peS2o/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpeS2o-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/peS2o/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..feb6ac48a3251c2a0366804ca2f75ffbd3ea1d41 --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e41b6a4f2e20b9b6db09f54bb2733eadbe75f0699a1071cf05ea17a7265087c +size 8388848 diff --git a/peS2o/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..207965a1eb50ccc299542d4b9aebedbd70b8996f --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e89e105fca5ec12475ec6924fd39b353f86986a6254e540bcb6397fdb9b04c +size 25166176 diff --git a/peS2o/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7628119992934f59992368f589330f9d2c0e186 --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2117401a093c211c758851d80598e45fcc07aa617768febb64639454a998d27c +size 4192 diff --git a/peS2o/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8cc3e4b086c6dcc80b49d4efda9df59a2b28dd6 --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e81ddb8e98aad17fc6af680044c60e731de3f92b8912685611942329156efe3 +size 33554672 diff --git a/peS2o/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..230655a3becb292278afd52a2e452da6ec88874b --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e54e0ec2c3c509f36d63372410c8b7bf56ad391aa4cafed44c5f692d4f8ab3 +size 67109160 diff --git a/peS2o/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2199af9a07a1c5f0568b26126cb527933ade8112 --- /dev/null +++ b/peS2o/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c6a108fa4c4504f38d8ed1268bdd9329a805b89d34ab3617256ffe47c4aa3c +size 4192 diff --git a/peS2o/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c3e8e21556e5ddff371da5090fda6c5a66c85de --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f51254db543f56bfbdef964c473074ebd640ebe9ee1e7b303b32489b1139b9e +size 8388848 diff --git a/peS2o/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbfe704edcc2669e0eb1bc153396c96a48c7c3d0 --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11bca7ec8e5235fc8ea9891dd32f18dcafffa979c7dab840b57bdd5363a084f6 +size 25166176 diff --git a/peS2o/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77c0d39208e3eec9a96fff755e8c05419f855703 --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9748bf69e91a01897496be7933c43deab00a80a8692888fd26d3686aec09dc33 +size 4192 diff --git a/peS2o/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5eb1e4984c2dc6d0f722bb671c75428f94fc044 --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dedafd52d77a65adb2b6fb0176a9a841fd92e2b29670eb4730f35921728fd1a4 +size 33554672 diff --git a/peS2o/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9c137365f4731293eeb8506f9d1772a8da1410f --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffee5d39686ea2e8cf9726a1d5db45289a37f51d3715b95c8853a9697034c1de +size 67109160 diff --git a/peS2o/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb5fd0c5d37cf8f138bceb61febc4be15b7fecda --- /dev/null +++ b/peS2o/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13852310c9156ec2444960b4961eefe82bb8d48af43e8bb55489aeaabd83a0ba +size 4192 diff --git a/peS2o/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..740e340d2250ac5cd4abf8e3afa4d69c5c59b5e7 --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc882094ebe5119a2c43d16d08fc22a502cfd5cdfe76c965c8f273066232976e +size 8388848 diff --git a/peS2o/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12e2465e1670be5d9ee4d3046bd58a63a92d0b19 --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3cdb90bf6e7098adcbeca80acb36edd408a6b4e31a1e90a195efcc8d2fbc7a +size 25166176 diff --git a/peS2o/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77d02ed34848ea7c277b3f7ff408bbdfe5e2c5ab --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:928ba1feb625456146f033d60a13ac249daecae42524e50c61b59c52cf57b3bf +size 4192 diff --git a/peS2o/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ecb73bcf11a98fa6367052fed18374e5c11450f --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09c240f161e243c4d46275eb60f72fe5cdccafa3b146c1476ddffa8f154a4ef +size 33554672 diff --git a/peS2o/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9162b114d02678a631a25c9b2edee4e09d5bc62d --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17b161b81a2d4cb9063b3e4f570d05df2c1be32c7c6ec1678e03e94f04f2f60 +size 67109160 diff --git a/peS2o/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47fe720d9fcc7d5dba2af22fa5a3937b594958a8 --- /dev/null +++ b/peS2o/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e01a4bd690b1c299b5e4c1cd8208731c0726a03ef489663deec4de9af0c2458 +size 4192 diff --git a/peS2o/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..beb5f1306b84417be68ddabb8232a5c1dedf8d90 --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a2ce4419ad46c3f1a1d4d11f5d0ca0dfac8af4bbffbf54d2225d8f1a2201a8 +size 8388848 diff --git a/peS2o/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..645dc11e41e7812a538516f480be6a308c84a2e5 --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a04ce667c542b33aa5f4fb40d6cb49fd0308ab9f3a734be7b8b75561e32000c +size 25166176 diff --git a/peS2o/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bd0e0baec81d405428e47d8c54a05cc9e8a65d5 --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e167e23ecc9ff476c3f78e52f91e031a5a9ed82e9ba87c11b412e482e7a351 +size 4192 diff --git a/peS2o/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ef82ed02cb24caa553e3cc726e97318d4505b26 --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90acf99a44f94a2a62946bc223b0a9bba1ab79abfdf67969cd42bcdd17939338 +size 33554672 diff --git a/peS2o/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ac2a0dfc9ef9fe4efc63996f7c222d5d2db7b8e --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:940bc35ab9bc1b63670b179501c885f1173684e4b32a97fe56d344f98831f7c5 +size 67109160 diff --git a/peS2o/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12fe7356e3d15e65e1f2bebef18e3bc6b3fca34b --- /dev/null +++ b/peS2o/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:310752dfc80d43f9cdbc4d83845d35f5b02b51ebe1b62b681dcd50a433fe5956 +size 4192 diff --git a/peS2o/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89544cde247b6bb6118d37cb3d4fd5370d4bde15 --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4a70f110d3b6ef9710cf37ed7e38bca130bffc516852fb449d11be2c9e8ddb +size 8388848 diff --git a/peS2o/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b31829b85ae4d38f424c24dec0f748e800615fec --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61955755251f1342f4bd4d816874259dc1cf6a3a841b42a8ad231154b373e50 +size 25166176 diff --git a/peS2o/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9afaaa7c0586c25ecce2c5c2385f5d47a552114a --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cea0e4e808325317f0bbc4ce99656117ca8463a6763dbd00fb74ced46dc0c1b +size 4192 diff --git a/peS2o/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..998a9f073ceb2adc916df527763a73c8dbf4ec0e --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59b953d4351e7a8b12bfa1f15d97b0c9c0b0055725c36b95b55a77b8f3dfad3 +size 33554672 diff --git a/peS2o/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a250385a92a5b33bc79c5d80d5adcc3f4e20a42 --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3c39f4a79b8e903e0012bf007721048be56aa9e97a59fcbe6e21093f5e3015 +size 67109160 diff --git a/peS2o/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53de296950fea792402a49072d68fa1d529a793e --- /dev/null +++ b/peS2o/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a489bc3a973ffd671bec04a3f3602f168ef44fe6754518817fe088a2524dcf +size 4192 diff --git a/peS2o/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21ea2910eb2e6050fbca84c9b8363581bd46a785 --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a9fe24f3fdf7ef2f6877c3af4fa0ad54e77d10dcb6e9252cac3239025912dd +size 8388848 diff --git a/peS2o/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caf496c3d3c460fd3a9b09d3706e0773f3c723b5 --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11894a212a17bae086a487a5e3bab2e02b5f5022d94e942f2a94e561bb54e6a0 +size 25166176 diff --git a/peS2o/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f15a09438cffbc696a65824424b380ae59ff48bb --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b540762c27bd43db1fd8cb684b33300db3d7523bc5921d10618ecd0af372cd0 +size 4192 diff --git a/peS2o/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2202073e4b93d8e33f73c082bdfd6bc571c87b26 --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a689d7c488a99be5f33f1c810993294b1b263541183d1bc221c7cb928eed3c +size 33554672 diff --git a/peS2o/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b70bfd74eb7af0cbc8db73ee10b25ceb177086f --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2371ebcfc53f229a99ae365683ffcaa6a3a1328d449c31bd2dfbe9570c906c2a +size 67109160 diff --git a/peS2o/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f5be97383607bfc513def502d125553dcd89547 --- /dev/null +++ b/peS2o/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f2229549fb086c89eacef195d2fa5be26101c32f8954bb35403393b84d5852 +size 4192 diff --git a/peS2o/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d12a56b44c091491575ce80dda082415c59041a --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562a38ecf6aa51e9dd7de95c67a93dc8cc056380216148f5a6d4b462524d80d1 +size 8388848 diff --git a/peS2o/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ad204952720cb65691d2928923be75bc2172fcd --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd013613d88448651f6eda9ae1db88b634da03e756d9c80e7a96c0aea12dc905 +size 25166176 diff --git a/peS2o/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d39a78ad7dcfac1b2c24aee14fc6bda4639a292 --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30eb40075da97a32264d470af4ca990e68cf260c1de62a217ac1503192914131 +size 4192 diff --git a/peS2o/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e35fe9f1c4437f25bfb8909bf745fec8211388e2 --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e573850c10486897cbf8fd0b4fd2e8ed802e874b3fae58e48e2400010ec6c74 +size 33554672 diff --git a/peS2o/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60812f32f6e219400b25b13a89f19dddbc25774b --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c74798b428550a277db54a078d636f0c5af1c519323e69f7a9c3298bb0895f +size 67109160 diff --git a/peS2o/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c209a24f463a2c9b73cbac931b3eec5f82f983a --- /dev/null +++ b/peS2o/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4ab39b3e63207d3318c9f599d6af1bb0659cf1708e0cdd46f25b56de8d4023 +size 4192 diff --git a/peS2o/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4179e1990154e8ea9c420375a035357811d1a02 --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b3847b97c5586aa9b388120b3695aad221aff8783a67575dfd2043937a2695 +size 8388848 diff --git a/peS2o/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6270db7be5ea4d1ae5f85416f0bc6ebf21562d4f --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea1d77ffcf8a0b5ad2482c9ef65da89e2f08d368552e1857644cadc2ea9652e +size 25166176 diff --git a/peS2o/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f806510124f673704cfd7cecfac5fea8f1f74fb --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0194d97fa73a55115ba6cbbba3ddecd1ba572c73dc216c74cc5cecca6791eab +size 4192 diff --git a/peS2o/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cb4cb11fe5f740a59e5decffc9e956cdc193b4b --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42898018bb74164eb9b7fbe7e6ec389eb070101a2ce860079e39f429b347bec +size 33554672 diff --git a/peS2o/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec4ace370807bbec72d6497531f9da738106ad33 --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833283689bf4bb152564590080d25d086e36b0b88f1601f673893be238292074 +size 67109160 diff --git a/peS2o/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0607aafdd9dc0c38f9efeda9ce12dfd8381cebc1 --- /dev/null +++ b/peS2o/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8067b4fafe249075a9856455cb1c8048b7018228974f5cad1d0a31b00ff9d20f +size 4192 diff --git a/peS2o/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcec781c09e905981ac0b524a1abc32a1b8947ed --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9890bce9db6dd7ee497fb4e0684e3e35c2be23e8ad43334897f1ff8816a049c1 +size 8388848 diff --git a/peS2o/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..311d8eb60f07cd1b6735d6e63ea76a94ea40ab2d --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfbf94fbb83c426aa972c817dd697360b82cf5af4bd47d691d017812a77eedc +size 25166176 diff --git a/peS2o/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a8957faec3a0589e10133c2ea4f889fbd8d80c6 --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:738c7a9ee40ed4d0e18f211b08b5a5a512fe6f75cd173fa434a560dd45778311 +size 4192 diff --git a/peS2o/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f700e8a509032069cf54bc960d05c3d2f34d5011 --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef817ccb2ed72aa420e51b008e15d0161e31189330b98e4df8d063925ecfff5 +size 33554672 diff --git a/peS2o/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b41d648d79622ad3db205c5d3c3c2734c99242a --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94646ae741d43f7bc31d82f7dbb0279c2b9cfee3ff7d36ba7f54d2a626c7bd60 +size 67109160 diff --git a/peS2o/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8877d9a0da97b739409a22f6674fa15c1948a02 --- /dev/null +++ b/peS2o/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0630e54c2de8cf335149090c1c7d2f68ca0bf42585402ce7b21c7f4a50b60e58 +size 4192 diff --git a/peS2o/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4dd97d6fa4687c64d04504a25038384da5559e6a --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4057e661e5a391600e61779b795a579d11da6c091d774d5351bc1611b4052854 +size 8388848 diff --git a/peS2o/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4000ade927241155e3ea929cfb11d487b5ac720f --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534b06e68230172787a6dce20b5caed5c04b786cf38c5831bdad2aa43b6fd9a6 +size 25166176 diff --git a/peS2o/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea1390a645b84d927c5b84c9b2ac1841102dd484 --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9dba4d962d63fd95dfed5648b4823517092429aa80c3a5b99da43741a035cd +size 4192 diff --git a/peS2o/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03c6344bd1863285b311266946d949a774275d38 --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074f61fadd116bde4c645cf5a23c84176ee959860fcfe96320356782f07a5941 +size 33554672 diff --git a/peS2o/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbb8db5cfb0ef9fcc8460a501b7f934478b7590f --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a7d60571e263671337c1b42a727196e2ffad5a3bfac11aeaeaa1d4dd176024 +size 67109160 diff --git a/peS2o/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..026386bb21c13571928e8691b56855e63580a716 --- /dev/null +++ b/peS2o/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ec9fd5730008b679ac8fa8c5109cd60d1af7bc130a8780fb19da287fc8d793 +size 4192 diff --git a/peS2o/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a67ce0d6b69fd4bbe30210e5d486ae3c56cf670 --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521237e24476f5f7e00638ae7975c2bde07a7f59ed0ed3cad528b27abd120698 +size 8388848 diff --git a/peS2o/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de29e4eb4325e91b562bbd78a786a7d5d0ed1210 --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb9636ab7775a37437c8a31a0f8d43a8a05934058f2d991d8bc4fcc4240e2da +size 25166176 diff --git a/peS2o/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb9e7b423ecd762db9e70e36a81fe62787e5e11e --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7367a457b69670b403f503c26b1c3c276f37319d06a63685dfb50a3b679d096 +size 4192 diff --git a/peS2o/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25e13699c389aa0805a6502b5d5724e509c8216f --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57abfc1f3dad3a4adb30be85feff32d7b3d5d5129d5547e4d5e317de519c92f +size 33554672 diff --git a/peS2o/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec6e3f7fe8411e0c8ecfd8a2b9fcc56803cf6a27 --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5fe02adae175f93eef7c97944d8dc9b9dec0e5dfca8ce12c6e1ff617751a8d +size 67109160 diff --git a/peS2o/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7b64035e88a6c317bf696de703550ba68910249 --- /dev/null +++ b/peS2o/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0454aee64638e84f855e1c83489b0e8e992c7dbaac0afb176c790ba1a89ffdf3 +size 4192 diff --git a/peS2o/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81f2e094bc874946f5cdd8d78e094427cd5cd463 --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc14f6411b675c7b81cdb2c28a5099c7f635061eeb890833f753dc5d104c3d82 +size 8388848 diff --git a/peS2o/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29a879c7b65a5bcf58cd4d1dcccab8401abbf38f --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8dbd8da896047bb15adaca7c77ffe45530a24feff2d291c0cf11f1f7761f0 +size 25166176 diff --git a/peS2o/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0e7734e153e673f12004f139336e12d1ab49d07 --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bc15cefec7766a5ba20d19f37fa3578bdee5edfe19e733f01e3af5a1567c1a +size 4192 diff --git a/peS2o/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e99550919c8100d28e51a214af27ed1c0dcec89 --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eaa237a77c0a703608d8d08d9dbe7ae1019b822407a867f532918f024cb61fa +size 33554672 diff --git a/peS2o/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ef2c8d7370d1b7a2b00d035aa4cfa42856fc66e --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1d3477972b8b627803c449319ff6fe09cff0f2f5dd4f5da4f88813939339e9b +size 67109160 diff --git a/peS2o/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8339beda55756ddd9e95d952669656def24640f1 --- /dev/null +++ b/peS2o/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5b82036e0c31c6710a21272efe45757d29f8a1c86b4533573f9169584e6b81 +size 4192 diff --git a/peS2o/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c1e65552cfc0aa44648b171ba081eb816e509d3 --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a6c1ce78e0056094e9bb2f67a2448fe2fc1fd8d69b0e6a24c7e4a8294dd819 +size 8388848 diff --git a/peS2o/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..166adbcafd37b0212da5df0063fd9b8ab0e2dcb6 --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5197a66e914d98687f4ceddb5f2f6b8c5f32e48826ea4f91258af72b0703b66f +size 25166176 diff --git a/peS2o/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a968359998d60aa22c1d7c7318ec7a2bf0bbeddd --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54b9a8ccb780578aa2366dc7de3c62ef49a87be9cea7dcea60503d98ec26216 +size 4192 diff --git a/peS2o/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49d2229c5af8263d5b9ba486a5ccfb1a723e37d1 --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322a59ffcab79a0484cc212910a63454e506fa1e7e2df4dbbce15def2ebba150 +size 33554672 diff --git a/peS2o/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..001fc05cbfcb4a5aecb3d51fdcdbcd50ae62489e --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129bfdda5190ef2033f16a19ee2a083db5acfa4863fea6db819133ec1064347f +size 67109160 diff --git a/peS2o/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed78c5ac6797ff1a0eaf79b007eac52e53c6f8a4 --- /dev/null +++ b/peS2o/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e37500fd5bac8015f89b92d791fdf0ede333fc25a37afc53318a1113c784b52 +size 4192 diff --git a/peS2o/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a3cff399d5d578ce4335fdc1b830ce2efc92436 --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ccc956986ff34cff7864d271132b16b83f50dcfa6bfa0c25f1884763cb0abf +size 8388848 diff --git a/peS2o/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..445860d0be278f7356174e3beb306c8416020ce5 --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623b9e58fd1199487f4382deb5bbd097ccd57e4182e603eacafb4673133b46ee +size 25166176 diff --git a/peS2o/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66242d4ddc0381ac2c5b5205cdf5ba853040335e --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72624cd7e8471122557d9af279be056b86be1c0f2d9b7f64a0015923f4123a81 +size 4192 diff --git a/peS2o/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d40950b96b016d02e452520e3c51ea6ae80aec75 --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d34798ecec52ca4739432e2b67c9b693c8d42bdeafe873131eead79810a0d6 +size 33554672 diff --git a/peS2o/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..115a3d2dc146e75d3156f70d44d0d7f898778214 --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d2d76272cc3bcab4e7e64968fadfc3d89c13e089f9292d8290a52e4baeb9e8 +size 67109160 diff --git a/peS2o/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdd69238b4bdbaf4c40fd8008517b6987ec87303 --- /dev/null +++ b/peS2o/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8665feddba5a1c165e5d1efb1a7d27d93b89bea7ad589197e93600bea51d5ec3 +size 4192 diff --git a/peS2o/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d59749c0a17a9b0543000e11bcaed8b22074dd9 --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0abc05a5b13912d680f675681b7654e43e6d00f34644541fe334c4fb84dd6246 +size 8388848 diff --git a/peS2o/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01a16966d76df0db4a5a8e65add0e751f189b02e --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ab98569504684f1d0fdc1120e042622221661c8a90edcd06bd1ef71c9044df +size 25166176 diff --git a/peS2o/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b8fc75a758c8017150a29266eb36a2add7e08b5 --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0642154fa42da53a6b34c85652c9fbca25b5f0a524bd9669a43efd9182776f +size 4192 diff --git a/peS2o/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60c39c73bcd435eba81cfd4195318edb50e482e2 --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4e5c223c5f242b9aef9842cea71be6eb9d6ffccdf834108375db913cbada5d +size 33554672 diff --git a/peS2o/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e22fc5122230feb2658277d2487bd67aa8a475c --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46ed9cad80df2a3d70197faca0743b5c76304c98db5b6e1dd9b407726331443 +size 67109160 diff --git a/peS2o/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..879bd18147e87f23bdb49b75ecdd14fe601c84b3 --- /dev/null +++ b/peS2o/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad970cc1e374527478bc2fd26e4e37b57bf9ab97dcf7bc37422ee32b4e6aacf +size 4192 diff --git a/peS2o/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a921f02a079c9ec249bd1429f4a5b8d030535185 --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d344875cd93d4b5e917379fb7d85512cb5d87ef68b9c579b3b88df08ed2634e +size 8388848 diff --git a/peS2o/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3f4690b47aadc4026ab462edb79d2a1953a964d --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad4451a6ca9c0bf430071dcf3468052b7b5205a2a9d85d8ba18b8096eb16a75 +size 25166176 diff --git a/peS2o/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43f715fbf92a8ff1c8fea2a622159d88f15ceab7 --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a98bcfba2e05bdec9ac642b9656469f65c1e7e1af2cbf137b52bad4a60d4f67 +size 4192 diff --git a/peS2o/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfcf0ec61c3de37416dbd408452a5ddf27f1fad0 --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13caecb4f5d68f7436c01d455a2f33dd01b56c03f8e15dc7a6dec11a8b4ce85 +size 33554672 diff --git a/peS2o/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4515ab3c7d219e4f42fb02e42fd8b5a832f1416 --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7832c82a0059e0227cb09a602b016d3690e524de57ca0f05b3019dcc34b4dc0b +size 67109160 diff --git a/peS2o/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67a4254c3f1a9b720a029a37e01d065798aa98a6 --- /dev/null +++ b/peS2o/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5913b847c878126c757aaf54ca9a11d601713701f1108cf3ad63a32fa72fac5 +size 4192 diff --git a/peS2o/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abfef225c7a5ac446344c81e44c2e68bcc5c0d6d --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7970a7a0c8578ca3bb5d5bfa90a68cd5045e585c08b8fe9eb5bd6da4b271172a +size 8388848 diff --git a/peS2o/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd000835dec01f2c469f207422c04a03303285e7 --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ca83ac81dc77e103e65332db03ac2caf092effd8f46e3d0ec6a7332f61512d +size 25166176 diff --git a/peS2o/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3b431901024dd90204370370836d5e608b41735 --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b7676ecd9cd73caba414dfc3ab4ddd07c7d4b991984fd7adf2cf2cbbe5108d +size 4192 diff --git a/peS2o/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6ad50e73d5ec36890f885c9d4671f730e96ee97 --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5fe9e4b281c65019c61729b4d4ae1279f31c75939f5a2a51dc5380d81466be1 +size 33554672 diff --git a/peS2o/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f8b7c5d10c1af1f66229b316e7f454013ee3121 --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6608e908d31207f4864599196fab7dac7c109d1a78a9e2875befc8269d7c082 +size 67109160 diff --git a/peS2o/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c12ad4e64f7831a038e55f55b0b2ab4b2c3a2e1d --- /dev/null +++ b/peS2o/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542b5deafe074d87107b7734ed08198537c4773f2d517495f6a1bfdbfa5d1047 +size 4192 diff --git a/peS2o/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..501eaa02055283d7762f7f197df421720df69279 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662caa631b1163b00595cb8db7afa358c876712aa1c198c1d462a663b62150d1 +size 8388848 diff --git a/peS2o/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c42d3ee6d3039784282eb0818a5a3c43235a4060 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6460d38beab8124e46f061aba652c51c9ee1c4f909b02e1a6ed90280089852f4 +size 25166176 diff --git a/peS2o/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b923b6253f4e96c5eed6b43a614fcdd4ccb6d62 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1fa02808cbc3174434f776c44637939c038e977f56e9d265afb292c6c637bb +size 4192 diff --git a/peS2o/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..426a45a581e091307a0f6f045457c18029a39147 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2ad3fbd5847dd7ed147fe30b7b4e856c828a833e76f419a069e2c3edb9767b +size 33554672 diff --git a/peS2o/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efbedece5ca5e78eef564de1f9595b9d430d34f6 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c487d3661459369c1cb3adfc0da446cc4b2f6b571336deb268b54be542d5135 +size 67109160 diff --git a/peS2o/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c1f9b2aa7c97af191b7f43800cb0e9f18328965 --- /dev/null +++ b/peS2o/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe7df84a6b82c9139ad743ee682d8348b868b0b1dc46e7230d8bf7dbd373f57 +size 4192 diff --git a/peS2o/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c6d771eaf68afcb1c982307d5d437b9cc4dad59 --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6568b5e6e02a4589f5ca3c563f7d235529eed572ea3f17b6931b681ba7f020 +size 8388848 diff --git a/peS2o/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dd94fa3402955ec1957f624901b9218501bd480 --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1d396840d91662bcfb0207c3604673de6e6291544d25f385766953ca6fee09 +size 25166176 diff --git a/peS2o/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccc15afe62813525906192d7c79447a80fc7d4e0 --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c83ae556f409357199bc9389d54028d9598ad481d9ffc84f88045637108aa1 +size 4192 diff --git a/peS2o/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fea3d7e2fa85d520f88ff3349b2acf37bb8fb9b0 --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed656f6ca30dbb451e25502dd6d28dbcebad78a29eeab792547a39de2e3b9b1b +size 33554672 diff --git a/peS2o/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4444780749048cd6b0b7d687b20285c96c55f8dd --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd427c0d420dcc17135d74ee5efeea6d2246b48dc061d0a97c58e2be833c6178 +size 67109160 diff --git a/peS2o/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6da8b8ec49a297a5fe5088ebd45fcc1462a6de43 --- /dev/null +++ b/peS2o/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af6babd9d197f6831b9cd8caeec9d17e333ef1ccdcf05a4d25a97c5f6cab06a +size 4192 diff --git a/peS2o/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8fc7d34d83dd582250fe51205863db19ba3a831 --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26fbafe891a3627917a72c98827ed3d3081b34a3759128066ab236f30e5153f2 +size 8388848 diff --git a/peS2o/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..588909523ab6c38c96fd0fd1d25a1ba2b0425e15 --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40b888c8b53fee35ed158b8be55fbba7adadf7c54ef6b6a3ab0b81c3e8b85f2 +size 25166176 diff --git a/peS2o/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f397288085ffb35d2bc3adf0d72cb6213e9ef43 --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10bcf796c04f2fe40db8a2011a307ef7d6eb3fe4ea7b36cf08e08f5b5b720a7 +size 4192 diff --git a/peS2o/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbba2e5783b86e23a6a037987be6a1753d914173 --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c888776668ec1a0b6df3b82e032e7033360569a9258b756cd280af97739b963 +size 33554672 diff --git a/peS2o/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29bff046ab72bd49fb5d8ae3db357303a3e9887f --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8297e517e7bb934c61713532f421e579df0c960d58d5928b90d8c8cb94f25e12 +size 67109160 diff --git a/peS2o/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fb96ec685a0495e2ed2963edb76655f2cfb16d0 --- /dev/null +++ b/peS2o/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:105910479f7a31a0f30609fe3fab1fe5344457a8c20680974a79b63aa390ad76 +size 4192 diff --git a/peS2o/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e749b5365c65cd077c836840157f0f6f05f70d6 --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f419e06e72f95bcd1b0eaf73feea55adbcaed0f57e7bc958804a2a82fb09200 +size 8388848 diff --git a/peS2o/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c9b0f7b6086dcf77d338b2fd66994926c4ce309 --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6370a12b1b7f1fa6f9f462a6073edf8ec640eb8f8df8509a72a91e92dcf4ceb +size 25166176 diff --git a/peS2o/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4db2664c8d7a1a4b3086df493b27def02cbb7987 --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94af743b1b378ea3699e82ea060a3ce5f8384bf772457e2b9a57ff6fd9d22e1f +size 4192 diff --git a/peS2o/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2467ae968d806cda6c27b166ebc81a276ffb5bec --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ffe408930134e7016f38dbb22066d7b7af9c31679dee7452e8d16b26ed774c +size 33554672 diff --git a/peS2o/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7db07d8bfb23659e0128a4da825bbe7fe8700b1 --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46b8d8b9d09503cd7b0f94923829a0f9ef8df91291b0959a9c3261f6d64f37a +size 67109160 diff --git a/peS2o/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..573a010546685b84d980d34917b4685ed296935e --- /dev/null +++ b/peS2o/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67650a26fb42717809cceb6933da8fe2248e8e4edb2e6123d479cd815ebda8b1 +size 4192 diff --git a/peS2o/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d70ef77270edf3586481041344288729bdd9920 --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a547f9ebdcb452ed74f5876d15a2e80e9d21b581fdbf14bc67157ab2c970286a +size 8388848 diff --git a/peS2o/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30c65fe1213863e205075b398402894f9c860aa8 --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4c4668ed70b195c1e3425209a61a86590b96c6cbd9cbd45394e572d1148720 +size 25166176 diff --git a/peS2o/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6d135c2738665bc8eb89b1f6f1f41ed51c71247 --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ceff1fe89590b317c92951c3281c6fb31b0fb328e1ea269733c401a66f9b14a +size 4192 diff --git a/peS2o/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f586b386ca44522c0305f4e98ccada4f0bb0322d --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7481e1e77143d1ffe48e212b2e312cf78176e98504457544595118879c26cf8 +size 33554672 diff --git a/peS2o/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e0a8384a9f7c354c25065add80f37e187cb9d47 --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ba5275acfd29c62809a6d90a9fab8867aeea1f36dfc4549572a1162c19c711 +size 67109160 diff --git a/peS2o/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce554e8e7aac2e704cf51e6019bc5034528e9ab8 --- /dev/null +++ b/peS2o/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0f11f5d4e0a045089b278af7ff332a31f929fa1ef149f5793bf3854698f94f +size 4192 diff --git a/peS2o/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4842a78e62075ec631b18aab3f15ae85bbc8795c --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4c912b2bd49f4e9770f3ecc5fe82d68261cd5a976d1ebfbdc131ffe2460dbf +size 8388848 diff --git a/peS2o/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88f703c5f7694cb1b967eb3225e6400bed4b56c7 --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec96741dfc6548509d1f31ec31b63d4906fe3dc4c8ec1c2344d04243931e9d4 +size 25166176 diff --git a/peS2o/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd8eb88c572d7325d46d0922d15e9e1d5d1a25a2 --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1937705a136f0cebbe5d49928360eb2a96aba365791aee7925f92ac01d7d47a4 +size 4192 diff --git a/peS2o/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34aede7f11a1fc0ddc8bc7bec44e4f0075d521af --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0db099f21da54eba3e0f479b39757071954ecf77ffdaf85496cbb8f0e114ddb +size 33554672 diff --git a/peS2o/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdb9468025ee63c53c34dc7803b2945fd0e1e20c --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9a948c488b7e6f09f5e6a06967449ad09e28c69490ba724b31fe83bacd0445 +size 67109160 diff --git a/peS2o/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86a5029a6b0f3870997e527f1f5699e51ac8a07f --- /dev/null +++ b/peS2o/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f2655c7d2c635aa923d41960943b7fb954b4d01d66bcfa4f401db5172655d89 +size 4192 diff --git a/peS2o/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d1c9f7f65544f37a0e02c5993fb07eaf775ea55 --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1702ed0794f3b9a19595c03307a02e93dd5b5761f88b4d9660a38ac3c0266383 +size 8388848 diff --git a/peS2o/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..461abe7cfce931416ec9088defba707a223e5888 --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b154f1c5fd652913e4af0a7c887666289167be24e60c418dc31344d179172aaf +size 25166176 diff --git a/peS2o/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/peS2o/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..786d2b90c7bc4f7dddfb9d87b8eb5224e3e3561a --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad48fab4844c0be7cd813ef11f2ac0c8ba1319bb1748c20544f958b27320bf4 +size 4192 diff --git a/peS2o/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c25a43f2308bdc02c7b71474b20fe4269cfb742 --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e89d2e13729f3819dce2cb255f96ff4380417d4b8e52bddd6830b31afd298e +size 33554672 diff --git a/peS2o/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e319cef403a60903bcfe3f2a28c484a8d6f524d8 --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e34ad4d6c1e87dc2879d7d4fe8e6b0c1ab8e39d71d66ded15b2f5cbd9dbddd7a +size 67109160 diff --git a/peS2o/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/peS2o/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85aabb08cc385394d9ee0554f3d27f0b4e3332db --- /dev/null +++ b/peS2o/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c375e9194e79d24659393973143abbc2ad81851eeb7690ebdf12c42a3cbd6e +size 4192 diff --git a/peS2o/model/final_layer_norm/pp_block/model_weight.safetensors b/peS2o/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33c124050589e493b9bf2ab8519377a0b4980524 --- /dev/null +++ b/peS2o/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d47654f4d4be1e4d97099bfc8891526d71ab541e3a013e5f3d7c3628882d5bd +size 4192 diff --git a/peS2o/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/peS2o/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..add52c93ffbd517523cca607b50b90f6dcbc1fbc --- /dev/null +++ b/peS2o/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d902fc6509d349d5b1275f942439102f1b65bbd56ff01f613de6ebd41176770 +size 205914352 diff --git a/peS2o/model_config.json b/peS2o/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/peS2o/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/pre_1929_books/checkpoint_metadata.json b/pre_1929_books/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/pre_1929_books/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/pre_1929_books/config.yaml b/pre_1929_books/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..93d823d2f34db5f34aee1280e2b2ae70eb889641 --- /dev/null +++ b/pre_1929_books/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpre_1929_books-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/pre_1929_books/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18dfbda71ccec1cac1605c082f1b2abed53448c3 --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c16a27f6612a797dc9bb61fcfefbf49e110ebc4852f248a4e4e714dc1de7961 +size 8388848 diff --git a/pre_1929_books/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4bb536d771be3216d1d4f82d478d19dcc968045 --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827f74a02c093f29686ab3ec28183b2e7eab6ffc7e11407987869086182be86a +size 25166176 diff --git a/pre_1929_books/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4b0c1aa2df468a4f60f1d8bb086fe467d44f1ff --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcff1a91f110a1b864880ae63524029706ff51c042b423e3622f0c59b456a54c +size 4192 diff --git a/pre_1929_books/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8088d17c9a4a3c770581b675eb5b7c9e570b834d --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc57a5c635084629a21e95edf30487965aff0d91d698b748d46aac1b030f92d +size 33554672 diff --git a/pre_1929_books/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a073290b70f48869a6f448e0660102fd410fe93d --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a015d409e33fddec3eac57e2b2dbb0bc5af13546031b2763c5f20714824998 +size 67109160 diff --git a/pre_1929_books/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f878f64dbf85cfbb8e662f2ecc0c75cb2d310ae4 --- /dev/null +++ b/pre_1929_books/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7576b0f4b1a058cb69896697a24b297580c7d54393675dd5c479757069c0eb +size 4192 diff --git a/pre_1929_books/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea5d466df02a7a6293bfdcaf03fd51bd8db14633 --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed59a87a6735638493f1af63a2d7920e2c34bb3a0f2ec68cf0777ea92382099c +size 8388848 diff --git a/pre_1929_books/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fae13681d72b29dd0b0aca98572cf538d6dbd3d8 --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6eeaafc6020e7ef4d1f22f1d59320434b638c74af5dadd2d4935d7946fe9fd6 +size 25166176 diff --git a/pre_1929_books/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..553a28c657536c6412d34849ec0ab7f8a9ba2672 --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e297c0a988ca0047d53ab7fe2799ee0478549c242e2b2d0c29f44bbe78abcdd7 +size 4192 diff --git a/pre_1929_books/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d3deb95d6116747ae18cb52989353b18f265806 --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7177297222346b7b5a307b4364a14cf7ac9828b923be5f0448431f901d054e +size 33554672 diff --git a/pre_1929_books/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..664ee4558bf89902e8808e1a1de819a0522111ac --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac9e874dd1547c19ea70e529806298a3b36834247df47273141be4b51a40a06 +size 67109160 diff --git a/pre_1929_books/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c43b4e86aafd467790a8bdea5f90532ff79ae1a --- /dev/null +++ b/pre_1929_books/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cab0f9a0df7adfd442e19ecb5d69b4a93214f309819b890ac6d77458b6ebe0 +size 4192 diff --git a/pre_1929_books/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04a7b2d88aa60b534c0796d7d65d7100505d2c6f --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:defc07a85a5b0e51b6f97681a348f443cf245170e7ae2c542e3d71dd54756ae3 +size 8388848 diff --git a/pre_1929_books/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f86966aaa9ed85c5ef7084f9681b1b7277f689f --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52289b46493c1d7c695df82e5695a6e7bcf5bd8abc1dcb40208da7a6a1271ad +size 25166176 diff --git a/pre_1929_books/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fe5603c27431e037243ddd6e684f85436ac9fb7 --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcb10ceaabb285e4523dda71a7a1ec0bb006458da5b0afe36a6a7bd084b958e +size 4192 diff --git a/pre_1929_books/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f75aa9362b0415d4bb9e19ae689f16dc6a0a44e3 --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e05370851f951139c0bd94007a78b1511933217cf2e03965c5a4a8f86afdf37 +size 33554672 diff --git a/pre_1929_books/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ed4247b320b83ed6448937e01e61a23dd423acb --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567db8987daad2fdc9e73b32c5e892ccd7533f3dcc9bda6072738fcc188da10b +size 67109160 diff --git a/pre_1929_books/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f90cb1f65019ccad0ffcf02f1d2912e8fb2ca2f5 --- /dev/null +++ b/pre_1929_books/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15077e3f8033873029be15c9256ca7a25f7e8d5ce1c61b0e5beee3b2d3a5fdf0 +size 4192 diff --git a/pre_1929_books/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1d64f14f24bb0bccdfde93a73b1e8744ddede13 --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ddd5574bd584dedccdfea95ee8296d63339295432e1e9efc8a1ef157c69124a +size 8388848 diff --git a/pre_1929_books/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ca44b8173bea9c5c4ab6ae0794f526d7ce8f59d --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9a7bbbfe0f17090f89c2f43e2ddbc035d15b4d72c01060f2ff73abd640ed09 +size 25166176 diff --git a/pre_1929_books/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2d9571a569057b58c7890015859c35095815fba --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d43183da3b141b33844c94298bbc57bf1c1bb6d46609a47ef3a90571e13690f +size 4192 diff --git a/pre_1929_books/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3076682dae568c0f32ad63bf169373d83c1e8d68 --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fab98156f0e7e79ad421bc658a0a90a215f1c52ed34ceef080851127bd741bf +size 33554672 diff --git a/pre_1929_books/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c18a0e17673f669ce882575591dad92012cb667 --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c848a08c76e4a45d0d9119facbef41b9aaef265ef9dd6cb03f07d3797c4deafa +size 67109160 diff --git a/pre_1929_books/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a47d7e5463e449a2891f8aeaba5fe0745a28cc2 --- /dev/null +++ b/pre_1929_books/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd3c86032f411f80cac2dba7f830a07fece437e3ac8eda0185412cfd3bf5d68 +size 4192 diff --git a/pre_1929_books/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386a5ce882cbe817a8b4fd7fb959d6dd249f791d --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c198ce028fc493c2361090b56b37068ee1ba0e917aacfafcf55178972f7c579 +size 8388848 diff --git a/pre_1929_books/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bb80d526bc51c498ac2a6ee804fe62b81e2cde6 --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c04c6673f0f3f1ebc6c40c7b538add64cfbefe37f41b7e478c520c8a0ac0dd +size 25166176 diff --git a/pre_1929_books/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..269380fc3af00e59218703093875a5dd6f7576cd --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab77fe183ee6acdb7855b7064e99ef8dcc7e8938ea9ca1298e7fe3664c3a194b +size 4192 diff --git a/pre_1929_books/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5f6cb9813908d96d221f0f30e918d284b481891 --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc001a965daa80512f32aa01e6ed3523b238c0a969b2676a7555354bbeb7559d +size 33554672 diff --git a/pre_1929_books/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d078b5d7e0936a1d758ce78a70f9d07fa01d838c --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51144f75177b3516b4721d7eb4012dcc187be327ede20611bea724c95102d01d +size 67109160 diff --git a/pre_1929_books/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fa3a7ceda7a87bdeb286933c95dd573fe0dc037 --- /dev/null +++ b/pre_1929_books/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c7e86d48cf2e89810f80d46cc8522c45d7d9996ce06072f171357d14de2352 +size 4192 diff --git a/pre_1929_books/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb006e39ecd6bdc417e7c4ef67e68ac22104c57e --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165f99d47469664f2454158fd76a34bf0d9075a7818b8dc94719d78f32c1f1d3 +size 8388848 diff --git a/pre_1929_books/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44e9216a19ebd0f5c0b64fea41846d2e33b29cad --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44ec0490e58f4d1d85ac6c598a6e127a3ccf76d49deb4cda4c1b3755bc5d009 +size 25166176 diff --git a/pre_1929_books/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d395d84198e0c8b803a035a38d5f43cd9033483 --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d797a6cec02fea3310c5f8917b9c7b3189754c4e5c621e816fab4fd53991064 +size 4192 diff --git a/pre_1929_books/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..192929696b1606856d87f9f10a490f9881f2b464 --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171186a5edd8ff7ecdcb6335175af89d9d083acbca6958242b84186a5fd6ba20 +size 33554672 diff --git a/pre_1929_books/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c09bc055ce36cb9e3a5e02cf6745ef10cacb5733 --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73442c5b3819e2555dbe86f5d695970befad632cb2274c96e5966dee3ae70c63 +size 67109160 diff --git a/pre_1929_books/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9ccae920ed0be8abca8905df4551a5a5865bc14 --- /dev/null +++ b/pre_1929_books/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:789dd299c34a0aa4478a99507c9a36950667fc43d30bc5a6356fe273f4410b4a +size 4192 diff --git a/pre_1929_books/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28b44c3a49f749c6f2bbfe22094d85d5975b39c5 --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751bf8f5be2ea369e83923f99b798a31cc788ee6447061ba366eb6cb5a9e1660 +size 8388848 diff --git a/pre_1929_books/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fd2bb45e2ebd090bc0b962cbdb22718e6188d89 --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b865d1a14d9bca48bbb1178f5954004b43b21c88abe126c90c4721e9f4647924 +size 25166176 diff --git a/pre_1929_books/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75da47eca8da664933d58f34e416bed95677120b --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39c8e5487be9d8e511901040059150fa1c1cb661dfe469c5e8cdd0c8005447b +size 4192 diff --git a/pre_1929_books/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c5685151672cb6e2a87e0f8690e3d54ef9e3087 --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df48f12de6de86860f75c36d472af2cbf64d6170c659a263f7915e8b5b7f6d4c +size 33554672 diff --git a/pre_1929_books/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0a45a8a04946aa20e5e94a2fad3f7319c623193 --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b5fc4636667f0cf5eef809ba6efc2548c29b19b029b047d801d0807bf8cb90 +size 67109160 diff --git a/pre_1929_books/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..253d9b9b75b29a38d42a9af63b756c4e70f01da6 --- /dev/null +++ b/pre_1929_books/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce54d33c570600c4f2b72e160c3f4d517516fb9fcf1fbca1734bb32b43e8bba +size 4192 diff --git a/pre_1929_books/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cec9f220de29c7e3c54b161c9d81b5b69d7003b2 --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f13d4e7bb365b5a88f68a4f82ee3cdf5601fc61d2618e85e546827ef3a8fe8 +size 8388848 diff --git a/pre_1929_books/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a436c678213f9956fc069f89716afc048a36cd1d --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3856b6f2dcd8d7a7c47f7b7a41b6d389b4887138c859bb4463b2303112db66b +size 25166176 diff --git a/pre_1929_books/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4807dca4e6114d48c4d2f3f747eff8755b94525 --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e54874fe5e93153213ef641f241969f6eaea7c7a20a0c423bb2f49ce359be79 +size 4192 diff --git a/pre_1929_books/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60260eced7104a1aab46490f60d6544797ad2c48 --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927dc717111c77c774360c56d5230110fa01734c2c67ed10a768da67d70837cb +size 33554672 diff --git a/pre_1929_books/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a0c8787495ac268c9b9bc06521a8f9624b926ba --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a73d60946ce2508fec28d8b39f764f60d618385dc0a9500e4c6c68f010086d +size 67109160 diff --git a/pre_1929_books/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29f758fff1ca7952af3c0aebd4f53be4084cb559 --- /dev/null +++ b/pre_1929_books/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fdc25bf1e8d89e5e137d866a6957a2cf11f1ed923a98327655baeeaf1d590c +size 4192 diff --git a/pre_1929_books/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6de25824f6653ff25c0e8ebd037553c3227186cd --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f543308dd9d5a2bffd2d65fa8c6e84d2887b1a48cb1c0bae59bbb48faf5cfed9 +size 8388848 diff --git a/pre_1929_books/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2387f2567ae1fa29d41de93cf8ba4b8e796de9eb --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486d3d3e5ed7e846d9bb9cb0d20a7ea5fb2a050014cbf4a385dcee5c97e068c3 +size 25166176 diff --git a/pre_1929_books/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43ee598cc8f84362f95133b993aace2af422fde8 --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f360842966f426a253aef457bca2c23cd2439d88047f2799928aec04c599f69 +size 4192 diff --git a/pre_1929_books/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4654066c327a4432965a3a704f8ad026496ef727 --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e71ab3292dcb9e268a03715ef96c81130a8f5fd8add9eed47254e5ce8e8ef20 +size 33554672 diff --git a/pre_1929_books/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffbce6ab97a1007b285ff21829f67aa1345b3f6b --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5a1518c628d5655e88e1ab643553a3de5194e802eb339f8744e01bef2fbc2a +size 67109160 diff --git a/pre_1929_books/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc47b073e58c8a5398765001ad2ddd22f6adb56e --- /dev/null +++ b/pre_1929_books/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0307a1c07d2a743b02e2e1f5466bc136f3bc21dc460c9a3b1794ee96237cf5c1 +size 4192 diff --git a/pre_1929_books/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..650db146bc04465f4dedaedacc814ca3b8538d63 --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b9dc8613eb991536977b5d5a5835c753bc07270a207089a3801da017d6a4a9 +size 8388848 diff --git a/pre_1929_books/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7297419f5fd378c6d09ab3d64ed0298c8092d26d --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c65c5c4205445c2536805b656252ba388009e7d50c6707a467eddb2e67f8efa +size 25166176 diff --git a/pre_1929_books/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4388527ac1d7a3ba35a0dcd76df1a5a7dd10d056 --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d077466c00c92da91a4d1bcf66a054e5ab7bf9c93b5caf9df07e3a15a6b37a +size 4192 diff --git a/pre_1929_books/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35109890a4e4a0b40abdbd2f53f274f482cdf1b5 --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f842f5efbd2db41a8deedd91d35cbd64389804e798a14e85265a123aa5859a3 +size 33554672 diff --git a/pre_1929_books/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afba23e09d96ffa14c9413ce34eb5e76663ecb9a --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dff164975d4dce1dd7b6cf146171ef41fa19aba23fe909088e06947ce407ade +size 67109160 diff --git a/pre_1929_books/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..156128d69df8486da617a2278b40f4c2e8d2a8ba --- /dev/null +++ b/pre_1929_books/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64513ade69155b1e2d11ea9e2ed24482a5dc875d55ed1f6f77e52b9adbacd4be +size 4192 diff --git a/pre_1929_books/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9232ea0262e26bb4120ff7ab3e07b5b54f5182cc --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c51caa5aed5bdbfda83486766470f4eac18a50b03face72f94aefe1aa16c2f +size 8388848 diff --git a/pre_1929_books/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0cc0cd14c414b4e4dcdea5248c6444d9293ab4f --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76aa4aa892ddedc32a0b381b96017f39b12403f6b89a80440f247ca10b8f002 +size 25166176 diff --git a/pre_1929_books/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06b8b872eaf6e188655ea87ae5fc97f3c5137da2 --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad32d76dee31fad2c53c64d9f59d5a5c676cbac170ecda971af911a9feaefae +size 4192 diff --git a/pre_1929_books/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ea6334b3357ab26e1a19435ff650d4538088097 --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3929a23dc05f64d8cbd09b8dc8262305b65e28ddbb33467e1636e34a3bf909 +size 33554672 diff --git a/pre_1929_books/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df8bbbc147029c86cafb41815011246ce6ebf074 --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb005114a9bba6f0259544d67a740d18fd5ca7cdbb994898814fbd93806cd96 +size 67109160 diff --git a/pre_1929_books/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11850d8d91213d077698e421843d895039ca53f0 --- /dev/null +++ b/pre_1929_books/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5aa0c3a0d3274bc4db9583de11533d2dd2e328269623c9cffac352bb80366c5 +size 4192 diff --git a/pre_1929_books/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b57aeb87864badb291452244c253b2ab60ce9863 --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5450ac6d1831c9bdadaab79d58079504023d86e5f99a8b5fda54edf64ad37d7f +size 8388848 diff --git a/pre_1929_books/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98e08f13f48a2b276b7856ee6c6858914ae16a76 --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535e53701fa5279fe3987aac85d556a037c73747a1cfb515f159627b48e2c66c +size 25166176 diff --git a/pre_1929_books/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c93446f911fc8d310bcb931206cb389fc7ee19c --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf227e0fd821516b40b1c3ec6e455d77fcdc19a8ceb19bfb561f98a5addbc49 +size 4192 diff --git a/pre_1929_books/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c329783d1ba2de7e933e2161fc9900843641f44 --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb5bbc80744320ae3b13f81c0b3e576af281e15d91313429936f650c1b1e07a5 +size 33554672 diff --git a/pre_1929_books/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1915e8995aa0c1965cda76cf4077a58cfc131986 --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1633150ca3f7b48c86a74d5ac7e4d2181914553eaa7b81c15ef316662e978f37 +size 67109160 diff --git a/pre_1929_books/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5430840a0bb70f8d7b2f2738eda3b5cc3b180f66 --- /dev/null +++ b/pre_1929_books/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f74ebe35b606d236014c9c6f9d71a112d62696077361f5e1d6dbef20d01cc43 +size 4192 diff --git a/pre_1929_books/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05f90cb3046368d9401a266c44522a18983b3674 --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c1d8d4dc577a915acf47541ff1c31363a52eada7c7316b711a8014e3040956 +size 8388848 diff --git a/pre_1929_books/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..054d435ce32da9ae185d0caf4796f5fee86978b2 --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c3a0fe5c4b8e8f8dc7d94537b0e716a7a78126c4b12c276e7d3177a3f14037 +size 25166176 diff --git a/pre_1929_books/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b11ad21d81389d6f0acd6e44b64902d9b340ad41 --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82cb635d8ffaceb6364c9937ea01a7efa179440369723d7c95858af848f47ea +size 4192 diff --git a/pre_1929_books/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d11722133c3ccdd69c0ed35d6559ee0b7707433 --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b9f20bad74fbf8eb28f04c5079130ce897526f2f9f7f13d0050b86ff26815c +size 33554672 diff --git a/pre_1929_books/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c96d329597c9580a27f785a8c118cab28d9453c --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf7f6da7cb598e8891d747177c305c60b57d283af2d6f49b3770e32a4907c05 +size 67109160 diff --git a/pre_1929_books/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b6a948ccc367f1b8a6ff6132fb7988e76534b8b --- /dev/null +++ b/pre_1929_books/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1869d5beefcba2eb263100390860c96b2147938c869bc42eb79ad3f1da117d +size 4192 diff --git a/pre_1929_books/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2bf46d3f69dc3dae3f6a58e05e55d5be76b7920 --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021b1d87ae9d85b8d2d9435c6fcf3fd25715fe22e4e1a333e230a1d4942af798 +size 8388848 diff --git a/pre_1929_books/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbacd2e5e20c5d2278251c990bb9300d63e40e0f --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c024fbd81aa7fee36e79aa57f45b7bf6b20aa1b378757a0207d6ff001b1528 +size 25166176 diff --git a/pre_1929_books/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccd7cc14df1334b7e5829bf369a5f074759b2e69 --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7670aa039cae3b2087bdc80c2f892ddce26dec2ac155a5ce6f98bad24a08cb +size 4192 diff --git a/pre_1929_books/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e9f6b6a169b06c49f47b07bdc52f3c07f94e0ec --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adb285fd38460cd02b9e1a77239435c6d7621dbf34160c51c34b318d380c655 +size 33554672 diff --git a/pre_1929_books/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40d3c2124e770aca721c3b10a251d26727a8fb1a --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a78f1d6b37f4c2931b235bea0ad36685090fb3086b38e19fe5324832c36917 +size 67109160 diff --git a/pre_1929_books/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15fdbf9ec56c3ea81bdeac771d45b5a82b07c86f --- /dev/null +++ b/pre_1929_books/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fafc9e41af67f14c21ceca1ee59ca2973255f5e22da8ff2caf0665ae539d1d +size 4192 diff --git a/pre_1929_books/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1f0f80098c7201b914574557b4fb39097cde49a --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9302e0c1627fed568ccd36ce2417c8edb8bce44f28c85af68e443f7f92b586ba +size 8388848 diff --git a/pre_1929_books/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8942bd3bc7f8eee6a4eea4b9c8e43ceab5f8ff9f --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0292a3167ce6d29d1a0307847fc641a2b414f9b02e0b6900257ff689e30d6500 +size 25166176 diff --git a/pre_1929_books/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc61dde8791213416d6d3b32e6bad7500aed0c7b --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239689adade2b7e6326b6799792aad2639a1e2d9e6f9620511c9037c308437b8 +size 4192 diff --git a/pre_1929_books/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..638bb5b9eb10a1ac811165b65c57e5d69680364e --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7a851d81ffbf7976b8eb26e782c012d80b6f5b785b1bc40bd21c04f7f1dcd6 +size 33554672 diff --git a/pre_1929_books/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df49a147185cf6dff36f3d5d0df4056e0f123b26 --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1790ce067af78c7a882dd4361967375928ddc6936700c2654580697f4966e64a +size 67109160 diff --git a/pre_1929_books/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..824fcd660fc32ee1bfaff85742c97183ce1abef1 --- /dev/null +++ b/pre_1929_books/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ef16a1603abca56161504f0977262c155220ee58f5338d87d6ca1b6763d705 +size 4192 diff --git a/pre_1929_books/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..482d6fe0993c2b4e2b47b0cb18210ddadeaaf7fd --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:035a1fb010f64f2311b41e4d96e1bb0b4cf25ecd1719139be3c11d9b81e392a7 +size 8388848 diff --git a/pre_1929_books/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f774972187d49b3a288f26af8c3e1a42765e6720 --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b83bd4c62601b471c66efa87a4a02e9667805c86c87e3a6b6084c44c33c3c9c +size 25166176 diff --git a/pre_1929_books/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a298a1ce58b875e1480280553a28dc60e22b9074 --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065de47c4014a651be2e83d1b429594a9a2ea55b4ef15e8f18e6fe237d17c388 +size 4192 diff --git a/pre_1929_books/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cb9495db95ff0a46995936e8acd8eab1b01f2cf --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6f711fc3c50f9e00fe01b1f7540465aee97f9a95abb2592c45c42e67912321 +size 33554672 diff --git a/pre_1929_books/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c66b9d775530c4c4889d396f2c57cf3060c4412 --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a078ee12090e9b7319754228c6c2c9dd7cb8e9c729f9e86013542d8b2c83dad +size 67109160 diff --git a/pre_1929_books/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e717d9ff242a915663bb049ba8bb1bbad22d01a5 --- /dev/null +++ b/pre_1929_books/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8d59b59b1fad09e3cf93a4ffc167465b95e0022b833e604f3a1d0998b47b57 +size 4192 diff --git a/pre_1929_books/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25dfa1644c7d62fdfbb18484a9fd715904054e28 --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da24885e16bc6ce79d21c6ff7692d9c33c0d40d85e5ed63c54aef3dae7c2be8 +size 8388848 diff --git a/pre_1929_books/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc419cb9c7043bc0cbd69b354f7ee815df958f10 --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188ff82a694a3aa1e718714d52c2cdb028307fc44242c10bb9349ee75c46d856 +size 25166176 diff --git a/pre_1929_books/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d39d1e53968d355cac11b27ca8be0c0a74204ce --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959586c04b696b4a24741c4563d30073979739495b3beb51403693b0ebf7f62b +size 4192 diff --git a/pre_1929_books/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..372e2daea6dc9504717c8a3df2d9f84bdae17dd3 --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d9b7d53365c164d360c2d0539ee0e352cccb2b256b81c43e2633ee4138c7ad +size 33554672 diff --git a/pre_1929_books/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a25b26d1f7d4b4890475597972467b9402c4b7ea --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b68aa3fa9c2a66f65ae1425f5324f00cdf6abf20f97a7dbc4c4bbbca0d5427 +size 67109160 diff --git a/pre_1929_books/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7778d09959a32939d461761b8795edadbe12d99c --- /dev/null +++ b/pre_1929_books/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547d6da0d81810da6ed801e2dfb7f79b21ac8da30cbda9c74c50b16ccbe915c5 +size 4192 diff --git a/pre_1929_books/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb7e5db7a2dd7aaced56ea354a8f24f9e815daa6 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f28ec032545e3ea55ceae6ff8b5ac41f1228e9f3ff47172b42b2cd7c4c5ebea +size 8388848 diff --git a/pre_1929_books/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0672d73fcc42adc58ef470bb56f2fe6f6867298 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2fcfbd18f91db6af918301aada6e749011a97b79ee3ad37affcaf4e82cb405 +size 25166176 diff --git a/pre_1929_books/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ffd4c0d5bba04b292932fe833134492f08c60c7 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1706811d6b33b743d0107c7da0dc421b1d95ad1dde302956c46bfff04c6abd10 +size 4192 diff --git a/pre_1929_books/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b12e5ecd5f0916003ec63241cff0bd0f24074fe1 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4019e7a6ba4ecb91ba7750ec35826ec5135cdd142af61cda527d402cdb0634a +size 33554672 diff --git a/pre_1929_books/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99210c82a57343df2186b8df2696130e772b3850 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe509664de724bb010d433fb996e8d32d69541f78f8e40b03b142aacc0a96928 +size 67109160 diff --git a/pre_1929_books/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc68da409e90c2e29e6f3dcf5dd68fe9b8519661 --- /dev/null +++ b/pre_1929_books/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c193fb2804a105ed58fa26f5d75636e4d72cab0781828135209943f092a5751 +size 4192 diff --git a/pre_1929_books/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..929b32a3a723b5e80205f603dd8aea8ec6687afc --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7be7fbe4e340a82ab6f902cf750d5adb4f5b64645f5f9c83b9f599de693f7e2 +size 8388848 diff --git a/pre_1929_books/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d86dce0f05de6e87b16a7fc79013faa019d11a35 --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30186a0c1115f1783ab145a9ab0f0ca7506fa576696f464db081bf4d3a0dab97 +size 25166176 diff --git a/pre_1929_books/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17f5bd3364fd695c5babd6096aa660166a8d2768 --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86098a63a0e47b9f8995bcd8feb7d67f28ef0a15590df774036eb8999f0c6490 +size 4192 diff --git a/pre_1929_books/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..996085eac538f8b014ec09392815a0e2c28c8c4f --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564cb215368dcf074dfb8e67d5a664149316852214147ac53096b6eb80fd0d09 +size 33554672 diff --git a/pre_1929_books/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d079c55ddf258580fb027cd9ec771f4d39a55c4 --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d7c58c1687beaa6c30cb7a9acaadb8173fac85f8ea8944929eadabd4d61b8d +size 67109160 diff --git a/pre_1929_books/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db8ea2c0deb229365d9c91113abb5b5b4ce6de0f --- /dev/null +++ b/pre_1929_books/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc4d4161460efe8da6b06f666a2e585467a22c48473d30fd0cd761709af6024 +size 4192 diff --git a/pre_1929_books/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e36fc0a5d1e1264a94361a91a92db901709169e --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e74ed05965abe1b35954f133f8c18e1456f96849b6a9bf0443a7e7fce0230c +size 8388848 diff --git a/pre_1929_books/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f28bd390d72fe5b2c3407c26bc719ceb97a93034 --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d61e75be9130d179113cc1981e692e224a84df1bd046979f551e313ceaf892 +size 25166176 diff --git a/pre_1929_books/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76a1e75cc3a57058b5c0f1742d33acc41711ecba --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc5520f8a4027dc93361ff2201c880cd2b6617b76cfe5d76a3ba7866d632d07 +size 4192 diff --git a/pre_1929_books/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3029b7dfbaab5764b7c7b3bdfc26750d3680b3b --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecad7d336e8289cb08a8825ec2d6da50f842caa8ac48ddb8b1931602b5772d0 +size 33554672 diff --git a/pre_1929_books/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fab06c97126db5adcf5fcc4af4c3dd56d99aa5be --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7904d6e8ba64bffd87ab8fb7344eb48aab350571f57e9669461f6a6c4e7a3517 +size 67109160 diff --git a/pre_1929_books/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..845ea8b1fc5ec56baa00b2d4653ec67d4fce3654 --- /dev/null +++ b/pre_1929_books/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1634b0733e9f0df885934d74f2ae3808f700b98d53faae5efb74c627cf94c55 +size 4192 diff --git a/pre_1929_books/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66bc36b233bc087f1b41ac2719b0ece3f64a8d12 --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e9cbe35a88963f36661ac02d4642ea5243850152c0e53d4426e44c89c8c467 +size 8388848 diff --git a/pre_1929_books/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b81fada04492f68efdd002ff27aba1354d2f58e --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54fe63040014de25d965df3cf262bb67cd8c983e2c68eba01f4e9c4b6195c48 +size 25166176 diff --git a/pre_1929_books/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..500c697fd55dbdaace106681bcb4003aa93e32ad --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a853433fce5d0dd1edbd503dfa0366c99b9f4488fe342f7eb418397d88bc6826 +size 4192 diff --git a/pre_1929_books/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b80b4facac7f07a5c4422282bc3da9add15ad9a --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f7300814c63dd9145b08f1faf354ee736b03a5f9688034d368c78e3d4559ff +size 33554672 diff --git a/pre_1929_books/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09fdd0580d16cfa5edcd6ca1c73cea6fe6ec8d3c --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d3aa1bb8265abcfcba9ac86f8a59832de7e1d38d5e147ee197ce7a98ff6c71 +size 67109160 diff --git a/pre_1929_books/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64be843f380964538dc671b6bff82fe63c15cef0 --- /dev/null +++ b/pre_1929_books/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4338ca94b7cb2e993e8043111c638b50f7cf7cb4dedba0aa23506b9b8358fc03 +size 4192 diff --git a/pre_1929_books/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b930c1c25ec7ce4ea30af460c489845583343798 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70cb71c4a5a3bfc77b7b1e30a2c850053dc75d593435d40aabdd090f79e6469a +size 8388848 diff --git a/pre_1929_books/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84e247af078a57934ff67880492d2212b7e20c94 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472b2f92d8fc493b4c3702c952e04ec0e539fc37cf74b94ee1e38e921bf71c27 +size 25166176 diff --git a/pre_1929_books/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6007767d50bae4f11ae513a58dc6f6d58c2b6814 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57b61ee240f5f76db0db22d16e07aeb47eed01b3980be792929f7396ee8724d +size 4192 diff --git a/pre_1929_books/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffc45aa686ba2d4f8fd3ed08f83295fd3815fa22 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2209f37e7800879dd6564eeaf8516d9e2822a850728a1f4f1a845f12162b51 +size 33554672 diff --git a/pre_1929_books/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9380c8ea224ff48270153cc75484ec6de079a086 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45fe92b7ea136d48c6988b606aaad763a259a3c9d53cb12e83e836e6bebf95d +size 67109160 diff --git a/pre_1929_books/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3853ed565e558a01215341e3a5b001f76304a38 --- /dev/null +++ b/pre_1929_books/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269606b9238aab6865b0931e6900297c108b4a4868c5d2952f8f29f9f9704183 +size 4192 diff --git a/pre_1929_books/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61768f5c7dbf4854b33c2144cae25ecbd2a94803 --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e29b217b6913a3d68d244f7d8e5a6782297f1b9e18ec7d80d76e0ab0cfc981e8 +size 8388848 diff --git a/pre_1929_books/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a12760209491f23958a15fa8936198c85e06dfc3 --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfd52bf5b1f2b3e8f182ca1494c88594525af6b98918e9aa25dfc1baf08f558 +size 25166176 diff --git a/pre_1929_books/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..023ccbda2769e2a4ee55c26cdc34de1c3104cd8a --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8850ad9e65be01f65535f6042b0af8aa4680aae7804b83381acd1f46f5058a7f +size 4192 diff --git a/pre_1929_books/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e39b018e286d6ebebb5359fbfcc5123efe0db4f --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d85b34f8d5b332945058afebb43e0f1b78688bc31f01170d2ca7a51a9778ba +size 33554672 diff --git a/pre_1929_books/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09d253e0917bd9b96f32e6e0484e67dec0e71504 --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1927b1f02be839474cb24889df430cdbda938784fac6de0e39114987ef4569b0 +size 67109160 diff --git a/pre_1929_books/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1ff66f044546ec90f21630d92a4d39d9c3d5e35 --- /dev/null +++ b/pre_1929_books/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d8eb2e90cc1fdf5807fb58eefd87948d0b037516e230173aaf0cce6183c207 +size 4192 diff --git a/pre_1929_books/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b5a791fac15682e8c04ed1074c27b45ab110b23 --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b2079ce87fdcfdd04bc8e861b00e9dc64ae3d2945698177edea429a7433110 +size 8388848 diff --git a/pre_1929_books/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b22afe3f02ae38bb73f72bc70f8759febe40b2db --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c5b0db9595896215f6ae71e05064065a445f3ecaddafe5c46037c450717d15 +size 25166176 diff --git a/pre_1929_books/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9673a8d88635fb7caa14723989c62c445cde8783 --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3cf9c84c568f7b35762e69ded98b2d285acd90ab3e5f3711731db26a96b7ef +size 4192 diff --git a/pre_1929_books/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5f9ce1ccce55fa90dcb42a1a63d42bcbdb7f25e --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a250ad9376940758133c9cd7150e51eef9a6793921b4bd8da6c6d42c70c7344 +size 33554672 diff --git a/pre_1929_books/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e64222dd82eb1ae382bfe7ca0921e95bc0db96b4 --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9749c86f3c8b60a96ffcfc6debd671fadecc32da51d1e2430e6151573a82bfdd +size 67109160 diff --git a/pre_1929_books/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/pre_1929_books/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6604f62d035ac688307e87a49a184cfa842ca53a --- /dev/null +++ b/pre_1929_books/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0df173e36cc8387b4203cac4644175c7d71ce8bbebab44b21b415919994081 +size 4192 diff --git a/pre_1929_books/model/final_layer_norm/pp_block/model_weight.safetensors b/pre_1929_books/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a552e4f381dcd46634982e2b33bcfa9b69c451b5 --- /dev/null +++ b/pre_1929_books/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57036e06c74fdd3ea0cbb408ee1ed0b44bc01b1da3c9b60f2bff8664fdc8614e +size 4192 diff --git a/pre_1929_books/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pre_1929_books/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e71c3c47e5aba29d5ec0708e717feef875689645 --- /dev/null +++ b/pre_1929_books/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0299b4fac1a9d94a1470315eea32eeeca9399f23d36ba1049c389abdf80653 +size 205914352 diff --git a/pre_1929_books/model_config.json b/pre_1929_books/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/pre_1929_books/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/project_gutenberg/checkpoint_metadata.json b/project_gutenberg/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/project_gutenberg/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/project_gutenberg/config.yaml b/project_gutenberg/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d7e51dd86a2d918967e06d3c12a1d2f0426c770 --- /dev/null +++ b/project_gutenberg/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredproject_gutenberg-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/project_gutenberg/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d26bbd5888309b34fa93ec952cb13801188b0253 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dec332dc25d5a5d788d95a3b4ab808d50433d4add79e90f36aad0709693988f +size 8388848 diff --git a/project_gutenberg/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0e7d0bdd50ab1ab383fa29c865654125ef675e2 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a033f58dec728ce2b01a26a839f43c37b2bd498360a115e8f2c6f88b88697e48 +size 25166176 diff --git a/project_gutenberg/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..375e9ad81ceaedfaf04d83e8f20893ffa44a7580 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c991d6505dfc40636f3a3ebfe5a758a871e5793ed058a5fa9c1a761585cc0a6 +size 4192 diff --git a/project_gutenberg/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25160e316aa010654a33a165d0ba1a07c190eec6 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e151ad5f336d4bc15c2c2169cf09c03f1cc657f6e883065b5509aec21d48e7e0 +size 33554672 diff --git a/project_gutenberg/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3010a2e8bd336d6566523e5ec3ddbc5f31ab40a7 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7f40ad7f1ca76d8cb0b213e6decafc7e30c64024987a4b36a88859814684b9 +size 67109160 diff --git a/project_gutenberg/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47f2b7f084e9729309898db3f55b3cce898b4330 --- /dev/null +++ b/project_gutenberg/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8d3dcc64fdfc8e8b29b674ab8a67dc9e5af7f2b890ae85777273eced0a0167 +size 4192 diff --git a/project_gutenberg/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79a0efdec2b885aafbf8031af2b3433a30e4ef30 --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b96ede9a7b44184fc05aaa613ab5d25d2f2bf325b34a1a219f39c0ff29397a2 +size 8388848 diff --git a/project_gutenberg/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20def372b67465e2b697e8ede6c8783a78d21b3d --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82db89755594ba3a55f26d43cb28e36926877509ea087cd3f299fdf50da04d1 +size 25166176 diff --git a/project_gutenberg/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ca3a24afa5f07e6597953d0bd79be743f51fde3 --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24404b51008c8c19f8e15f12e44fb1dbfdf1c0a6ba91608f1082c1aff49ff4a4 +size 4192 diff --git a/project_gutenberg/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76560375d4eb9d772e1ccbfd90ce70f338840cda --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac603725dfee5010e9169ba843154d060da30ed6aaf98e9e5ad6db2feb8c22a1 +size 33554672 diff --git a/project_gutenberg/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7a843887fe1b4268148c5c071df120c508d3756 --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9fb5de95dc3909600b578d98c91d9beaf17b5cc772c15f7b94977e68850892d +size 67109160 diff --git a/project_gutenberg/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..484b18962ed8b7fe39eb7e5520a240b857a85154 --- /dev/null +++ b/project_gutenberg/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13404e88406267c545d218834703b0d657739552cbcb7ff8e070d3f704c05a9f +size 4192 diff --git a/project_gutenberg/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3989c9e4bd702fb4932abb3e07110ba7fb66f64c --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e26da2cc9536f0c7b32aac7a055dcff7264f8fe491ae0cb46cbe538680d9f5 +size 8388848 diff --git a/project_gutenberg/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8ee67eeb0d44b963c2d7780a50cfd68d632c647 --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa60dc12ddd566e6d4ce6ab8da737c4c285b98726e24cbc3b4206723d7a57b73 +size 25166176 diff --git a/project_gutenberg/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52e303bceaa0ac8ad573b1c7874f99c1a1130b4d --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e03a730603e2822a672ed559fc8b52984adc0c5989037c9d267b854d968d1c +size 4192 diff --git a/project_gutenberg/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97bb91d2603aa580bcce2d52e90401f11eb25f56 --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a658386ac5bf58caaf980997908763e0bc41a9ee33743084f19b9ce437f5a225 +size 33554672 diff --git a/project_gutenberg/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc8748c1db36fc82675fc49131035d9e425443a5 --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:415b528b8ecf4184fa0a582475549f008aef55b5fc930c56ccfb4674b620be5f +size 67109160 diff --git a/project_gutenberg/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8221735ba2c3f2ba5e908e07a76997932a5e520 --- /dev/null +++ b/project_gutenberg/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bae4a97eabc30f6ebd318c5f9ce69fece7943978b4bf112671d76ebdf309d81 +size 4192 diff --git a/project_gutenberg/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f47909f727c5cba8c0a3d85c05db286686f1840 --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240a39ebb8b5e706f2773cbb2a42b56b1c72302326bd78be3022840c41cda9c9 +size 8388848 diff --git a/project_gutenberg/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bfc42fd2423d19dac04110aed2ef4dbc1f70e88 --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3399a1caf3c8a84f9c3e6884d75c64427fab4f99e525e8888d315a1d121dcc27 +size 25166176 diff --git a/project_gutenberg/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..426629c4e741103b37c8d9acbfd89a48252716ea --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecaf58765322bf6f78f6279b3b6c03eb927d84cdce17a57f7df08c610118b9e +size 4192 diff --git a/project_gutenberg/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809f0174dd616fe74bb0d9a2eafb06a986bf81fb --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22b6a1ee1b8c6601cf6462061d03283a41df1635f5af0cc0592f73e354bca88 +size 33554672 diff --git a/project_gutenberg/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed1b132379d0592f9342dc06c2605a16a70a436c --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820bcb11e7e97d692955bb5b853a2940f5ebed8b0e5f563a4e9a57c8932ac201 +size 67109160 diff --git a/project_gutenberg/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6160eee2a4e95dd824f9c7541777154c6cad1afb --- /dev/null +++ b/project_gutenberg/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8657187e7fc6c3765e4fe9c4dbf1b07af800ce2ac642f5da70232860bca38ebe +size 4192 diff --git a/project_gutenberg/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac3db672e3f63177fcc385a691bdbbc836115b7c --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ba8bf80ff0f3f91cb7dc5961a803ddaa8c31ffd4ca387f242c434a5a521170 +size 8388848 diff --git a/project_gutenberg/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1176ab729f2392bde22064da7637e680c758fda5 --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751395e4fb6a1d8fc77efa177f1c3619a047de5f4e31a2f9b17245d4a4f8430e +size 25166176 diff --git a/project_gutenberg/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5a5bda788f4264d00ab911b2b80133e5e15522b --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947318cf949b2c8f1fd517aafd15e6477fcd8932f032e2278115f3056c42d093 +size 4192 diff --git a/project_gutenberg/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7a62963bedbace7c869351ef41fe22cb160065e --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0e726eab2ed62e759205e7e8e35b4f0510b9ba5105b63ecf291568b56b1dea +size 33554672 diff --git a/project_gutenberg/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d46ceb5050bdf76ade9b181f35570ada90e9183c --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8765e859d54909d07fa869cc69d29385c8cce38f7370da34475ee486d5be8730 +size 67109160 diff --git a/project_gutenberg/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..142d8213a939fa96c7f0e1c879d159aca501c4f1 --- /dev/null +++ b/project_gutenberg/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ed1a3eda42c355c6b97de356b459c3d5a0fe009739237cd1f615053b07b09b +size 4192 diff --git a/project_gutenberg/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1dd1ee2b7d84186b348c5aa3c4cbcfa4d63553f --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4140fec9121e202437502bcbec21eeda4ce62faa608a11b452532dbd7f31e1 +size 8388848 diff --git a/project_gutenberg/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dff6e559992cbd1818c1b5fb1305ed08c66bb990 --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5524f577768b1cf735c87626b52ee487754d5ae329f5d71fe2a89f07b5ef2bf5 +size 25166176 diff --git a/project_gutenberg/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae9d0a633c4570ade1f3d4b969f9e2977fb4e7dd --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e62e60577e27b571a4facfa278686523aab47e91490cda41862b7ba103a84ad +size 4192 diff --git a/project_gutenberg/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..314b0b737f9a28bbf25a25ac787c1833be77340d --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3cbef32a119cd33711c03d77bc68abf66d921c5d3fff14e244650b439a3b4f +size 33554672 diff --git a/project_gutenberg/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94dca6ec6f6b1155fcd6960e4ec9775a46b737af --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b939e817e922450889ef4df5435f2e8efe6eda72d1f669f5080231c479cef5b +size 67109160 diff --git a/project_gutenberg/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53588ce031cb96f7c4cf81a29628f579eceff10d --- /dev/null +++ b/project_gutenberg/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7274371a038f56d115df35f997e0f26f79ce76bc32d0995ff8312317b129879 +size 4192 diff --git a/project_gutenberg/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d66d412bf825d1e64e7e9fdaf085c48c78a2d37 --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb6fab5e96f0858c7b85bf31479b019a9b9dc3caf2af4f1acf922520f22bb4f +size 8388848 diff --git a/project_gutenberg/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5344b4a0bd9723b91ed04059406e856f32e39c6a --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6330aa0df926ec0e796c13e85209765448166caea49626cf383423ca6afe41d6 +size 25166176 diff --git a/project_gutenberg/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f00cf711a9feec11153c86e4c2d79a2abe7a8456 --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d133652411a7752b89d4bb8f43df7d34ddcfe2d32dfe76088b7b074efabb9ee3 +size 4192 diff --git a/project_gutenberg/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bc32b87bdcc0c5e55b8ee95bc242858acb0a878 --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520dd298f13472dbd91f647a00dfd847b369e3ebd00577d37edab2eb0cbae126 +size 33554672 diff --git a/project_gutenberg/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fce7e62201a27109f8a5f79c26e0b4d6ed13586 --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19bc705d1848aaeb986b18471e84e33a64b5938c5880ffaa685974127d14eff3 +size 67109160 diff --git a/project_gutenberg/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b0d2d8aefd4923df42308df4e78435d745b5762 --- /dev/null +++ b/project_gutenberg/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e85269158f79c7c6dcbc0e350c2d7c4a0b3f6a4ac93232df4e626fd803a870 +size 4192 diff --git a/project_gutenberg/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78477e8f771f4ade37709fada068f3375b395105 --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860912b8b2ef0c2a21e78e55752b1ad66ea6e8648cd13a097135550577f7e27f +size 8388848 diff --git a/project_gutenberg/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ced725a0665872fceeacce506318c135a5d7409 --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efc9d515615c9acf651b68cc0e1249dacad90d5baefe149b0834a54bd3b8970 +size 25166176 diff --git a/project_gutenberg/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7ce35543cec050786b8f30ba9d8fe71cc2134ee --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8308a587a89605ea25507b4dc308134d03b478a21922c515e60a3938c2068a94 +size 4192 diff --git a/project_gutenberg/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73285f2faaef4208a12075e3bd97dda5d01f9f0d --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3278519007b2f80dc550c9f1d04e458a9c98a0d1f3404418fa2a71a238eb8b20 +size 33554672 diff --git a/project_gutenberg/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767f1eece2cf143cdeec29162dfa7d840815f1bb --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fab0e5baaa21055bc47ea813ceac65e5619cc0ca3d864aae8754d55d33e6ed6 +size 67109160 diff --git a/project_gutenberg/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c678559790bd2ccee9ce6213e2520b79dcf68b8 --- /dev/null +++ b/project_gutenberg/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d5776b7b725c8c7b1303886e37dc9d92ce301ec36289565d69cf9ad545a5fd +size 4192 diff --git a/project_gutenberg/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4807802b81499b64b48ffe3bf584e264c4f394bb --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3abd2a98fde723485b1deb53735b6aabdbad26e128ea5c017bff657989bcab +size 8388848 diff --git a/project_gutenberg/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9b664d058ca1925188d1b3e72eaa5dd49310679 --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9206b1b95b120d9da6a53b5d0016efe3077834e5915710c04bf31df4aca5abbe +size 25166176 diff --git a/project_gutenberg/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f67510d9889085d31b731a86a177927efff678ca --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85ae796f36cee84294963486c86753d263739cb560a0fd984839ff3e250de05 +size 4192 diff --git a/project_gutenberg/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c8fc3fb7a37ba7b7921e43ab0e2e77e368deb67 --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b680289c6bc26325339f767dfdfe88bbba24a444642f55f187c90c11dd78c5bf +size 33554672 diff --git a/project_gutenberg/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea6feb31e12996311ff9ae971166b82a0b54cfdd --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0687af4d0aa4fb2c66c2eb72e634a6995128a827433144be7940a9de04375e62 +size 67109160 diff --git a/project_gutenberg/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb9e7af2067f99d985507526844a85e6b01345c7 --- /dev/null +++ b/project_gutenberg/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99badec4d4ebee0aa7f0e6424092b51d86b96e083af5e7a71d8fb567e2ad1e14 +size 4192 diff --git a/project_gutenberg/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a2ca8f3e16229d0019ec9024f94798e2a38315e --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec9d6466e388a4c4b570431e764de567f16255ac1aadda3f0a16f0f792c2d1a +size 8388848 diff --git a/project_gutenberg/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5f8e901837d44a3ccfa4e6a80f724c498d70124 --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70724771c3507519a385f5e834fb73d81babb5437fcb3ede130982860aea3407 +size 25166176 diff --git a/project_gutenberg/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba3a611f19c9255c3067ac4826c590c9d509d1ee --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d03c8d98cfa33f91b6a683a00f00635f5e8f71223d5f289923b30e03fab7baf +size 4192 diff --git a/project_gutenberg/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64318bc1188c5936822c8481912c14bb4906c444 --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e745031f560d2f6e76e01e031abe72053f3ef6d6dfb716b36006765ae255ed9 +size 33554672 diff --git a/project_gutenberg/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4295ee930a8e93ed96b48cd368a123ae9c8d86f --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af99f41f227d0331edf4d3ab93d1ae158f67152a6ffb26397b707bf31b0f4659 +size 67109160 diff --git a/project_gutenberg/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eca1cde8bd3ff2312f0c086e6d13400ed937ca38 --- /dev/null +++ b/project_gutenberg/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c7fb563d2527fa6cf4dbbf5fd72e253e0d59a8c8cd9be0ca8a14665c923486 +size 4192 diff --git a/project_gutenberg/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ff9dcbf007f2c5d05cd322277ac2819ca2dc073 --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b80f3030fcc77d0d9e2a3767da5f0b9c8846f772438dcd448e58f77299214d +size 8388848 diff --git a/project_gutenberg/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d15780e7c701ea377239a04f5a4c8aa2084ac31a --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb6b9ccb654cbf0c66fb64aa4d7544efa3c6e0f749c0acac5307ebc198fc49f +size 25166176 diff --git a/project_gutenberg/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdda762b11bf0c7ecb68d139936d136407d246e7 --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc84b49e8698f6943d38de5a3a682f75859093bdb16a61154fdf5846ae380c7f +size 4192 diff --git a/project_gutenberg/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d82c9371c4f977b0f0ceba36f9dedc54876621ce --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27ab87fd6f11b485c5c19fe77d0592057fd87b941c8991bedaa3a0fbd9e7ef8 +size 33554672 diff --git a/project_gutenberg/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..610ccf6c7f2c6f2469cbc0354443150e6e22a91f --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786c322198afe350fdbe1a6659eb5abe59dd43472d78b4be87452d1f807105ff +size 67109160 diff --git a/project_gutenberg/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74e280af0a18feb3aaf3691db9d83a4e169d8820 --- /dev/null +++ b/project_gutenberg/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827e6fafff3e229c793974cf5596bf5ce271d5167ae47e33871c9e0c62f011b7 +size 4192 diff --git a/project_gutenberg/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..659c3ec580bda7a06835cbe19b171fd2963ecd74 --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7463fd83ebfa19b18a67d4ea045d585497cce00fcbd7e20c9446573d894b7d68 +size 8388848 diff --git a/project_gutenberg/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96dc495cc26f566483ea1f3311e0c57fa339b69c --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9204d4f7ba14a8f7a99c4dbc4205f2e184b7a1429a34834a684f5504f84c2fa +size 25166176 diff --git a/project_gutenberg/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7671d09efa8eee548fce2e99eae34fa6727bf79 --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6d459cc83c0ce4b44cfda7e67ebbdcbdf66b497484c0fe643606f681dcadc3 +size 4192 diff --git a/project_gutenberg/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..401bfe3e9b8f1df1be212f9533193d93af94a9f1 --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50baac76120dbc29e662f1a6fd2845fc4bdc8b5866b224832ee48db3178c3395 +size 33554672 diff --git a/project_gutenberg/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dca6de4310fa7d1378e893d732f2f11fa79c71d1 --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bfa0eeeccb168fdff4fcce17a16d37c8364357319184b7baf0eb4182c8cf7fd +size 67109160 diff --git a/project_gutenberg/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cef8dd11488d5f53a67106d8b0bac139ba7f61f --- /dev/null +++ b/project_gutenberg/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea794ff2b65d3f7ad3188d09629cffc50da53f195178a8d97c0450d0963fd029 +size 4192 diff --git a/project_gutenberg/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dde81a2ef915b268542e3a7c1652b3140bad550b --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b311a4f000b77a3639482da1a40bd9855ca50457a1ea69fc26c22b2797c259a2 +size 8388848 diff --git a/project_gutenberg/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af4c2f1fede6e43796b998cc8dc7e23e89c05177 --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad140224b3d17f43c6b717489c027a16bb98c8421968b0f27870e4c305bb6b77 +size 25166176 diff --git a/project_gutenberg/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f057ef563ccc40be9e90f088560b6576ff115bf --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2da3095676a38476c57c4fc9a91c6196f6e14c6a86c72878ab843a71704617f4 +size 4192 diff --git a/project_gutenberg/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..157a029f468c98e90b2908ddeeb9fd1916d7394c --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a51311a29ed1777ad651d76fe39c7192679ed00d3050085b917babe491edd0a5 +size 33554672 diff --git a/project_gutenberg/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4177247d16b184737713500c176c2569d13b5b3a --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02967e6aae3c4f55035b2de61386a64242d5cd473e2fc7f18a29d1b818a840c5 +size 67109160 diff --git a/project_gutenberg/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7edc0e8ec386a35f76b3d2ebb9bc88ccea7a5115 --- /dev/null +++ b/project_gutenberg/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dce624420efe7252dc84ef662549aa952c401dc143aba3b0540ad309cd5add30 +size 4192 diff --git a/project_gutenberg/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9070e12937a23c4a06943b643948465ff7ef2b13 --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d94fca5df0d30b44951df9d3b422cfdb08ba51cc2c0b212af480139a17bcb9b +size 8388848 diff --git a/project_gutenberg/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9b7a4babe097f6d824827c8d2d25a05009669b8 --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c906374a08958727cb6e6d621ce099ecb6ed3ff5c600bc5acd50c43335dd60 +size 25166176 diff --git a/project_gutenberg/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e3b374ea256d0ddd5e7dc6b9b56a1795badcc99 --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a555cab319ef6e76dc9ee8a62e1409be7a3c1294d1c6d600880157aea9e5a13e +size 4192 diff --git a/project_gutenberg/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53eba0b7b9626a17908d77aad74ab418b210dfc3 --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2302af7baed5c509566bd944c79763b86df42ad35a7d2c6d5c5db2e1bcb4f42 +size 33554672 diff --git a/project_gutenberg/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8336286db15b0854acb19547fb95730c5ce06724 --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d8874cd69aefac6c625357d850d59c7faf3957b2c3754bb469962f7b95c966 +size 67109160 diff --git a/project_gutenberg/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..560b157c1fc390c07d8d4f95d4d151661347d67d --- /dev/null +++ b/project_gutenberg/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94606eb8013732eeb60ed5613e7ef2146afc6dcf953a5f87c16ca8a6e85f3acf +size 4192 diff --git a/project_gutenberg/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b594d48484c43421766c3fa62ab07798a4f9c0fe --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ba04dcde7f10987107170f87f18aea8c3418aa8f541c11ab25ff43b593351b8 +size 8388848 diff --git a/project_gutenberg/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04e9408cbaea10f27679966d6a42055a56e39b74 --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7487d0dc062602e30bf436c1149dcbd34e08893882c920e71a90e55adf377ec +size 25166176 diff --git a/project_gutenberg/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bc0affda327be1f864f25d1480b2f4983c36cc5 --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09efab09a4a54c83c8634b7d8436404f3ad3d469baef81aba8f980c5089252df +size 4192 diff --git a/project_gutenberg/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..834607cbae912175e1d9b25da447687a675ce4fa --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a432963abf163ff44dae834c19f6b9ee39bbda22207a8976cd6d454fa7b4bd00 +size 33554672 diff --git a/project_gutenberg/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38b47ca5edf29d645e2d7f72287ba04bed9d8c90 --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6865335c7e93211f2d8d052947920e318c2fc2a51e83a615eec9c8b47153955 +size 67109160 diff --git a/project_gutenberg/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acc0d9f996f7fd8de4745c91eff43a040f428d96 --- /dev/null +++ b/project_gutenberg/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a2e2f73d560e1257b9d0823d4b01ef1de02f1efe7315065c8c71c7fc8d32dd +size 4192 diff --git a/project_gutenberg/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25896fc2850a6b3c2439185ea3a7fb34b6b69896 --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbf0c3324b2e8db8db0a55db0b84705b3c06154967f4a3d29f6aa6cf75f9082 +size 8388848 diff --git a/project_gutenberg/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b2f7bb370279cfd87cb5e2bf785d8cc195e8e40 --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d78c07be450e4f9dec431084039ec4776262ae5f22fe1eaa1c13d6913e1fa3f2 +size 25166176 diff --git a/project_gutenberg/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cc6a6e20ff4f848eb91d702fbf7dcafcf8eef98 --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb2acdcdcc457b1584aa8e819852d85d451ebc7549cef8abdf5a113e78d10ad +size 4192 diff --git a/project_gutenberg/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95a1bcd6ed95703d49ef6b79a73176745687b921 --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae0248a0fa8c7d86c7a9d0087e942bfcf3f3fd410d938f232ea7dbf06abda6d +size 33554672 diff --git a/project_gutenberg/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09487514c1e78976a827ade9905af2fc17dd2fa3 --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1214ce5676f8e01f7f91a1cbaccf92d101b4b51f8c5535575b4047a75b7e49 +size 67109160 diff --git a/project_gutenberg/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4a416de25eeaa6f4d4e0d1f6ce424da727219ed --- /dev/null +++ b/project_gutenberg/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cc37e213620d4b16a8b86d1cb455fd92e51297c8711d4dd9b83f3f8826e41e +size 4192 diff --git a/project_gutenberg/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9261fcdc16bd22c118e15d6d5e20cafddc507b7 --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61da2fee20375d2f94dc246cb62d3daa9a8fc8b48465dcc1d139008ccd1a9c43 +size 8388848 diff --git a/project_gutenberg/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..873b12532f028bb30d040d560dffc3ac6accd727 --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef24c77d95eccab9847a8a2a3004103b952a7167315c751de8c2a79516a12236 +size 25166176 diff --git a/project_gutenberg/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..54cac92018316da3113c9bb61a4a6c0ca4c0dfc9 --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac41ee0f3ebb4fc9bff7bd8f94823a66a97481084a0bcff7d8a0afbce740823 +size 4192 diff --git a/project_gutenberg/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96f9ffe0ce8f4ebbca1cc400eb77c4e66f2d8740 --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8514f378c174d00f13e7406e268c52f0e4434d06a895260f5ddaa68357d8c10e +size 33554672 diff --git a/project_gutenberg/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0a30c7d5574427269897cdc99d58ef5cd15ec2e --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc5fd27e1afe0fe2f9d3f7623f2ec3c9cc794dc5418140258443d161ca23bca +size 67109160 diff --git a/project_gutenberg/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea38d1637016934926f17bc6d7c4a7a8ec3ccb38 --- /dev/null +++ b/project_gutenberg/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb64b141997437a1b414e138b554ed03cdf852a0da53c5a794d44145d7a82ef9 +size 4192 diff --git a/project_gutenberg/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbbc28ea457d7f8f74fc35d27630c0d65cb2afa4 --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae8750b21c75152acdb89bab6fe3ea9f0a824f32ce9ff59d8d7f15a26212f5f +size 8388848 diff --git a/project_gutenberg/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e44f2dba8c69fc98d84016a6d62495201adff7b6 --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9331426eb39d48e5d3f01cf5a94be1cecee9ad691a3e9340cede6bc947967581 +size 25166176 diff --git a/project_gutenberg/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8944512960b2b001d058f43eed385f8499166329 --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd160bba9d2a50687bb64c4eeeddbf45fdf3e156fc7046d0d7c3c767372c123 +size 4192 diff --git a/project_gutenberg/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb42fd3bb89ec905ed8d2143497ab9add8224d08 --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855c522c577a1167f540d91448a2de97ebf2d8cee66186d438a4018eef4a46f7 +size 33554672 diff --git a/project_gutenberg/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a20f701505af13a7070bfc1f75afa512efe98a2 --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7487df921769f1b8f2c1805e5e5ba60d2c8b5bff61202c4fd6a7a57af210aa84 +size 67109160 diff --git a/project_gutenberg/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58c72651efb5e8e09f374f753d7b00e531f63d0e --- /dev/null +++ b/project_gutenberg/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7022ce1ccde1edb30dcfef4e2417e36c792150713b40e59ae042095cb0d2992 +size 4192 diff --git a/project_gutenberg/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20bad6b21d2ea9b439038d8e5eed1e45c382df63 --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a9798779324974601d3e64a7b60f033f403143a9e9068a8027a2ad551245b7 +size 8388848 diff --git a/project_gutenberg/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f55f18579a2d53315799f347fc3a024ffb1f47da --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ede4b5d6510b12086d02acef6ae2498165393613425b397227d69abab92a0cf +size 25166176 diff --git a/project_gutenberg/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..130928b789426c2d408573617e2bde5824110e9c --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417ae39d576ac0b65e5cbe57cde0c9ed13fc19b67423d6e2b481c4a7e165678e +size 4192 diff --git a/project_gutenberg/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38d23faf28ecdc07c671731a2eee28448d5059d9 --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef34c59fc758d5414a392e38d97cad86d92da0bd017c5bf442aefb45875a7760 +size 33554672 diff --git a/project_gutenberg/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa941423bcca2e03c9b91632cf2e36e15ffb6a37 --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf34f4986ebe76ed1badb9b446f329d5adbea0fd59e88b6f05df41f1b506e53f +size 67109160 diff --git a/project_gutenberg/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb605a4de178ab876354380ea79f55388c02f7a1 --- /dev/null +++ b/project_gutenberg/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38622e89063e2f5142e9b42c201448cd9b8f770f7b86b437ef1d199780f3aa9a +size 4192 diff --git a/project_gutenberg/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86d9ed9784cc95b6642baed436a344d84d258655 --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25eaec20028cef9e664edd466fc44bdf11bbaf6409ebfe3d227a62357fae20ba +size 8388848 diff --git a/project_gutenberg/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32b86adf98b7e4501e6967a54f2af3fedfa79d76 --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9eb95fb79907078d7ed011d0562d53855d559d337d8f2842fa84e6c6f075927 +size 25166176 diff --git a/project_gutenberg/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c61a919f31eaf9f18d8a74baed4bc1c1814ea1c --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4cba3ccc43c29418703e8f1574c8f580587a8173317951af0430e2321305b5 +size 4192 diff --git a/project_gutenberg/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2efd7b824dabf6330cec1c429bdeebca78e4bbd --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f673eb2f708d83e7b17bdb250b032bb13842167d2b77610d870ea60ff33376b +size 33554672 diff --git a/project_gutenberg/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cffda3e33e395a195a61f98c3496923496ae2f67 --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f8a33a7459290999edd0c991ac48209a54075aacb85f2926aaadc72cfe6d50 +size 67109160 diff --git a/project_gutenberg/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1075bf233983aca2b0133c2667afb20b3971a8aa --- /dev/null +++ b/project_gutenberg/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c33b6c4dfef845e75810ed4b5c9e2eba3bc465136c450fa813bbf8a585eabc5 +size 4192 diff --git a/project_gutenberg/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b779198762923a3f046a0820e74af519cecf603 --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5d45d566070fc8a39c6b9f63e1771711502737aa42bef7560c90b1f8ef8f3f +size 8388848 diff --git a/project_gutenberg/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86d949be4e0e2103d4367a4c832638945855e9a3 --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ab9771c063358befcf2824d54aa7a49784f05ca54f49f6fa3d001c692c2974 +size 25166176 diff --git a/project_gutenberg/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..631ec94800af5952e73130c60cd0df1409bd890d --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43e2d1a3f7a9debfc435ad322c523da632062f493e7a91086bd00ba289ea4c6 +size 4192 diff --git a/project_gutenberg/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15ac74b581db58aae1e335ca57e32a0555982be9 --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1bee91aa151bd2975c5414266a1bd0eb0fc9391967f6d7f0b4fc27cc04d38d8 +size 33554672 diff --git a/project_gutenberg/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9b5b01e68b7ab08241224f1de29b70fda7181b9 --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2479750074425666cd5acfe8b244cf59708111da5841aa020f0e23522a5996d5 +size 67109160 diff --git a/project_gutenberg/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d62ad985299fcff7d8ee6bef96af1b38bfcae02a --- /dev/null +++ b/project_gutenberg/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca45acfe22c31e83ca9efdd94c7e47a11978fe080f603b4d3987f88d2dd259ba +size 4192 diff --git a/project_gutenberg/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15545e1872dcd12d16604ecbc095cf1fba3eb87f --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7e178339a644154df04f44695c2bc85a4744ad75f400888820ebe7ed8cbcde +size 8388848 diff --git a/project_gutenberg/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85e6faafe263d915da1b9389df5569191f850d44 --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e546711b238d7d30abcc3205145878d72a4ee481593c3a8fbf9c3a7636f6b905 +size 25166176 diff --git a/project_gutenberg/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3d0987557262ff64434bda976c836491bd361f8 --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a47569478159893a1b379717ca15f88ea6c6041281284f1ccb62ed8db93192e1 +size 4192 diff --git a/project_gutenberg/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49f96aba9934323d5f3ab8f89efee1379f2797d --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdd487dab2c0ba90ca7665bfedf9ecdab66244567f6c1866ee6297c69012799 +size 33554672 diff --git a/project_gutenberg/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1e7951bd78930d7d643dba6663893fc9f9c12cb --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf80f2222008884ce633df92a8416b053ed561b91e5d3c0425edd92e7fa9306 +size 67109160 diff --git a/project_gutenberg/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c2abe31352590aebd30c66a13f53aa3b7d5bc12 --- /dev/null +++ b/project_gutenberg/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5887d99e1a974083cdd03f156447426a72294171e991e9b79c2b3ca37a3f408b +size 4192 diff --git a/project_gutenberg/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49a15f957fd9c471525e7b68fcf3ca37e042e942 --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba10c39acdd8e41d67afd517b5baa2c259d70510d864d8d53002b455873d2967 +size 8388848 diff --git a/project_gutenberg/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2935dbf9c579e73aa7bb9e1a54994871fa1a3db3 --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f596c1ecf0bed99c9fcb344159563cb21476efd2812cf5253ccc462cd7c64141 +size 25166176 diff --git a/project_gutenberg/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83eb10a80e7c80d40d3f202d118dd28e759c42ce --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b0e96dc239dbc8f3f3e152e3cf1a99856ee68911a5f73624c5157cde7ffdeb +size 4192 diff --git a/project_gutenberg/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16da7b30d4cef4d96fa22629f22a06fb02034c0a --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da1ae24ab1edd6db94543e0e62c6492506b67c3f02f4a43ec69370e147405a9 +size 33554672 diff --git a/project_gutenberg/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23b8e945b705b97179ed865276c674d62cd96c05 --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8918a79bb78667b046bb1c7b8b0df5232effbabe67b05fe9ab53b5295c5ad784 +size 67109160 diff --git a/project_gutenberg/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae8daaa5a1df6751d78044565304fb04cfc5fec6 --- /dev/null +++ b/project_gutenberg/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194ca9b2140191053ea7f396b5803fb432963cc55c7afa756d8aeaebf29c6faa +size 4192 diff --git a/project_gutenberg/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dafd5da55ee48338da2e20e24de19ce46bd15fc8 --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c327707878d6b393163e0fd3fca79286415f6210e7f910628b2cedb5898903ae +size 8388848 diff --git a/project_gutenberg/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00027739eed0d2189c7badb50bbbf73929267623 --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7e8ac14e464d4195a473acb23ffef1a8e6ddb16edb19bacfc7a64635df89d4 +size 25166176 diff --git a/project_gutenberg/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2eefab59f61f017d89e0671b438a71e262edfe6 --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:377c72df1a1300af1253c01b1c14abebc84c677b6c4f09c796451e4899580900 +size 4192 diff --git a/project_gutenberg/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29cb93573d12245b99d17a00fdf3e0bf3ee1a9ae --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34cabe05bc66b5bc60be495f5a5fb1f2296789badd8be2679c0f7cba46e3a3bc +size 33554672 diff --git a/project_gutenberg/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7746cb08df939bdcc07cad78521cb11978d2b7e --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a83a6e38aeddd95f0495a0d120e86c09c207fa835b7a62f89cb9ab3ddba0af7 +size 67109160 diff --git a/project_gutenberg/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/project_gutenberg/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e70ad68855e68f2eac47997037350066570a2a74 --- /dev/null +++ b/project_gutenberg/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30637098bc3897b15b61b7f86a1ce1964fdd5f2f190b591273089118adc53837 +size 4192 diff --git a/project_gutenberg/model/final_layer_norm/pp_block/model_weight.safetensors b/project_gutenberg/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..781761d418e09e18eaa9948225795a0888a5fc7a --- /dev/null +++ b/project_gutenberg/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4674b765d7659da1003e770e8569c9782ce645ae4ae5d727788f038d7030811 +size 4192 diff --git a/project_gutenberg/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/project_gutenberg/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1515076cd07cdaf2102f64ba51cee222a192f89c --- /dev/null +++ b/project_gutenberg/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8186de6bc7c3c1de4de882b0e7836c1c20d241f91e49e93d6cc2588eeef284a2 +size 205914352 diff --git a/project_gutenberg/model_config.json b/project_gutenberg/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/project_gutenberg/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/pubmed/checkpoint_metadata.json b/pubmed/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/pubmed/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/pubmed/config.yaml b/pubmed/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..026c20b671468a46c6b3f791a47e23c703645a31 --- /dev/null +++ b/pubmed/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredpubmed-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/pubmed/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41d4d5fe0dd14bd6cfe7b39b683172c7deb972a2 --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324ed573fd7f714d0633df0ff4f6d5d95cc42846a068dfb3c1d1570ca35fa460 +size 8388848 diff --git a/pubmed/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1c18e0e51457a2370107f731fd3f47eaac9a72b --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d160acf006716b9b0277fcbf1f304654e933ddb225fcc93cff4937e1d05fcf6e +size 25166176 diff --git a/pubmed/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12507865d7a232ea7a8ee8d06268311e18845e57 --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e42493175f3b8dc321eda3f94e0d40102b4b61fde3ea111ae9a8705db3d204 +size 4192 diff --git a/pubmed/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1901805fa838564f4adea7dac7a1a5ad0830b1c --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c90d2eef56bfe2c6ca025c2348e2345399bf47f9a114598062e00e5d2acb4a +size 33554672 diff --git a/pubmed/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b52bac0ac66ed70336db4f0a9ec49866f007d396 --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa2953fcbe60014ed562171fba67881fb8575096e3585d08f3e3b9f78f91aa3 +size 67109160 diff --git a/pubmed/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab06aecc89b5db5953156d238d813bc5b49322f7 --- /dev/null +++ b/pubmed/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad7798ad63a44669c4324349e6d5b8ce3442a69cbac6a09a6a984b97187611ec +size 4192 diff --git a/pubmed/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32983ffc8919162afe39783fcf3fd73ec1315444 --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5834f0735fdf4a58f4e958e52d81abd19d8737ab796a6ae92b10d58feb4d0958 +size 8388848 diff --git a/pubmed/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d9ea703351e4d0b1849b5e0298a507249462e54 --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5faab45da347ac89c5fb5c32481fa206aad61ede2641bea6a875b251dab3e1 +size 25166176 diff --git a/pubmed/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4640458d86fd25dded6b4292dd8d237548043a8e --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a978c2162f9b0b191ac16b9f1118d9697c5af7f92f22b6437b7324dcc2b392 +size 4192 diff --git a/pubmed/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c327ec9e3df9aca618a05c97f4a98daed499cd00 --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d58fa90fcb5e0a3b63368d3a8e3ec094c219a71a7d23a03e69450471ae7b54 +size 33554672 diff --git a/pubmed/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47262baf61c8c80314ba76d1899c583404d14ac6 --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d58cd15c73ed3b422e76cec1b33cc39db25a1a001c604691ec031f98026bdc9 +size 67109160 diff --git a/pubmed/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55cc767ee2234e1872bd5c7dae1ba85e59df6719 --- /dev/null +++ b/pubmed/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24cbe5f2919033ebf4666604e474eafa832b26ee2c841b0cdbb670ce745b09d +size 4192 diff --git a/pubmed/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b397cec7f35e78761fec7e94798e98f22521f91b --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aea21ab83fc51971a6ff40e5e71cea12e920348c2c6b668d859c408a3f6bde8 +size 8388848 diff --git a/pubmed/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91adc09c8bb2ffb946e9aaec5d86f91a29911973 --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbc7fb1815b2a7e6abc04d47d1ed7de7022fed13cd3206bf9648527ae9ef797 +size 25166176 diff --git a/pubmed/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d26620d229128f1c1f5c0bea2fbb7730d662bb03 --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecebdc45c08107e4e9bc641045da20bd9fb1a861559142c509bf34536ae7b025 +size 4192 diff --git a/pubmed/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56b007973ade0390f8474357c918d8b47f8dc0ad --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7266df1f91fc5c60abbb5107467bb4adce3417024e5dbc366573ff3d1745b12a +size 33554672 diff --git a/pubmed/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..703320d45b107ed3f0f610d106115ed65225e102 --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6944309b3e60c8815d3fbfab23d76847e4e3a7946a7d88c328324162b7c17ce +size 67109160 diff --git a/pubmed/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc991845c3d6f3e00a48162045a0f3d6d609ac5d --- /dev/null +++ b/pubmed/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7cc6d4ea471df49b00a7408244744092b9f41410f17e6bb85a7c8c2d69b59ca +size 4192 diff --git a/pubmed/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01087d5fa55191b0681b849bb916782aef42e5e0 --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d788e79c6efe5408abfc4b01e13448f6c446142c3ed0a4a78f11ec62ea7637b8 +size 8388848 diff --git a/pubmed/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f04c6675e20e3632ffeb2eb6f16ced59a33574ec --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d93882adc4ebb56855300612d7b3f8a2dfd4a5678028ca2692848993841f3f3 +size 25166176 diff --git a/pubmed/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2113e417e474d5b5c7f7e03dd02157d3cb750da1 --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1b0484d3327af98efa76eda6bd7cad4d9413ba40cc68379b0debb8eeca7bc6 +size 4192 diff --git a/pubmed/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1ac121e93b11d2ca741a8854cdb9869ad83826e --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e9f1ac0131612353067696f89778bccaf4e8f4ecf56f83cab25ec0848ceddd +size 33554672 diff --git a/pubmed/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ead55d2b568d8da15e1e3b09b7ec110b3a251b3 --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a79825fb57f3fcd920ea9a0b4b013d304ba21b9bda63166fab1e70a80905f4 +size 67109160 diff --git a/pubmed/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecb096e55aeed1abed95e8cc69622c5f725243fe --- /dev/null +++ b/pubmed/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92947d9c27c9d7282f746afc0aaa080a3391da3a05166b3b4d8122e1bacef99 +size 4192 diff --git a/pubmed/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b816b7f1267b4d6db574a5b6cab9e828e8eb00d5 --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466317e9c429117ae6318693723cb1c8a6448febfa6a506fe287204be46d4795 +size 8388848 diff --git a/pubmed/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78352b6f31f44e187038e2e54614e95ef41a39f6 --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ee5d95b08e52ca6ef4063f79a646cc45b8621492e1f72f1861d87bc3c18ff6 +size 25166176 diff --git a/pubmed/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8833858494532bd3e9658998a397ae66d832e9e6 --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf5c196a188a524be1d99942140023fbf4ed757bbce6eeea8044a695d11c078 +size 4192 diff --git a/pubmed/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9e1ca199450124f38ed1f269f3be1ff081fa9bb --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee25721298916165bc12e061ba2eeb7806c5891c537126a0de5cea861982e9a5 +size 33554672 diff --git a/pubmed/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0234581dcae3345c337aa0e4bd67ed8537468eb --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83fa4c40b397a469a3f41aec5f7a9d02b54c99aad21b1f4414e135e57161cc5c +size 67109160 diff --git a/pubmed/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a65bdabd8a7b53e1ce5df9285929adca3a823e4a --- /dev/null +++ b/pubmed/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312170ebba565388f8dcbfdc8cfffa85c409c6dc7095eb618b42886bf0d7aabd +size 4192 diff --git a/pubmed/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14fa711f08e5465fb32a246a13f9d432f70a40bb --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4950aa932005517416b53d59d884c8afb4454fa23bc0905b51d16e9682b57980 +size 8388848 diff --git a/pubmed/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1baf807f034e8310d7837659072df11e6e35f61 --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566718c9831d67a6f301ab001270878d333545afd6425f791cec0e1dfeccad0f +size 25166176 diff --git a/pubmed/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbcf8814b321307a55d2c34e0c574a18cbd172d5 --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7a637f6d331427c163d1137f18ec836645f10405e0cf9fec49812fa3d75d95 +size 4192 diff --git a/pubmed/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dba469599506a6db5ec0abe91d7b6a8e64397197 --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ef5d00e29c5e0a79e5166f32b8383e4f554f50e6368a8cb4695c872c4a6775 +size 33554672 diff --git a/pubmed/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c02d76b50c7d48f8258426c5127dab7e6d021f4c --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440bee9d0894ff76da914d88820c5dcd054716fdafdb5f46805fa9c513e77479 +size 67109160 diff --git a/pubmed/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78094673e1af8441d610575079ba7624d73064a1 --- /dev/null +++ b/pubmed/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b02acad474a2aef9d6346346f5fc3d37906da5e61e082cc87774d7698b6a3c +size 4192 diff --git a/pubmed/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8138fbd1944cfd99deaac17fac764d6cf2dd7ef4 --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f488ac93966dac1d9d91ba0abf2f78c24045172728d4f4ae24ecd5b21471fe03 +size 8388848 diff --git a/pubmed/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e95677616373f5a7d81c6c7fd9cd31d5289f2b6 --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39c952d4107fe0235840c8493ee2a2a36b51999a83f9447668648984a709dc0 +size 25166176 diff --git a/pubmed/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c143a6544316ed246f1b44099a70bd6f7ef1847 --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112d3f981998a2dd9ba106d1e2b143b9414ed48d07609c83efc7078e511db768 +size 4192 diff --git a/pubmed/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db99454d9db05917cd5801c7c77f6577a94a75d5 --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50d1ba95717d3fda5d3b9b20918bcb1fbbd6e9626464cd3afc37975e112fb31 +size 33554672 diff --git a/pubmed/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..311fd7e1e4115d719b4eb35164db233f8c5d7ae7 --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bb7d0cf309a1baf47154a75d1e12df935cb51f7e06ea3a7294bb56b6e0cb72 +size 67109160 diff --git a/pubmed/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb17c4d2dfe4ff7bda211a1aca491c6a3f384f3d --- /dev/null +++ b/pubmed/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6149fcea9ab82e71e015227b926c49f136e8d1604c7c275f7c50a11e1faf8265 +size 4192 diff --git a/pubmed/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57b8f2dd352820c32bb26c24f390dc909ca8e512 --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11455b0ac79ce0738797a2c1590cb896f98782f333736f86fb1f9082fc03877 +size 8388848 diff --git a/pubmed/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..439a95aa78aeed06f09fda3734d1a8c6f1e8209b --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee51a60e475844c359fea23802eeb68378f40671d688d09c98eaab9d4b95f3e +size 25166176 diff --git a/pubmed/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a7d17510f213d21a4269a33fb4b45f041f42e62 --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a04808c3027d230bf1c6e1a4a77f6b4dc90c03d7e2c24d4320a6cf84bea52d9 +size 4192 diff --git a/pubmed/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..233623e714cbb66821205e8c293dc4da90b1575f --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895d5a86c2c7b4d2f8594381cecb2c54df4708a498b46a6bd912da72261f09ce +size 33554672 diff --git a/pubmed/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..772799810b028d898d5fe81ed13cc9c2f9c1864b --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8335330dafcd232b51bdf8618fe834f509889e87f15f637d051076401342b628 +size 67109160 diff --git a/pubmed/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3c02b1befb686ae46b74a6bf5ce55a89984b254 --- /dev/null +++ b/pubmed/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3dffa56f8c7f6c0fe34d69aa5a86b15c9652e4ff6916099032c71fa25778418 +size 4192 diff --git a/pubmed/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adb3f53c805b4bec57d5b9f441e42b1169fe1f40 --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461575676a92ca8229994f93cdf148e57dd5ec932376d21ddb6bed5ca4b117c7 +size 8388848 diff --git a/pubmed/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6012b8d6b17819899307c05a1cda5794d6142777 --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5318893abd379ce36e2dc67ee4e5ff146b521d300e24a4157951c786474669 +size 25166176 diff --git a/pubmed/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf8af078427c79c994d30ac347f3dc39f55fbe80 --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b53e6e250476e258d4b0139d9b69a76ec4fc9a717c25b0243d31cd2e4b58cd0 +size 4192 diff --git a/pubmed/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3692da2c192cf1692ce0cc03c08c6909cd7763e9 --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ceb29add3a99d913992b3af1f81009ad2ae3b1b05ef8ad3a0ebf1f06b52927 +size 33554672 diff --git a/pubmed/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ed5218ea99c1d97867a869e2071dfd8f5b2cd1a --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8cb4a5bd1cb51bb197bb71c93736c406957daeebd8a686a98a861bf0fd7f64 +size 67109160 diff --git a/pubmed/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1caebfac6bd591293ff58b6671b0c0bea59b9036 --- /dev/null +++ b/pubmed/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d6577553a7a3396d28512e1d4c2c10ab70e7f8c09d376e5792298f5c9c2b67 +size 4192 diff --git a/pubmed/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8a3e82272f488c43209761be11621f1fb82187e --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93967969ecac4b7663db7cbf3229dd0c3b474e7ea539a50fc2a98ad9f44029a7 +size 8388848 diff --git a/pubmed/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25b3883af5f00b4f59576cba20871c9a35489150 --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a709c644733b9c2fffc1634831d2be6872d1fbefa3094e87d17ad5eb2161ba +size 25166176 diff --git a/pubmed/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3602b1751a56fe897b6babecd80649efa061fbd --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc1663ee126b5ea2822ce3d1dc6cf968ca23e0f4cb988b57909e25c25d8503e +size 4192 diff --git a/pubmed/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1329546e1bf7cd628233378c4f1be8ae7bbdbc5 --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca9d81f753e955c88e0d65a76d951a361349b7c577180ee9a0383c13d9c7e82 +size 33554672 diff --git a/pubmed/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6123733772afd440895204209854489534287d3e --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4b6c5671583f19b2bbe88f8a89eb61bb9801946d00b64a36f665df741e1c3f +size 67109160 diff --git a/pubmed/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..975e6fdd1701a4be9ea1353b83df4102931d7fb6 --- /dev/null +++ b/pubmed/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950a93bd62a41e9b46daf2befe3f9a322ed3d77b22b6a738c4bda3c572d983a9 +size 4192 diff --git a/pubmed/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a027a0900d5643eb62912439d26111fef3e3870b --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0d3d406daf7451f268f82aa74c374957027d5b4a6ed05508b6cc43dd230582 +size 8388848 diff --git a/pubmed/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40725a2916898ff21726463cd0caf377c4b8962a --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2107f64f225229314b1f7c650b0e8ed83f51e37e822aa4bbccbefa71921a899d +size 25166176 diff --git a/pubmed/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ea59e65fb7d4e3f1c45b69fb34233548676a1db --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7f8472f3bbedf88611e8b86977aed0194ce9644e679a494e21c946f33f80ca +size 4192 diff --git a/pubmed/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..160dedef6570e44f0efcd7ba4e1f83d4c5385615 --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226bcdd87c3367caa4ca2540b13976625fc58a8eb3927e7282f46cafaa1bebb9 +size 33554672 diff --git a/pubmed/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6979522d481b0b59cde95b1cc098bd879e71038a --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b97291a54ba0c53b610ffcb90e042dfff809ac7481be57650d1f3d32ef065dd +size 67109160 diff --git a/pubmed/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..655a5a6c1150fd4429e0a9557cf4565108d971fe --- /dev/null +++ b/pubmed/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6760270132f0769c1d7ed0fa0b7d72c8ae3b90e88e05767c187dcafaf249565 +size 4192 diff --git a/pubmed/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b67056fcdfa214be127d68a4489cfd72daacbd49 --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:147cda8131f8de6d7a2ce9c4b6f6e12df5184f9e57a6f2e4360817159686ebdd +size 8388848 diff --git a/pubmed/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eed3f2cea1e271c43fe8c207a4d10a547101b85 --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9e46052f0eb4f1361de3b650635b44770f1415b5be9e8c73e1309b6af491d0 +size 25166176 diff --git a/pubmed/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdfbb8f10470909da223d32efb5ef29b9990e083 --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4a3fde3248c1bd7be300a114236f58ea3bf4ae3324e5c99ec1ee3e71c77519 +size 4192 diff --git a/pubmed/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79282b966c8dd2595532aa79e34dbfa3fb03b2e0 --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5dcd3e414300d7cb2ce6263555a089ecb66826db106a3ab10fdc966297e853 +size 33554672 diff --git a/pubmed/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cd38a6c3d548caf31d12ab849a0b38eb46ff383 --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0204448993d44980ae61f78cfbc28eb0cf825e29136209a307f7bb869fe2393c +size 67109160 diff --git a/pubmed/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b92ad2a8a01ea8d8b57e2c9c4c97d8a103eec27b --- /dev/null +++ b/pubmed/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70eb34da65239fb7ef99c5aa4692df8056290ed317d255dcbc1ed22f07270591 +size 4192 diff --git a/pubmed/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33bbeba1ac42b83a8e1328a2386e9667bee2e384 --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269911bd31ffa61c22176bde3ebe419cf64d613a67994171816a9133b4dbd7b1 +size 8388848 diff --git a/pubmed/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21155b856e2f591a84439ee4b51b83befa7c98b9 --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39285a3d13a121c6f658c868220d8bcd9a10a0663762015bad775660948d024c +size 25166176 diff --git a/pubmed/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4bf241adea6f49f83e4728c1a3ef6c4e262d24a --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6212cb427c28825bc35a5b5ce9db3c9796c7410a64dd6d3f86dd1eb7044b02 +size 4192 diff --git a/pubmed/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49624545d3964d457ccc4750796532f49520e7f5 --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf173497d3f7c6ef1eb05f414d735db298a1664b519e029f45ccf841eae3b5ab +size 33554672 diff --git a/pubmed/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4316439b9e8183ff04bcbb68d0410b3566f6b4c3 --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4a487e5f82300f56f9bffba470e8808e84e9995095b1c617b0aa35074dd6e0 +size 67109160 diff --git a/pubmed/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a511f69bcb6f01d26b8b8c5546028fcd3dda072 --- /dev/null +++ b/pubmed/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f55015ab0cebbcded0b626d8f47a1eb4063bf6273943742e6e72d16497c287 +size 4192 diff --git a/pubmed/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d324d262bc372623805f095c375794107ad4ca16 --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83f61eb1c1cc219778ec9c7f2903caec1620ae38849af14fd9ec43b18d63ea9 +size 8388848 diff --git a/pubmed/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b7d180eb16aea4fc96e12030ccd924f35d94c3c --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb63e46d52d1f21e4fcf0d1bc745b767028276018bf9cabb316f1e7c7562724d +size 25166176 diff --git a/pubmed/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90e74b89f72c1f5c461c7db1321b0bfdddb3357f --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd5678e6514f0a138df0651074173354e78577c303347479fde06cba23b3c9b +size 4192 diff --git a/pubmed/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aae3f9ab1dd26c669a143dbd094199ebb700270 --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488fbec66a2f2a45dbcdeeba2f93683f8e29fc5e5aa1e2d3b764ca857ca50852 +size 33554672 diff --git a/pubmed/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1980b05ae4eeb7f8307daa3eb5dfe20eccd5b98a --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7db539e8a7507d7124ec63f9e9cfce4c9a842311d8d3a362d3a04c023bb6e12 +size 67109160 diff --git a/pubmed/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1560d424412c63cd7911d3543b433f24fcc6b63c --- /dev/null +++ b/pubmed/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82fa5fc5b2887eb7ed734875d95dd4c2b778c7a8083c3092911a00676662ddb +size 4192 diff --git a/pubmed/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d54ba1d8ec1a42f78f11bf785bf811e19a7120a --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2357386f51931d5b7391a1f91b8c7e39e4eec735dc071ed24a881fe22794ff +size 8388848 diff --git a/pubmed/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7570b0ceabcceccd3c12a49bb0b03a57ac96bfa9 --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9e870446b583211c411a2be440fdd89e90e7da1f6d5825933fec16707b0af9 +size 25166176 diff --git a/pubmed/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..184a5fdc96fbd70d6de06993b186c027f0bb8926 --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723bacbfaee066e8e4b206ac6a45476d59e24283734382d3fa1631c34ee7727f +size 4192 diff --git a/pubmed/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d80605322eeedca65e17099268899329968967d --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a4dbda4252b17376ce4897010b51ae25c36de15a83026c080e14a2c7e378e5 +size 33554672 diff --git a/pubmed/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..666d7522fd5d453c396b660a328b81171dd0a0c6 --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6eb8aa492a3a47962458cba875db0185a6137e1cdac8fc09a772b3abfdceb9e +size 67109160 diff --git a/pubmed/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85a4258b192f6f9f70424ea6dbf8e00476270171 --- /dev/null +++ b/pubmed/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87730656c5d259690215f6dab8f67d53fcc9388a95637e6d34419d5f1fb43d9a +size 4192 diff --git a/pubmed/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7248ce1a53a889c01695e3c6620f0ab636b7b350 --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb6bfab19aec92ca48fe4f3751ab3f52bdce4e3711affeb96295695f3b71a57 +size 8388848 diff --git a/pubmed/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..453153183fcf69418aaa4eb34f97ef4e3fbc7ab6 --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4152a5ef7d423b8b59eee2f8358e6de06b5814d89d35c36a5656cd0a1778cc97 +size 25166176 diff --git a/pubmed/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..926c47f5fe0068f4d6b182a62c7b67b4b6e0fbc7 --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30121ee5b99954a1875a955b77431d88eda6650786c1fc81d0a5b3e6c72667be +size 4192 diff --git a/pubmed/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4458d589545807ac7532fbdef3c6419625c7fa53 --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be737f95be529e9473b599530d186eed6275dab10cca81333d7e4f8d900e5dbe +size 33554672 diff --git a/pubmed/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e75bbbddb9b5801b090e77a8678f43b7a31e69eb --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e143f384aef7678cfaf07b92645c7162db1223143218b13fa7c2fd1ffe39d4c0 +size 67109160 diff --git a/pubmed/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a892f8e7e899143b845632e575512757e722824c --- /dev/null +++ b/pubmed/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b82d76e127862535d037c28d60b714ac8b51c1c5f3ab90dd8ba704093557c4 +size 4192 diff --git a/pubmed/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11391a5b13b1c29aefcf46e7ad77c372b7045c13 --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57fcc8029abb4d992972d90633bdaca2f68d1ab3bf3a8a97939c635ad04e6be +size 8388848 diff --git a/pubmed/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ad8d574ca12075721c5e7fe1f2063a23a3faf2a --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c23f2a795a7edb54141fe78a3186b454a1378e8e4c719be82d0caca42a8c7c +size 25166176 diff --git a/pubmed/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d62bd62737b109cdcdbbafedf82291865540a1af --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b67c5083fa142753faebb96db94ecbb5d14f8158fed2230e446f28e835565b4 +size 4192 diff --git a/pubmed/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d92fba52da291708b8f53a1c424735c55093eec --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013306917c22fde5190e5987e7da5a95c08eaa6f9ef75a0934cf99ddf54cfdc0 +size 33554672 diff --git a/pubmed/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66b44881805507db471a3a6ada77718e08ee5f82 --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeac1863993ecec555c9c1d8daaf1dcd05b303696187382f6f77e8079f457bcc +size 67109160 diff --git a/pubmed/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33751e52e0df518a82a1b8ce26ada4d5116c4ce6 --- /dev/null +++ b/pubmed/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22dc06b39c114ed047560cae7b0e8adbbc412571af2b53a38d882e401e0f7169 +size 4192 diff --git a/pubmed/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81b5ecbd3305b2dc75c4ab744ecd5f431e5f18ee --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fdfe4e44ee8a6cefbe32200f61f9e6dfe549cc554fc229f3bc7692ede159ab3 +size 8388848 diff --git a/pubmed/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dc69ddca554338630def2f5575944b1523149f3 --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae27044e199ffa8aa2bee4ebd7523aac4d2d664550aa75f2cf9f11489d32e50 +size 25166176 diff --git a/pubmed/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9f220512687f6c9aeda23257f4a1b658905da32 --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60e3ec9156b69776cd6a3ce6ec145a741c1042353962318ca34b3df9dabdd57 +size 4192 diff --git a/pubmed/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..917b7de2396c633d6ab218fd492c601970693616 --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6eea7ff6b9493c79de2477cf155f46a17171563a6e1d32ea7974e499b52fa4 +size 33554672 diff --git a/pubmed/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87a1644cc79fc2424e74a8a5fdf3dbe969ea87e4 --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b420cbb121fe450ef87785b0edb5f823512c36dc8ec9563455a0fc77622c459f +size 67109160 diff --git a/pubmed/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c2974c655618b47f3d5e5f603edcefb3ef1c413 --- /dev/null +++ b/pubmed/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f77cab8e7cac615ccc684a2ccca05b9be5132eeb45382d80360bce2c4e88844 +size 4192 diff --git a/pubmed/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b34a3323dbc4d715a9951f8cc03c8d875b3f2656 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf68af7e6dc0990a5c6ba8c41541f9b96444af6aafcfb4dd85b9e36c87715c4 +size 8388848 diff --git a/pubmed/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc21fd7a7bb5d84bf65f8a397eef8dc89cedf569 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ed0fd6bb53e82d53387349b64a0ee282ac736503c0831b6a0ab1c5b1ce00f4 +size 25166176 diff --git a/pubmed/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed2ee2758608c490e8bcc48c9eb053bc8e1fa0a2 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48ce588e485c41583edfc5439cd4384639167fa5a42c09e34e2b3a58f1751a6 +size 4192 diff --git a/pubmed/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b655399263c146e33013c250d21d8eb26e62d78 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98da304cb0800ef0584e7025d857bc17fda4320671b7de0bd1ea7ca94575d75f +size 33554672 diff --git a/pubmed/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92cf2aa3c0a74a2b1138912d48cfdadbdc4916c2 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89ef96d888719ec6ff1d6e6980328d751262772ca46b6dbe84b710e7f6a2d9f +size 67109160 diff --git a/pubmed/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52065a276bb25c370ca25abca135debb2450ab00 --- /dev/null +++ b/pubmed/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb71d22a510e8afbf488ce5c147e31b00834750fd0eb45750ebfc0bd211f24d6 +size 4192 diff --git a/pubmed/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32b07fc854ac996772c50c7b0267f1c6a81c3f58 --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90501efc16876e7cccb6b0c221802e6baeaef7d0d65031ac422d65fec0cdb969 +size 8388848 diff --git a/pubmed/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01bc4834903cd17f0bac42eb1532ece2bea72acd --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eae975868029c0112b9dc01adf7553ec389952514d6c9c1020c8b383d005710 +size 25166176 diff --git a/pubmed/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e6e3f6d7ed41d2405f6e4f40ba6739168af4725 --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47633a38e4ce025c74e2d1c566e129a3d6054a83908c8bada9efc484d4671f1 +size 4192 diff --git a/pubmed/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94c95c925dd80f3e7cc476b71f322326ff1c617a --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c82f7b9de0b69ba7a172932c9fe3f7dd811636d75bfa74b4f8fe39a90099e16 +size 33554672 diff --git a/pubmed/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02c0dc55f442dd12239b9e112b7aa9e03a85980f --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d875762f6756daf4136e64f206b4c3fe5f694d2727890565ea158092a32767c3 +size 67109160 diff --git a/pubmed/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff35619e3cd1f80cb575c095ad2d2c8af43a5c7f --- /dev/null +++ b/pubmed/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed15189b9414f8b6e0e0ca804f00f1f7b86d83f09435868f1fd4daa11f0d66f5 +size 4192 diff --git a/pubmed/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d30ab0601238f0a5a248c7084cb676356460d31 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f4545551f8c806a7a9bf582a67894399e9006586d086fd4f1c57909a418095 +size 8388848 diff --git a/pubmed/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75388b91ba37bc4fb63a29c4314145757a5fe535 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2dd039660944213f7f83e8b592d513c9402d4f858cf52d16034bfe55a93cfb +size 25166176 diff --git a/pubmed/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60eb59cf3dbd51acef16c631ff3da670b2c78af7 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f3919c9f7a5398542b62b880dbcc0a310be1a0edc0ad4e1c04541b1fc9d875 +size 4192 diff --git a/pubmed/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19f61b1d3ca6dd309da88aba3b5e07954505b885 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126575b5b3a64110522d26406fefc0110632b5564d93123d5fbfb1876e025bc6 +size 33554672 diff --git a/pubmed/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d323d1e3a4723235555d24c4c8ad34c3e437792 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba3a4ab6075c78074272e7d239050ce5c6bd9f62d262a7dd820e708185daf14 +size 67109160 diff --git a/pubmed/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd7ca07aa6b0c60111c836ea7a2424962e89a786 --- /dev/null +++ b/pubmed/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab29f85408549d61c11f6bf8288ce4802d10e289b98df9a228457c3f69206ce +size 4192 diff --git a/pubmed/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71cf2cd9901491542bcae5dea920aaf6dde4a7e8 --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75e86d731a239118fd463ec3f934d2d3f474284b1ea18915f98c43f762da0ec +size 8388848 diff --git a/pubmed/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dad113273d67bc5021bc25297e731f1b23fb5c29 --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6098df7ff99c739fa4ac5f462000d20b196ce6db1ad892ddc004224c62b0d7 +size 25166176 diff --git a/pubmed/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6ebe940f288f995922b913c82d97914d89be9bd --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8bda102ebc848f38b6580084c635ccced0f3ef066f4a178a25c35996a4f5663 +size 4192 diff --git a/pubmed/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2f3b0d820d612ee33fdce931aac20b783965869 --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373346315b557c8d3e2e990f7e72a0a834f5324a22ebe6e79100e95994e95367 +size 33554672 diff --git a/pubmed/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8e1292f858b0661e2b1d555a6d51e9c068e32a3 --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4b8c2d11ad35f0143a8603da8a109e88cfc7cce6024207ccd92dd5867135c0 +size 67109160 diff --git a/pubmed/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1f28ee9957b41e174a4f0a3e2d454adc82585c2 --- /dev/null +++ b/pubmed/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bc0958b4bc2e069ae4a60d6b8f1c22225703641edaf39a3f9b8e21a06132c1 +size 4192 diff --git a/pubmed/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47862cf59808db24f32a5e55f69bef4b823ed865 --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab9e18aa8adbaa3f37354a14918ebc2e90c591b0b8e39eeb4ddeef77956742e +size 8388848 diff --git a/pubmed/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0be6f824c2f4ac9b250e288199a73ed4a587755 --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dce09ab50ea55f41fab20e5a66c3907f2eef8b1aa85114f5f0ed5a49ba59a78 +size 25166176 diff --git a/pubmed/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f7903b08a6e4a2a486f5e27e17ac447f5b2f84d --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e86addc0e5f66a65a048feaf7ab144c78f2a53218a81fbd9e98f6ed032a158 +size 4192 diff --git a/pubmed/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c330bb748fa53f42ba5bcbc7d5b99bffe2928b8e --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b11b59727799556c274f4eb5fb58fcb2997c4eb20ac46d6382c02b6089a6b9 +size 33554672 diff --git a/pubmed/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d910e1691658290ff5eab7b8fd5c815f0bec53ee --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d416331201570e863ef24c957fd776aabdc0379ace412c4e9eeca4c9e3663c77 +size 67109160 diff --git a/pubmed/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdd993d35803629a065b3e30cd82a96f8d266484 --- /dev/null +++ b/pubmed/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804abfa13dff6618eb2b31d5f7c7ed2fea0ba2f920bb761fa653caa0a4a75953 +size 4192 diff --git a/pubmed/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ae1881618ebd0b1012a9c3459e777e05ee7b16a --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44203b94dfabac65cb624afc1155b7ebd1ab4d39d738f16a3b1c75298dcf0e88 +size 8388848 diff --git a/pubmed/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88ef18d830a128b93e08cc6ea8d1a3c98716c3db --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef493e547adecea5e33c8cdf28a00e83e6faa8efdcbfd53054b2eef0c4c5e790 +size 25166176 diff --git a/pubmed/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/pubmed/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae108bf6bbfe91b4f76ebe4d1675fe3c250d4c5f --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5133a9f76760b77632e8c80e5c0dbca0a47ab9eed36be811dc1d20dcb7e3335 +size 4192 diff --git a/pubmed/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c495d79a92e112de7cb0fbf75325ab5740f38fc --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985ef978dfb89c01f48e2e26a29e8b5112d7244fb95bdf96dc4b10af2aab4eb4 +size 33554672 diff --git a/pubmed/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f69839600f02b8b88b85aed0eb1cff4cedd9f38 --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673028b5941da2b606833edc16ea89e5d15f571dd19aaef68ad8be32b107b3ca +size 67109160 diff --git a/pubmed/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/pubmed/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..595f20fce09cfd96afda323c7308c1f1ad58c326 --- /dev/null +++ b/pubmed/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebeb3873058d6c0c2d1332877920bd477f785f44417dcca1238d4289f7f97531 +size 4192 diff --git a/pubmed/model/final_layer_norm/pp_block/model_weight.safetensors b/pubmed/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a522b9a576c082ba8d0b1be914b82b7e20444ace --- /dev/null +++ b/pubmed/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e146c78d0094b8cc5faf3a18a3ed0b37cb00573f219aeb201f4b4b376451c25 +size 4192 diff --git a/pubmed/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/pubmed/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90ae623c1fe88d5232c8636bf2a7a2e606661254 --- /dev/null +++ b/pubmed/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25a07b92896f2ae19ee4be78a34628ac66f1c3ab2f3eec43c6a3de595790649 +size 205914352 diff --git a/pubmed/model_config.json b/pubmed/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/pubmed/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/stackexchange/checkpoint_metadata.json b/stackexchange/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/stackexchange/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/stackexchange/config.yaml b/stackexchange/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38c4a711f01ce014869ab0697d901779c378c36e --- /dev/null +++ b/stackexchange/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackexchange-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/stackexchange/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05a692c119ac8f0b5c7bde7425d07d7f9557b0bd --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c00d89c0dad89ed1b51e1941260dda6154ba9896cfce5db5e3dbf5bf23f2b58 +size 8388848 diff --git a/stackexchange/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f8dde4b46334b7d0857da706d08f5e33dd92df7 --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e321b80974a7792d89dd49fec6b0ddc356d867aa9e134f8c3c69e724210b857 +size 25166176 diff --git a/stackexchange/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..003dae5fd99e99a6835daa34fa5e739736e9ae38 --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98d6d93ab930f526d36247d156f20824f2fe6836b2adf9a4525a1ef3fe094ac +size 4192 diff --git a/stackexchange/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..addc3619de9185fd1db49005e15d853f9bc5f36c --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa46906715a37850c538615d93b10ac3e8cb51ff026b493d1ed1c57f8cbe1f01 +size 33554672 diff --git a/stackexchange/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09d999588b5ffc35cda7c6eabf0a5d0fcee27393 --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7a4249c8dc4ddc7585dd0fd126907c97268d2977261386cb5684777b223e86 +size 67109160 diff --git a/stackexchange/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fa3dc994b96013b6da233def4bc21d816a28971 --- /dev/null +++ b/stackexchange/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e16e9cb1bf6a66fb031b9fbd2b853e7596ce9404a85253263e74ac9a504290 +size 4192 diff --git a/stackexchange/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7925d86f47c25e8efea8286eeb290f52a731e873 --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828bd849c39edab4fb98e70425bc01179c99eb2ee9ddd7a61f1b7768303533f9 +size 8388848 diff --git a/stackexchange/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bdcae7b42929f81fdda6b80b898d6c0102c685a --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4569b98047529d08b9c4ec8403d8c16437ee9d50b3dd8611ab2d86442ebb0ab4 +size 25166176 diff --git a/stackexchange/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60ad6d26afddfb6ca163bacb315d06b80ccba458 --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b34e2dbaed00d495ef6a8fcff7589190b5f2481d1d6c81b5b37540b2cdf386b +size 4192 diff --git a/stackexchange/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b724b88b326e2a66c853da1c00822772662a1825 --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ab95c90bcd5b8401c32c1eaa1e28d0254ba84c5ceee5451778ebaa6c9a684e +size 33554672 diff --git a/stackexchange/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e134ade47c1b6ec90908d60141d65c32493a4a63 --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94bcf00a53a9aa70219e5433c52d7372599b13e0b7f3c66174ce87c84f02093 +size 67109160 diff --git a/stackexchange/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b25521f6db9df6b126fd3b0bf9c5b44666145467 --- /dev/null +++ b/stackexchange/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb42d33a28c4e3049d5acc95c1a2aaf790549c9d34c01d2de13a3e0699cac9dd +size 4192 diff --git a/stackexchange/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e187df360be32a02f051e93fa6fbb1549e5bdb16 --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9036be5ad190a14f1bd54057f9120fba8e66f4140b34b4ceb194633e906cde89 +size 8388848 diff --git a/stackexchange/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9392b219a9c4e124233873ef6237b5f366e81099 --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb8d86c39352511a712f9e30cbd271903b08684b83854f28eb48feecdc6d676 +size 25166176 diff --git a/stackexchange/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a06935b2f31aed934ef398615ae9ccd842a15a1 --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50b7cd81c4feb0a02dcfd77df8048b58103d650f4aa75eb504de8d2dd66f6c0 +size 4192 diff --git a/stackexchange/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f28d2244adaa2fc3bd3af94da61449a7b8c4daeb --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271370b7a78a096757bcef32ceb9770670345c765cbe22af74dcd891a22003cc +size 33554672 diff --git a/stackexchange/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e802b79c47e53b2b36edf4847c1ddafec6e567d --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759723c91899a46aa7bc6e927682e8da9e0226bff824ed15390d995e728b7bf6 +size 67109160 diff --git a/stackexchange/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3996a1c4aee1033029d1a996b1337651ebd450a6 --- /dev/null +++ b/stackexchange/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f36245fa8e8c98256b3ce5b5bfa854f05e94c6675a228539be5d0e3778ae9b +size 4192 diff --git a/stackexchange/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18abdb384795d500ad42752597fe7d94f343d039 --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2470a242c499d1f2bd8aafa8ff6396d655a104c086efced94ba8a57ba969967f +size 8388848 diff --git a/stackexchange/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e1169b6f12c801163bfe43cd0dc52fd23ca4e84 --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc406d3124e76102e6b403ec1d2078e5892fe085c30a06402ceff747a168ffb +size 25166176 diff --git a/stackexchange/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a7ce3170f1c651e9ca19bacd4aec4e8103929e9 --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b440186a950cd6897f534f36ae990c1041ff5f1e2a4a8d74e7505789c2bb8eb8 +size 4192 diff --git a/stackexchange/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b7d83e147337b20fd7c52da5adeefc0cb1e2c2d --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e48ff3e7ee31a7d73d1475d7f2ee95c511e2f60457d54d9a74f6ff88b0538e +size 33554672 diff --git a/stackexchange/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f70fee79cb162c5061f2f894e28ad2f525cbff83 --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b38d158cda87f6809624807c1dd308ef14097a7f961c615ae0ebbef0d6e7ae +size 67109160 diff --git a/stackexchange/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..704dc3dd5ab3edd191905f0ca37c65dcd5ae8626 --- /dev/null +++ b/stackexchange/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a986d2b0d74af6f2d22fb8c8b8a2a7efa0c7aba3fa59054f73341824fae3b2 +size 4192 diff --git a/stackexchange/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5375f9463be45a282ed133b65bb57936e724b93d --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c80e8ddf0b1098941ebd609c5434339379203f56bae5fbc7ac16e4c3ae812e +size 8388848 diff --git a/stackexchange/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1c9afc03380539b683cdc1f4e055ad3a733bbfe --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca26c3d377e832f66a504774da90a0c897219fceb00cc66889a431698464b366 +size 25166176 diff --git a/stackexchange/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f78a901be785d3871702c85bf2b914c342e12e6c --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0f915150753560b75c2eb6c12dc621499b640042371fe08b79103d8309ec75 +size 4192 diff --git a/stackexchange/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3643e01ba0d4711cb92ec853ea387d8f9fcf0e5f --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23571e62e1bcd93f727ac46442ea69a16834ad7676df02f196d915aa3f665faa +size 33554672 diff --git a/stackexchange/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f92e9c23e15335c46decac7975f95ccb246e612f --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d27918d3e4e68d399792c68aa6a0b32be15bd523a9ab312e540589842c3324 +size 67109160 diff --git a/stackexchange/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88218bfa855f75ea5fa2979c326a769ec56ce797 --- /dev/null +++ b/stackexchange/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa7133fda445179c215186649c2d6d983c00a8a69e602a33325e4615bfb6862 +size 4192 diff --git a/stackexchange/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6dcd94761c97f2b1cf4484a5be4dabd666cf487 --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f510a6c343b780596311d27c41ff51dc0c9a9cea7e512c5e8877ee289efcb7 +size 8388848 diff --git a/stackexchange/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cd91a7a11b5a8edf4151fc767db627d77193df3 --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c168cd6e55977e1731171332fccf804fc953a9a9b74b38ff1307a99c7fb546ba +size 25166176 diff --git a/stackexchange/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25133a5141521731ce436b119b4fb15b53227b7e --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe439a5b331a50c63905b35e46422ea3515f4c3ce54b8abbb25b7b49b938108 +size 4192 diff --git a/stackexchange/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..343b01a8f3efe2e21bea87054ca5ec89dc430b4f --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6c80f327b9136219fd58ec19cccabeff8649d50430c1c4b5068099d14904a9 +size 33554672 diff --git a/stackexchange/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41c49cf1cb511ad321f6245f06178d022cc78d1e --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977cb4257a28ed0ebcd8ef80275c6269702117e4e0f59194c92ee762d09a2bd9 +size 67109160 diff --git a/stackexchange/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60ada0a378b7e642c10502c017786ae254230e82 --- /dev/null +++ b/stackexchange/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5f7a95b1b68dd64bdcf755c29c80d71b0fb41a3d07a1fad8371a3a7fb5456e +size 4192 diff --git a/stackexchange/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5e6a3145399fd3f0d5e1969e0497de1fac2cfd6 --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e71ed7cbf564c8bbab7db79f87388b84956e519d5e48587313b66f01bfedb74 +size 8388848 diff --git a/stackexchange/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..370d53b4e234c283a743c62b0864150e84b9e698 --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90c95bbf3e4348883145a960b332527e575a50f48161fdbd6fe87e24f6c1f53 +size 25166176 diff --git a/stackexchange/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be9c0b4425ef09af0188901fd2c98a13427ee3d5 --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08aae2e906874d3ee295860c2555ff1e00d96e9bbec383afca1a2e115f4bad56 +size 4192 diff --git a/stackexchange/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d417fd04a77b3ca4cb24b9e59bce441c8c3ee73c --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8f860bc8d4c3f6beda2993613c86275a696a54ef024d3ef7c460e3aaf90f72 +size 33554672 diff --git a/stackexchange/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1945b368292a79da84507dbb38f1051884e83c2c --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e096fdea71ba5878f750c216ba09dc38dd2cf7ccdf9f3856469410f233dd8d2 +size 67109160 diff --git a/stackexchange/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b09a36bdccf6bbb58d0c73725be1277cfa0f2006 --- /dev/null +++ b/stackexchange/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeea7f09a4e61e0605a20c4fa54f019dcf1259e8df4eb7e0999ae8828f56c6cf +size 4192 diff --git a/stackexchange/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af05d9924999f43dca308ae8c509f964697d2d6c --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fae18c81c86ef0164cfc256a78404ab967fd5362ceb949916f09e67009d32fe +size 8388848 diff --git a/stackexchange/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dad75d757d014236dd7e9e6e1905485fe73111c9 --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631d4c773fd3510220953842442721663a482fc7a72e45db3bc320befacb09bd +size 25166176 diff --git a/stackexchange/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e38724ebe1769d7d76cd01e89186c3b9e927d642 --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f3b996cf685df122b80755f7c7c9fea7804f39f0f361d88473b2db5dcf081f +size 4192 diff --git a/stackexchange/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..036fbf768667f121feb09216767cfc3dcef02514 --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d038c4fba9cc477b1c62098ff111e9ad3cfdae13f0ea8181b00b6953fa9ba4b +size 33554672 diff --git a/stackexchange/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c99dec7085009d5a327384a3336c5bdacd15d2e7 --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b704a2743a9840edbbdd164ee4821fc680e36800f2688791e62d0fc2cbed115 +size 67109160 diff --git a/stackexchange/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2b80e317e5e26726dfb7eca48f700ecff7fa338 --- /dev/null +++ b/stackexchange/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db0bbe4155ceddd99accf93e17a772d53aeb901580766320210f87ae0185a84 +size 4192 diff --git a/stackexchange/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42a0038835e5e72d490c9685e005283b0cc691df --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7629e2aaf9da0ae983cfa9bed0636990521a8c693a2fa0feea79f91022e84a02 +size 8388848 diff --git a/stackexchange/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7ce198971eac2efbb8b51375ff04ed6651fd2ab --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b5a2bc1df0516e510bc5bb6fe29bde368da6cfc9c0449b14662374c3cc8865 +size 25166176 diff --git a/stackexchange/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48fe307b7b43fb1b47e8484b3f95638d58b33bf6 --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab6603f403089335dddbac21151524b806e3d572f9ec143bd47d7fecdceed0a +size 4192 diff --git a/stackexchange/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b71d228982cd95494cfeb08f041f24219327bf2b --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508dd365d539411d55201a0a903f55767e3f91d776c20ef396c8e6b44ea7f6ae +size 33554672 diff --git a/stackexchange/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0903e3fa9c0713fbb8d8bfaba13d437f887c249 --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9068b5d501d95cbb2782d40c3413df65c8001a9287f99e2505f605e9b94c7f3 +size 67109160 diff --git a/stackexchange/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..698bfe18042ff3cf086904b6e25fa71c7c97135b --- /dev/null +++ b/stackexchange/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef7952f157d17feed7d94e7c2b8699d95aef9d86b776287050e4adbb18881c7 +size 4192 diff --git a/stackexchange/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db954f839abdccd3e56fbd438f6d455f5bb8ec70 --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57eeaa3c54574110133ccacdc7ec179f3034b6cc15767c3f2d5679318a448fb +size 8388848 diff --git a/stackexchange/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..201480e7b756934cb25aaa9f91a9df00e84338cc --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1357f44757c94e0316615557701d6de213fa3ddb4ab36c8b48653a4e888026 +size 25166176 diff --git a/stackexchange/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba7e428630d9ff32823ded121e5b25dc5996424a --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b06cf5e6b45e80982233b840922bc39c2578f797cdf8f1f8519a5a2f17d35d7 +size 4192 diff --git a/stackexchange/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f86e9a1c67c30a737d75096970cdda2832083610 --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d2257e5da0bd40841a46828848980a56cbf0d973ae8bd84311f4ff04450a2c +size 33554672 diff --git a/stackexchange/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53ce2beccca8649f0db5f9f90cfbf61180f0c7ca --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b93a9a2b03a4962e1dc2936f99254b031a404e5991eadf971daf7ac42808512d +size 67109160 diff --git a/stackexchange/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9ff3dc0dc7ec1c34c8e829ea8217af92eb65ff7 --- /dev/null +++ b/stackexchange/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc6f6db7069961537522ab457e2cb9ef75c3cab36a97c10146a11537b21445fd +size 4192 diff --git a/stackexchange/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0e3869801fee4635497f4ef626c02b844c3359e --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc1785e81f0052020760b9925e91f8dde375f265e290c80b1b6852d0465da88 +size 8388848 diff --git a/stackexchange/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b9a2dff74fd7a18d04901430065477ef1612cc6 --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e5aa1cb5367c1f02b1a67d35fc5c8451f46605818f1569adbb96f7023b08ce3 +size 25166176 diff --git a/stackexchange/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c69512bbb2e17207d07b891a50e4001448da4fd8 --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cf79d4b77a3ea31cea67b2f0f03dbe306cb64f1af161de2e002a480e42523e +size 4192 diff --git a/stackexchange/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a18b24a1fd016924c9e84cbe664bd52d78868061 --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c81d5758246bf5075c76cd8199aef3f5b0eb88d6d21a9082e97ff3b9ad5b57 +size 33554672 diff --git a/stackexchange/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d37638425763be5ca7157f06e0649418c10c0507 --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaee458362398ec40b4a34ea48abe7de8ca19ea372ed0c0f90d7ffed425f6d8c +size 67109160 diff --git a/stackexchange/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34432829f586c7497acf10944cba609981775237 --- /dev/null +++ b/stackexchange/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d1686bc67c6c34878e8fc59cf2e9758bc881f0ee39f5c81a656a49b78b34f2 +size 4192 diff --git a/stackexchange/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d94bba9d21b40d70188b71c3d5f4f78ffc6208b --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7daad0dff423721eedf5d83f1090c4ced00aa222ee6964c03441f4f0157d10ed +size 8388848 diff --git a/stackexchange/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2acd51a5f1f2b81187df9c6abb086daf2e6c4a38 --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6805e84a469c650ddd2afcf5564438a5f6950159f675e78b1e4fd092e83fe3dc +size 25166176 diff --git a/stackexchange/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1544e8dc1d91c0b680a0c431d0c9a87b77b2c7e --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6104c7ed9001d10c7a0b0ef823ce38742cf61a3d118165e23f88bbb6dedc575 +size 4192 diff --git a/stackexchange/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..213257ad6912e4205a98382c1c5fd2658ac5c6c6 --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:368769668de1a55ec02f8a68bc2229e42cda70b7da9f55578b27570a64510c4b +size 33554672 diff --git a/stackexchange/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2d3db110ae3ff7f79e3b860943cefc492110c2a --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:339a7b5c4c9e3c28b0f27e6fe6c4f6a84204d798d7b09f72a3e24d7de75e7439 +size 67109160 diff --git a/stackexchange/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ea7f40375047f79e8df778b13daaa8c6fe9db14 --- /dev/null +++ b/stackexchange/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbffddb882484cb5be1ce4e186f710b6bdbc1cf57d839f34feff55a5958fa6a +size 4192 diff --git a/stackexchange/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b98fa62ad46e3f480db902eb317c3e664381faa --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5935f3624babc56fc803257bffcd05c26aeaf761938ba2a04f9a143d2dba68a1 +size 8388848 diff --git a/stackexchange/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..186cf27af489da59fbb2fb8009c1978277556573 --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4968c2f6910284c04e883af0e53a31a08863fd9626ac112de3665ccd1e72cf06 +size 25166176 diff --git a/stackexchange/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80f712fa7be4ac2fee63315af067dc0a5cad96c6 --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50113c3edc59fd3cc5711798534f17105bf4e1ddbd173c9edac0dd42c5ddf3dd +size 4192 diff --git a/stackexchange/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..301dd5338d535df78eb9ad35e43eaefe56efc66c --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f2b84d34d9e3286470a06f2f615496a39f835f5dab3a8aa8cfecbb10eddea1 +size 33554672 diff --git a/stackexchange/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..714f484036be1b36c56a5dfd794c5d5a2b12c8c1 --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08dab6db838e1775adc1b82baf11414d1d2cb05347d2f3bd8b387cb6c4da195 +size 67109160 diff --git a/stackexchange/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3872da3a007fc6a9fd1c7e892ec62a1125c456a8 --- /dev/null +++ b/stackexchange/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6534f43d6cc9bd99254f7f768bfbb691e65054b644c7c07dd5fb3763c939824 +size 4192 diff --git a/stackexchange/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0cb1e75ade39c6ce0fe366cf85e52b651cada08 --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676fe1e3c18763033c98f4afa56c6e48c0c15efb844f10b54e1cd5b0b46cdd16 +size 8388848 diff --git a/stackexchange/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..206315cb7856baa3b4bf989ebbeb5036b5415ddd --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b715fd12b1842a28ae7437a17bc165a32c701bd71178d40c0e535f12b8958c56 +size 25166176 diff --git a/stackexchange/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e9f7ea2dc285f3822397b5feacc4625ef7f6452 --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a153144f4fae806a66738684c58fb49253f1d4295baecebd5d4c65250caefe6 +size 4192 diff --git a/stackexchange/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7570ec2af7178ac6d5bec917acc914466476a844 --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e78e765a6537b5ba0e92a16372465178ab39ce59432479956281d59abf09514 +size 33554672 diff --git a/stackexchange/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db4a4e6b489232c2e2643a0e27f6637dae04e3f9 --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be5e083b68d71bcd0c57c33e3facd88b2dc5834428a7495970bc62ea11cd637 +size 67109160 diff --git a/stackexchange/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65c8b1927f2f6d74da1f8d3355f6c485b935702f --- /dev/null +++ b/stackexchange/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca954eb57bf2cc3bc3731d595bfb025fd44c80533d75271c95ab002e9a87d0c +size 4192 diff --git a/stackexchange/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86e4848ff4a51f8fa4520d5984d6e6fc866181ae --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87c3c02df53b6cb935739ceaad279eac5b0d8bf007cfb47cceafdca49d89df6 +size 8388848 diff --git a/stackexchange/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..395d132cd971038fdd63cbfcd9e0ff64904e1ed7 --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c52a7281b6ec0561c17d35688941375fb168d13ee614694f418c04474b35533 +size 25166176 diff --git a/stackexchange/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38e7209c2c1c8895d5d09d388fc979175e9eee7e --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed8576d031c3c2174e6a397bfeff86226b08b1d9ebf72536d6c14733cd3a8c5 +size 4192 diff --git a/stackexchange/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72d7c6b8266f24c4c1a9de2ec1173a2aa924cb53 --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9248da2b4830c618c6024ae6b8314b6ad3afd9592bca36f7c9d7b66ef6d7586b +size 33554672 diff --git a/stackexchange/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd5f7f0caa727de42c6f2840ec879683923e8c69 --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8650ec7f378adb54b19e4f5bbd20fd07a43f822f2aae2c31bc3f60943ef1ff09 +size 67109160 diff --git a/stackexchange/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1109530942670a97c6315f01f71ed5930284ff5 --- /dev/null +++ b/stackexchange/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f45ce17514232544aac9da29f5bba5a697c383ee0d6880c510d5b0fec0dd08 +size 4192 diff --git a/stackexchange/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e872b5ae15ed7306c95db98be61fd84157c0ff5 --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3de86f67107bb96ca73320b3918e9a1e037deb796af808bf2839070d7e35798 +size 8388848 diff --git a/stackexchange/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..343112f8737ae2c396677d1dee1cd432117faa05 --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b72bc31a5101a7d99882d1b0f47d007b5344fb7f309a3d0ff0bac6fd8073cf8 +size 25166176 diff --git a/stackexchange/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abd2edf6e649359e6415b136a2eb67bd533193ba --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a32b7a5e7361eaceb086663f0b518ec70125d124b9222244650e0f0d237262 +size 4192 diff --git a/stackexchange/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8650a824fb60a0adf4767bfaca3762ff96f473f9 --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ccedce65bdb02ceb97f029680a822ff7901c11a960b5f5779dffa05f61f893 +size 33554672 diff --git a/stackexchange/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..854accf5b831bb8e078c05175346807208eeafd9 --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecbc56ef24a8c48188a8be126674d4d8eae9be7f7d48d050995338678785098 +size 67109160 diff --git a/stackexchange/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca72d6bf0954cc34ad80d5d40a5eff32f755e962 --- /dev/null +++ b/stackexchange/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b66679a95d50bfd2bdf5201a267cc32a1f35f03746ffcef218a6bd53e3bdf35 +size 4192 diff --git a/stackexchange/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..421d1e9a1545a45487f959f44e6446e5124e2f14 --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198179f81702d69c1c0fb76af9faec9ffafbfe1bc6b0b77d1d90a62f9688554c +size 8388848 diff --git a/stackexchange/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7779f7d29f4e32a907471bd0500daf6918cfd980 --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c25ab40a1e78cc86882193b213095dbacef2162b44e212e566e14adcd589aaa +size 25166176 diff --git a/stackexchange/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98d299d0ffd26a931e2967933b5737efde042200 --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f7b7a2b3c30e20d9d9148f319e533f5ee29850db03e878127dec089d9a8068 +size 4192 diff --git a/stackexchange/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f544f34be41e237807f5b9ff74a8e6d5b70ff48 --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7b2fa37f6292bed272ee99bed94a7e46b17ba55c3e0357103dc5ad1b7e0290 +size 33554672 diff --git a/stackexchange/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e081d90a0c68b0c21a2972d56988ae32399c9e8d --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4258ab3fa3136b1f95b07a6cc29e6fccf8f22ccdca8dfea1b4a46ebef3807677 +size 67109160 diff --git a/stackexchange/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7495736222caf3079f16c8f4ee28459a7e1a1abd --- /dev/null +++ b/stackexchange/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c6fe4dc60f2c0c5cb3c3de8d5f713761764b36e88897daa120963348572c22 +size 4192 diff --git a/stackexchange/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fbd17852ce7657c66ded8ac0c5df4f99e7dcafe --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c43b5980eacf08f536345200e37528728a3e73fa80dfdc90cc9c55b0b97a550 +size 8388848 diff --git a/stackexchange/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5938775bd6e4066103cbca8449123de1609ee2bd --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac400f989797033bea37ae0c92933cb100d430c0d967d3d90966983e9c4b63a +size 25166176 diff --git a/stackexchange/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e628b4c107de08abd534f75e27b144851f3b79d2 --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05cb2b69fba3b1cf66acc00c05b7da2b58e79af6b32ee9675fdf8bcbd38666f +size 4192 diff --git a/stackexchange/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2356bf2339494cb940223ffa3b9b7d693b79e3f0 --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ed3aaa0722870e7e0d51547e5b4614295a7ff50a440233da2c7f526b742f7c +size 33554672 diff --git a/stackexchange/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476dbc844644187d25387d94fa4cf3131ae4a0f9 --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44266f4d47c3b379082e685afc2f9352050f7fc64bc467da28863e6455050cc6 +size 67109160 diff --git a/stackexchange/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89103a23fe8164db971a95538145686f499ad26a --- /dev/null +++ b/stackexchange/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a061e40f09df34fda141972e55b4a03befbf428301a12d508e5e7ce977d223a +size 4192 diff --git a/stackexchange/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..479cc90cf26820ab3519493ce96cbb765cb90b7e --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deea46ee4b933abcc1905c7a2f7d369ac840ca476eb394d623034b9f66a26b09 +size 8388848 diff --git a/stackexchange/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10fa0414352f9ce6dbe77078a674d288d7b02a87 --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fce92263c77feedf836e72a5be85180d4560f0c0de4ae983ef7af5c538a6188 +size 25166176 diff --git a/stackexchange/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f066042a5932486a9bff88fdaec67e3ad63a0bb --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7edfd5e7b8529765585305a38f9a2e3d0472facdc22e904d419341612897fc8e +size 4192 diff --git a/stackexchange/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6962513dc925042389e2e5013735ebb0a98f5662 --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0978b68fd41ce14bb156a2a79fdbf05394cacca361354b8f887bcca43ee81b9 +size 33554672 diff --git a/stackexchange/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd6c132b197d1ec575d9c2728225b167a47d4fea --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95edea73caa76fb400e64425328ea6e669da7bb3d5ae04989cadb31db8a9df59 +size 67109160 diff --git a/stackexchange/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c74d747bf3b5574d083f8eb35ed69c4fc2f862b --- /dev/null +++ b/stackexchange/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a6ad6aeae89e9907ca4dbe31c7b1d3bf72fa207b28a3ecd6e842548f70444b +size 4192 diff --git a/stackexchange/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bf9372c23da6660270f0db11e2a353e2babe691 --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48cb34c22c75c574f6f1ce95bfd4a434d5e5d6d3353054e3683859aa4187796 +size 8388848 diff --git a/stackexchange/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92a21646421d6baed133502f02c15d61876b201d --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53943ce1eeee1885bacdb383be818ddaf1a14a1defc949ff8435f888ed0b43d5 +size 25166176 diff --git a/stackexchange/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..893fe098c07fa7e83075673b3c741eb0258e9690 --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c0a46e56c1a6cff5b8c9de137cfbd8edb1a06c43e84a4fce68d247c50518dd +size 4192 diff --git a/stackexchange/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b74a4d4a21a6bd53fcbeb9f494edf593fb8f955e --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdb73497e3945e8200eed9d0de3b8d973d92aab6ccd4dac14c61496ec4ca469 +size 33554672 diff --git a/stackexchange/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abe6e2eaf1111aea2634ce2ba404e86644489dcd --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10e61585db3072dc9e8264064ebe6bd037ad0650961be2a26e3f9bf6b3e732c +size 67109160 diff --git a/stackexchange/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5f77ef1d981d9233d85fb8b1d3d3e7467e91786 --- /dev/null +++ b/stackexchange/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a788856c07467a2433627419fea360577c60c02f05bae839bc4987145946370f +size 4192 diff --git a/stackexchange/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8735637480d6e52429c4944d9ae534dc0f7f8838 --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec01027411accfbc632237b5e4411df4c51038321010c9ab805f58f4f5bae22a +size 8388848 diff --git a/stackexchange/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0d0336d66f9568bb843d39434fd30689627fc69 --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee7354c71dc53e143b0ece4d64f228de3e0eae914c9da196a7f50916d8356fe +size 25166176 diff --git a/stackexchange/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaeacd2cade9b47fe44cd4bae61a86b51a0d555d --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1152d8588024fef62b6140f7558fa776a8ed871a72ba9a183701707001f0fd0 +size 4192 diff --git a/stackexchange/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d674b8936999a47a866afe4217923102409379a3 --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510fac02826425cf7ec004a279e009f215498d33c02af081eee135bf7dedcc83 +size 33554672 diff --git a/stackexchange/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50deb458c2465604da4dfa88c45f702f0afd1a3f --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fac91baffebd7cb27357ac358c8a44e126012eb9ef58da813cf99c47ffa0d23 +size 67109160 diff --git a/stackexchange/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c12b61d43db1415bcba3c6bed90e6a25807eaf3 --- /dev/null +++ b/stackexchange/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526feef185ee273ebf7d4c295a676cdd3666f600a642a7612098441707e14b08 +size 4192 diff --git a/stackexchange/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f4db07d9c3e5055ba13bf7a1c7dc4a9087c78da --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a6ffedf4213bd2d15718c6257339d8fc6cf678a2afe860db96c7faf7f4d0a4 +size 8388848 diff --git a/stackexchange/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed91cce2be447fec10bffe77494175da970253a1 --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf102e3ae79bc93b922415d012f4921274c7a6ac451b914124b489dc17dad6d +size 25166176 diff --git a/stackexchange/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..769ff81c8b2fd2eb03be09827e893b6e5ade04bf --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db5c99919b7c1d1807fce64ce6ced19684490a97d64634f425bb992740a05a6 +size 4192 diff --git a/stackexchange/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35ca33550d392f53d2dfc14b257b798c3e0c0a46 --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a8b4aa3b789ba03380d97a97a6ba905d659fc56e4e43f2af6d15acd15071ba +size 33554672 diff --git a/stackexchange/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6c9eea5ad530ff981a995a49e69ecd1f4ed28f1 --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751c3cc9e170ac6cfe1e752859f36da1c6e0bc4bb217207e603d715dba18a559 +size 67109160 diff --git a/stackexchange/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a987212d77dddf7162b18cf118199a0101a83062 --- /dev/null +++ b/stackexchange/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26db534ef07642009200b7281b449b01e6619fd73af2045fcb8bc9bd53bb74c +size 4192 diff --git a/stackexchange/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d79ed2d2304d7e62536cce306cda08f7b5269511 --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954dceb82d31df41776247af324df5f99beb921e62739abfbcac47e060eee660 +size 8388848 diff --git a/stackexchange/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bbce5223c68ff01e84b74f0cd9b8467734c891c --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8efd5b3274e90d8763b98b9410478ad4cdc3af30c3bbbb0ea2720fd340eef7d +size 25166176 diff --git a/stackexchange/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b03ca3fbe033545925c9bbb9a8d605fc8218f2eb --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8383325e095f4c04331d9169cdeca24bf61b0740c5f35b401402c077217fc6a9 +size 4192 diff --git a/stackexchange/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e385410900de9f6f4d406c8d09eba6b60cc95acb --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e2d0531ad060548f57cc5ed33a0f69aefe13639240eebda7633632362c5510 +size 33554672 diff --git a/stackexchange/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc816b058f252e3902ddcf83254d9e9d33e44f45 --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6265366fefe2bc7d8f0174910427c15f95354ed454e922ef6949c7ee0513fa03 +size 67109160 diff --git a/stackexchange/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45ab2f05829406f0d2ea2c2546cbe25f20b67c12 --- /dev/null +++ b/stackexchange/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94badf59cef18a192e3b138ed4cc99c6697457388262f1addc7ff65b5cd8a103 +size 4192 diff --git a/stackexchange/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b3a5d9e6aa1440ff972b388cddf8436f9b68fc5 --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a896a5815db4daafcc99866953d973372a5374643a9856cf61ce8c3cf6e9c2 +size 8388848 diff --git a/stackexchange/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36030011bafda4cb95be459077f81b7ba564a1d4 --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88dae29f982297abbc14d3ceb173583b16f7dc3841e7535056638fcfc851a614 +size 25166176 diff --git a/stackexchange/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/stackexchange/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5af5d53fdd17b9473388fc6dd49721a677c70744 --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58604d97b42e5a6c27e26595d46ea85652971b012850074dc3f1a444a81accc +size 4192 diff --git a/stackexchange/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f556b679e97abf4e66d58a09e4dd9f0194685a3 --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a28c59362a23611f6328bd02d3aa2ce69a72390157afb1e9e6b94aed3954d9 +size 33554672 diff --git a/stackexchange/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5129fb82de1b5cfdb120eb9532e3be537239fe03 --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19cf3c2993a0c570ee11e7fbe31095dfec326611150c10037e782668026cb77b +size 67109160 diff --git a/stackexchange/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/stackexchange/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51198c82923f9b536088c95777dc7aa45ad5fa9c --- /dev/null +++ b/stackexchange/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b27827880a82cdba477749e3d1f9dede3fd91129571ac10e576fb5feea084c +size 4192 diff --git a/stackexchange/model/final_layer_norm/pp_block/model_weight.safetensors b/stackexchange/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f25f43cc7ec4bb3a53be9be23db93e2a2003b83a --- /dev/null +++ b/stackexchange/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c06d36c0f77db9babc80ddbcec4841a66dbc0142fddeb5026f8dfbc3c97e6ec +size 4192 diff --git a/stackexchange/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackexchange/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b663827cccef0031ee4b8f54834c4863a8e8974 --- /dev/null +++ b/stackexchange/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a574f77cd88c136e6769fb1c49072dff7de47ecd3cbc87db9ed4b7a0f12d96 +size 205914352 diff --git a/stackexchange/model_config.json b/stackexchange/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/stackexchange/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/stackv2/checkpoint_metadata.json b/stackv2/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/stackv2/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/stackv2/config.yaml b/stackv2/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f30a989b712e2cfba2cc0cc2abd6a33f810032f5 --- /dev/null +++ b/stackv2/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredstackv2-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/stackv2/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad59f50163e19c5f7255126e7584b440c595b0ed --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3c603d1f40d428c16d165fd87f615a3f94a4b9b445dbb9ed5ed502a963908a +size 8388848 diff --git a/stackv2/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8387a30a9b7407fa974d71ec4f476e5ddbebef4f --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d77d3bbdcb5793a66d795db484b52b2ff88c116f1480f64d451108c701abcc +size 25166176 diff --git a/stackv2/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4f3752a4a42b0cce17dfb1ca5f8250db75968b3 --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41071209c86d6a4a2b8bdc7860c51e702cff1b1a56af2a08f1454fcfda64ebeb +size 4192 diff --git a/stackv2/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a458fbd76714a15f32194c6c52a52aab11357a2e --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd41a76db9446c52c631efc629ea44dd78f888428e85f28a36097c1241b37cf +size 33554672 diff --git a/stackv2/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14ebbe85136bc3ccc918148762c8c6b5c602af7b --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdba78b60ef790db23be031856827bf4539e73219151b0450184ccbd8e18193 +size 67109160 diff --git a/stackv2/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7907b33ad9d862ac0bbe3478c2594776fe554b05 --- /dev/null +++ b/stackv2/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2da912fba2cc954c7da755e3911ce467810262f4a20e331b61cc4fbceb4b8d +size 4192 diff --git a/stackv2/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5099a35ee30705cbd148aeea993158da59a92a0 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d59faa7f9fce23b89e2c36a4d9902258d601dbbfb45bb20d5e0b4b781dfaa50 +size 8388848 diff --git a/stackv2/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f347606ad7bc9e2319c966384c5bfb3b515b280 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1edca0597f33bc2f92065e5a367af3e9b48370bb3f89de38407d4c3642773b37 +size 25166176 diff --git a/stackv2/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d468e44486c83bfa0e9c77225a5e91a1c26fde4 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e02a329dc0612ce1d2cdd4cfc97138b242c48f1182cd33da1a104121e57a73 +size 4192 diff --git a/stackv2/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cf349933706a3752c0d3c5bf461f733159f9cd0 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fea711d061b1699487d5b9f178f05094cb89cf7315f870a28310d3947ab926c +size 33554672 diff --git a/stackv2/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2a1ba1203b7c6b13841de91bd515d8da71c0227 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fe848b9e3518c4a30befa8c78394fc2c91a464fc176c7e3abe07a0ebf96a76 +size 67109160 diff --git a/stackv2/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..449f17131ecbdd41a38cb891f274da174064a672 --- /dev/null +++ b/stackv2/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258a0a121a2667d5f4c4c205244b477201d31b0d95f9a88503dbdf97e2f6c7c1 +size 4192 diff --git a/stackv2/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7427c3a1aa81858d87c29c71e402cf4ca8a20b82 --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354a0d50c5e4cd4e8cb14017bb90ad3ad0e8bfd466b6ba6b73aa73f3c1b1f34e +size 8388848 diff --git a/stackv2/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..639f185ce8bb2c3a9983514d60ceb434d964ea26 --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad8bccac649c48af257d42d9e4a8ff72399e02817c7040836a69e3c651772a3 +size 25166176 diff --git a/stackv2/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce7e62df68e33c97f7e63831e3cbf534cfe00e5c --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47d0aef670681c3965bc3652267972a2baee960f7a508d778ef1a031f98a88ad +size 4192 diff --git a/stackv2/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c93fd9f7a931593f4c0c5acf43de58ea17fcb3c2 --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab1cc44d7158c5de1b9e63175b0504dd27bed7acb4a6df3cb6a435a7317e0b3 +size 33554672 diff --git a/stackv2/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38dae9fde7879ea04b70ea658b42460ded851d02 --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e68431487726a52e699a95519fade33d255c4db9ca6dfad9f314e7ec65efd1 +size 67109160 diff --git a/stackv2/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbf0c26e1feb944e0c76a3d8d6ab392b867a77d3 --- /dev/null +++ b/stackv2/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8199cf98d782322ef3a49e14b6d23f3c03ed027e1234f6bba0f9c3d335217e43 +size 4192 diff --git a/stackv2/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e9171c358a6e03f95412604c0d7547d26b41219 --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3b72a22d88b48af170e76076ffd69e18d8a4bfe10a5f8d879537af24bf3864 +size 8388848 diff --git a/stackv2/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..deb3e3f93b9b635328ce9bc28a16b332f68a485d --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07e6e9a2805e21e128b3a9d263bb607069555024215c012cb0a6e82f9d53df2 +size 25166176 diff --git a/stackv2/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c4972ee9b2516990efb8c1b2d42ae59bec1bf63 --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab59acf78780ab66150ddd22207418e2511f212bc32eec5b709e63c9bce7773 +size 4192 diff --git a/stackv2/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95149215db2906b7b2d306eabca85d89a2f87ab3 --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6093ddb583477e041fd2497fd161b365bfafa8b73e0910621c2a64ec1ec4a7f2 +size 33554672 diff --git a/stackv2/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e854f3c420c5c31ec5a7bd027a774a419314621 --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9029e2969e5b02c1df4be4fc8a0c118d0b9db5babff31e686c22a0d3f59369 +size 67109160 diff --git a/stackv2/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bf0eeabc94927dbe6f2e0dd2fcc6d99161e39ca --- /dev/null +++ b/stackv2/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe5332dfa901022504ab0d12c1ce5a46966591c3e5527bfbad5794b6a5d97cd +size 4192 diff --git a/stackv2/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf907c0836d9125def514085a57556de2925325a --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a9cdd72183d7704aa68435e0bfb1b2373283661b4d284203f597956f35b7c9 +size 8388848 diff --git a/stackv2/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe3459ef5d70f1d1bf565f1d122b0f1d3f01ccd4 --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b3bfbae9d90322f10f77cf73acff508bd5b55f7ea1eb8838b04bc1108551f27 +size 25166176 diff --git a/stackv2/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfc0a1af69b557f5d54af8bc0bd1b4e286cc03b5 --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f95ee840171a3b84387390b21555381220aebe33a0efdecc19a0d869679dae8 +size 4192 diff --git a/stackv2/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfcc2aea64c02d65c199279abc52da8f14855b05 --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8962d90c913397b2cb50f6e0538465800b7fdb2b49946491e65b3a02b7751e97 +size 33554672 diff --git a/stackv2/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..122893e5539273439a6b3b5732a7c50c15258efc --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbba71e373cbcc8fa088be2769503fc778663e29c69397fef6f07588724dfa54 +size 67109160 diff --git a/stackv2/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21f186530812c7191dc832178ca0cbc899e0ace6 --- /dev/null +++ b/stackv2/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5076897a176b84d93363964b4873bdf2785d4b987fd449bdee8bb37361e94b +size 4192 diff --git a/stackv2/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c132cccdf6aefe748cda975b640efc340eb238c --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6039e1ea964a95ec62b8f4676140e39f440d8003a687253a81f5bc7d8142a8a +size 8388848 diff --git a/stackv2/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a1ed846530950d8a102403b5011abfeb0d11aba --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150bfaa1088f0c8ffa46e53502356ff1372ffa2b6f35cec62ae6aae06467d7d6 +size 25166176 diff --git a/stackv2/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9771b35d9db8778851d42f8dd7165b33fce843c8 --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af855817b64b172a593a30880e490d365fe2c8b350f49435c0bdd5858fe903f2 +size 4192 diff --git a/stackv2/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea8a33c80fefa686a5f6c78f08b534f2db7a88c6 --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20264c8d815949be6ae9c34ed419db8512ee8380dd4eb668c55df979163c7f2 +size 33554672 diff --git a/stackv2/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd96619283ba46bcdfcf0a93fb07421e3c55c9a2 --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5c0ba470fc1cb29a6674d6a2ae7cfc354902b277c213f971f556e3f430fd93 +size 67109160 diff --git a/stackv2/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43ac41e6e4ae3a4be562cccf9ecd25ca41dc5a66 --- /dev/null +++ b/stackv2/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d0d6d07bbc1e11160b11b94c5e24d8c166d994bbe26b2f357be3b34e0e22a1 +size 4192 diff --git a/stackv2/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70dfda48f4a34e7f87a392f1d0273a065d666edc --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2370195fa0ed6d25be6427b7ef5df8654d557eba6bfa233defdd9b49185e261a +size 8388848 diff --git a/stackv2/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c90c4b60d6ff87c192a9b20b543fd1b47b5fd88 --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62974e3b6a056c98546cba63c7cdf0098cde325e8cc81f1ebd20896ec83789c2 +size 25166176 diff --git a/stackv2/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03d3aa7d672936563084cc395fcd44096043d9e4 --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75330030ee15b1fc14d94fb670238b1975fc5227a1eed1ce8dd21fe40f61b94 +size 4192 diff --git a/stackv2/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b00806b7b4df2fa7576008c32a31c9fa09f17a2e --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe00e3d5801bb33e0e011474f2988f2c924d996278f6f7de7ebc2f21a2ba504 +size 33554672 diff --git a/stackv2/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6798e61254429c6abffff176a2aa34745cf5cd5 --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491f5b6265f3ccfe0cb0242698381732302b2142a1439caa0eea3ac1e77914c0 +size 67109160 diff --git a/stackv2/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c1d17397c5f44073acc8ad03127b780014a67fc --- /dev/null +++ b/stackv2/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d423ce63003ad3d842c5d824a0b33b65e95504c36c7c69df8eb8b358cdb0ff7 +size 4192 diff --git a/stackv2/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aac1dd8284e7f2a7d1d91f8ad1f622e6e3e0657a --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705b10527666bd550d3f0f1311c857e5c1a2960095f618642767263c76104b6b +size 8388848 diff --git a/stackv2/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b06eec8f8bd84fa4590da30cd50f47e29fa983d2 --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4be361f8d74bd5de653e4193f96b6bdfde02af0052d9a9cab9300fdf1b6f1c +size 25166176 diff --git a/stackv2/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fd50124d8b95461569064ffe56d2a26ba843a31 --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05fe557a717051de25a018a8c6e07aa09639fea19fbd82b8816c08e9eceb4d40 +size 4192 diff --git a/stackv2/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc44b6c6662c801f64b19dd5670f6720fbc98649 --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47f4d406331450e4b0dab9bbe5fab25ac4ebaa04c2d3baf73e141f5b823876c +size 33554672 diff --git a/stackv2/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5d6cd134b62f1c5d21cc17511714d190f59c9ca --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fac6a2249cd17d2bd4128a493769b3c9d2cf6dd8d5bb43bee89cf86a849729e +size 67109160 diff --git a/stackv2/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4025654d9bed2e89a2cb61febf554a66a0735a1 --- /dev/null +++ b/stackv2/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f4871d8920d3e90b70296d777d79c2eea5b969e61a1b75a27c826fc289f9d1 +size 4192 diff --git a/stackv2/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20aaa0bdb4f6ca0bad905346c12773a99bc2d280 --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86058b3567d933a61e7fe6ddcec442f46bda1024712d4db11b53626a8a176d1e +size 8388848 diff --git a/stackv2/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..575736005b52e4e14dcdd520341fd4be169830ff --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9339813018bce73930545c3d587d45cc3f6351935f66a2393c5ae6cd8b2000ba +size 25166176 diff --git a/stackv2/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e149a44fb978bdcdd5a616c93040fa0f6081d4a4 --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b05f6daa0ae2a80bd81d9865943ac546b0e2a4631b677b04c06da364596e35 +size 4192 diff --git a/stackv2/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e735112c87adea9ce5bc0ac3d251cfca07b6db4 --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383b3969353e99ded972164324ee08ace19f8627099818a93c3c9c783037bee3 +size 33554672 diff --git a/stackv2/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acc1eebdd15257221c2f45130bbd9349b03400a8 --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff510a51bcdcd158937f4f673c3022b2fd1d6474326f697e23e748d611b234d +size 67109160 diff --git a/stackv2/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7807ba728a5399f270e3561c390e3d4238663ae8 --- /dev/null +++ b/stackv2/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846330416e33f3db55169ca9743c507d4299fe935c1a299722b2c86e36aa7b46 +size 4192 diff --git a/stackv2/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0561e37948561e616494b55689e0f43d17378eda --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc720e9a5cf5354343c544f2ae76d2fd0c297a70d8e4dd4e093b55b6385f55c6 +size 8388848 diff --git a/stackv2/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd59623f8ed372a2cb0ff3813455bd2d0af50485 --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b9a044e45bab807f2d50cacf9ac5b73bea0fa4cb0119ad3d2feea844949823 +size 25166176 diff --git a/stackv2/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b63f046259b2897bb08f9e490e6fa50d482f8eff --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8903ff943bf72ad659ba17498ddf03f0ea0cedfdf33ecf88a19b9b198c26178 +size 4192 diff --git a/stackv2/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bee8eb1bf6c1957171c023a457ad205f123d4cdc --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d975dfa27e90950c5a62e5c5756f7fbe8cf5a0c8b82b48f720e5d53c7a1e6e +size 33554672 diff --git a/stackv2/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9526c946b7d5b773f4769b4c80eeed008cbf490 --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66337df87176479ace1806607f5b2fb08f210cbbb983239ac06b2150b42fef7a +size 67109160 diff --git a/stackv2/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8eb4e2efc224d77438003975106a48cbfa1fabf --- /dev/null +++ b/stackv2/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9f52aec613c6587069821deb1e6c708fb3759045c53ec714786ee485cd44aa +size 4192 diff --git a/stackv2/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87ade62e9716f88df7ddb9e25e72213c81158821 --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e36ed8a2bd229fa70a66d9eb5bda5ba8909f3db4008c695a54d080c187b6c6 +size 8388848 diff --git a/stackv2/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f5ab76fb6ba8e92bd086783f166ffab95464e6a --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd62ff784443b8ba341797f9612dad3674dbb310de9375407f467b5647da98a +size 25166176 diff --git a/stackv2/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3c701bd70100bef7c27a45dd395772de753a9a8 --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0512df5cc281d6b05446e345d272a7247a385bb6fe78de6b8e7a669975141b59 +size 4192 diff --git a/stackv2/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aabc03e2ddfa056cce9c5789155b94853b98414 --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3fff31d229ab9ed6795e6ac480783026f8af48bdf0aaac885739c96ec6f81c +size 33554672 diff --git a/stackv2/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbdd487a646f78901068779fb185c0c28b50c0ec --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f2afafa2ae5807babcadb4c4cabffc835cae642f39c6275bec34e720b44d2e +size 67109160 diff --git a/stackv2/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..792c11a0a990163f77f8fd8dd57eb5c1d6ccf5f8 --- /dev/null +++ b/stackv2/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edaa016d69119203523fed16f2b872fde85f0470c209901d9428446ad1f02578 +size 4192 diff --git a/stackv2/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38d7a09cf6724fbb1190258d3a84f5d4353bdad6 --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9c34c83af9eff5d5907b409c0582536ff77ce651adc5f7a71bc0758af8fcbb +size 8388848 diff --git a/stackv2/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ee1b5aa3806f2e44679ea00cf7d53485860322a --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0701d9eac9cbf50c1f0bc6504736de708f69a6077425e68f5c88d24d3cdac0f +size 25166176 diff --git a/stackv2/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1851a19b08c4beb7508d879eeda54504e0d9d414 --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231efbfacb661e5c45a316dfeedb625f806e7033dd07fa4a41f02cb3f0238122 +size 4192 diff --git a/stackv2/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ee0cecbc70114d6e978e8ae46af31ccff5839b4 --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2730ef597125802979d44e65d250d05bacb23272ed7e59130742a68bb61862c4 +size 33554672 diff --git a/stackv2/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476b05ae2def2d7d523c2d8e670191127a33dd8e --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86336363c0c71cbfd0d84b086617e35ffb047d345272d03d97bbab10a96c0f60 +size 67109160 diff --git a/stackv2/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6a258dfcb48e0464ca3b14fa006f2957fe41195 --- /dev/null +++ b/stackv2/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfec478cb0f003ce5b69023e362b58751b92dfcd57f36d05cd2f7bfb361cd28 +size 4192 diff --git a/stackv2/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f094242f5ac3dab80159136b945debd339ecdc13 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31bd42eac8eba4c92e84a37a91b1aa2314daa55d9d08e8b56a8fdcac2f2397e1 +size 8388848 diff --git a/stackv2/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d84e5d8a71c1d5ed18142947ca3c868189b09046 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee303a6ba52ee2ddc4d669e44ecf4e02053655b2f1668bd54495090fc451444 +size 25166176 diff --git a/stackv2/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad8b4c6c5009fc0b1585ac186a0a650793fdb5e6 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1960732031ab1ccb9d7a1c39030a09369eb89a1183fd8168f5337e60ef85c11e +size 4192 diff --git a/stackv2/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5db76ed77c096ed9d7f0a983ce12988dc7ed51f7 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4f018da06772194b0479bd52413eb8b358fd00737789de099d81a417be9610 +size 33554672 diff --git a/stackv2/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5750e2362853086504118133ba88f665de229846 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b705d3b450aed8f915a4b4e50b2faa430b9108e0c73a28becdf781408d581a9a +size 67109160 diff --git a/stackv2/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00aca181345ff87e6d7668674aeb5238bf27cb24 --- /dev/null +++ b/stackv2/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f2845fa61333e785c688d490fd9bc324ddff76286740366c491fe2299de4c1 +size 4192 diff --git a/stackv2/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21750160add107d15a524be2ca137ea44e3e8e6c --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c089f0cdfad270ac4e026474b57c4306ffe6e937fc811f9d9f8bd3b77bde08 +size 8388848 diff --git a/stackv2/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9689f89cffc7b93f351de9f0c8be64dd1ad9fd0 --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00b40022a91e0d70bdd78b4568ba079208854e8da0073d4d74f3aef43664e67 +size 25166176 diff --git a/stackv2/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93d8f7f2697e30d10034b2545131e1579b70cdd6 --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a9ad6a1c9499962c6119480fa9367f2a86cc6355b505f10a1993bb5c55c9c3 +size 4192 diff --git a/stackv2/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23f2c5f1fb87bda098cfa4e2f774c7216c72aa8c --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ed4bf75deece763c4a360d24eda328a15703814933667a9af8d1b4ff27a599 +size 33554672 diff --git a/stackv2/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4a0e99acbfc04fc2a6af67627dfdcec4cade243 --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8260e0e3329f23715701a5616ed2cb013259045b5137e01196487d4f33b0111e +size 67109160 diff --git a/stackv2/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b52a4975513569f0125ba245bb15903fff1a7c7 --- /dev/null +++ b/stackv2/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab2c9ba580cb263247b6afa9c7f183460d42f68f64a62f8408acd4398485cc8 +size 4192 diff --git a/stackv2/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9627e141b363747b58c8521e7c7d5cf2fede753d --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cdfc9e76a0ddfca522b6f0c2e6630f118c0400e47f754d7d020805571f84a6 +size 8388848 diff --git a/stackv2/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88f7f56c123ae1f2da43fb435aced73a34a45282 --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2dd7c1e9d453a5f893a63a341de8303a92e9e32fa1b213f9b973cecb7e1a61 +size 25166176 diff --git a/stackv2/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74fd1bf5cc6257e7c1f46d4af75e88d33e591ce5 --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2dad5f59236b4208699f01fec24465b8432c6b890db3d533566023b1f591937 +size 4192 diff --git a/stackv2/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b03f9d2e1edc485393055f65136621c3096f19bd --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d05987040800f045a1863b4d7db07d7b09bd60acf3b0c952185171a802e666a +size 33554672 diff --git a/stackv2/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44e3b15829309e0313da2208f5c5594e2879e3a3 --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f2e21916e8132f9a699f5fbbe88e3869cedea7031e143fb7ad660901fa4080 +size 67109160 diff --git a/stackv2/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38071a6e97dc73215080f4e180e35b69e8d10aaf --- /dev/null +++ b/stackv2/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d0668cea18d0e08229667bd9b0152f353133b43dda18608bfe5107de89b039 +size 4192 diff --git a/stackv2/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4260d84799f1c6a70daa5fee978b356d666f33d5 --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7525fff8dd8f9ed12cd16779bb5729fafd75f7434ff0cb01fb6c65294f8e1c +size 8388848 diff --git a/stackv2/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf2dd9708bb66107c6e56c433907528336a23b5d --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b914f86df1d830da2efd15d02fbbd2747ae67e39821f345308c88d9d33b5dc37 +size 25166176 diff --git a/stackv2/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c92c37c0dc1bd8d009d530b13cb63dff31ece473 --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3b160d3c526e95efb4c724ab8602603ac7d0fe0bba878d7bb66e47d1a217ea +size 4192 diff --git a/stackv2/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe5f842abdbf9f3997abf05fc0c6e8cc4589fa35 --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e8ebcaf6102522d7b2fc99700f6b813ee9f8cccc4f97cd0237a401363de186 +size 33554672 diff --git a/stackv2/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4f5cbb2f7ec3daac23a337fefe66167d61fe797 --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933fd301d28db3e00985398e4a8b56ee193ddaa09071ea65f9ffe7863e0ea70e +size 67109160 diff --git a/stackv2/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2a40adc1a5bd010eecd087e68659c51e4348672 --- /dev/null +++ b/stackv2/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b212c4d600cea990301fe48f16d110b8302da72cccb9fcc0e195037e30e3ab1a +size 4192 diff --git a/stackv2/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de1312ba70225728c64deceaa6556a058eac84a5 --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6523c4a6cd25d53a5ddf65dc017a6804f0faaa006f899ad5ef824ab7ce4021ad +size 8388848 diff --git a/stackv2/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d0b76d9fccdecd699bbde2f1daea4040fb0a966 --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c297bb5f5ee4829a8b60af443fec378f8962e519ebeea0ca53eee7abf2d74d +size 25166176 diff --git a/stackv2/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..892608482b47931050b0df63d967ba0a7f854053 --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abc7e5c6f3cb434adfdfc7feae0f053f4b647e99926be671d3b70156598aa78 +size 4192 diff --git a/stackv2/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48cfc63f40094081e2476ceec94e60676ef1433b --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf9180116048907ccb0c8615aa1012dca31174b25d0dde28f4905f3de1ad4a7 +size 33554672 diff --git a/stackv2/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5821c23290f0963173d995bbd547372a548a1664 --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa4130ce827d1e5ac5db18d720b483873cc1b1d663b187993a2c1e52a16c9e4 +size 67109160 diff --git a/stackv2/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f713453c1c0955c5fc44590e9df976118dbc23f --- /dev/null +++ b/stackv2/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc81568051d48ad7f72c55598c6c6803304ffcd0d3d846fe77dcdee805309fee +size 4192 diff --git a/stackv2/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4904111530aeccdf318eaf796f8c3d733491ff1 --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc4fc9559fdf76915bff08a446f1983e0401e1284ac982c1f666a2b9d23d1e9 +size 8388848 diff --git a/stackv2/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b15446df301ff6ce861183846458fc346d773eb0 --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b6ae079c4ed8bb0141d74ae77577b7e99922d12b31a515c83917a23918f118 +size 25166176 diff --git a/stackv2/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d820a9b00d9af921f80622bda709ff7ecc65d1f --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321efb212a62034ad4ee69a2ddaaee388d44bd172c9f0019daae0e5bb90b4e5d +size 4192 diff --git a/stackv2/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f20d188a57d14189611ec087b10dfa11c9803df --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:172a2a744d7684ef81d1f59d8d828e090caac7f0ef61924802dee0c14cfaf0d2 +size 33554672 diff --git a/stackv2/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce039cc1bacdafbda4bb08060bf64aa10ef01271 --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13176ad77e098803b50e5d2d8cf8a1ddb39696f23b40615c29c97c4a280c5a29 +size 67109160 diff --git a/stackv2/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..152bdbd0d3ea5f7a0e3a80eb2f5b029ea6b03f15 --- /dev/null +++ b/stackv2/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c16c27132a67cb22aacf15157a243da3dfdd8909073058b51fbb699629d8a2 +size 4192 diff --git a/stackv2/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75f929915c7ca7db4f0a853565c658f30fdf9a3a --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75539b5437ee5c4fd12ce0e892eba30554d296d7448372c7def1e0bc5b174bfd +size 8388848 diff --git a/stackv2/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e44d87c8a609fbe84da472f83a1a0b73d7fa053 --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2598691cb03bbd09cb594ed8d04523417a7dd7451aa2271b4af068d21671950 +size 25166176 diff --git a/stackv2/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc3842d314e1467a9bd0c73d7c10d8836d45ba2b --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f09d8e0f5353b3244a6b1312bf9413cf44ee3ecb17da560a145d9dc79423a3 +size 4192 diff --git a/stackv2/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..756ae7e7b063885b8d802e73d38ebb984c063d5c --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5150c84f348d727e6aa76039ef9f5c4fd254bcb7d656278ccac151db2e7977c1 +size 33554672 diff --git a/stackv2/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..571bbb49be024bc810a1583afbe99c85f2c545e2 --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c6bd5b52e21c35d0b13afa1bcc6fd78ffa94c1f5e06a40c451a238122ab410 +size 67109160 diff --git a/stackv2/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0b56c6bc597462baf39bed85db7b0b763e70913 --- /dev/null +++ b/stackv2/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f59a5f7b5394a7043f3166782e818f08f4bd59f74eb2f24144d8581c2da3373b +size 4192 diff --git a/stackv2/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe67a3ddf475abc86f130664d72cf807a51bdcaa --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237fab3dacbb066c0a756559cd7cd56199b12f06eca37ea95c0e34a37e446ef9 +size 8388848 diff --git a/stackv2/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8609142e1686264d712e74d4f28b64b7c5a8bd1 --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0460b83b3633aade957dfbdbbfe81b1261f8177639d51eed4c4b1fcf8f06c8be +size 25166176 diff --git a/stackv2/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2eb10976d42f6f79b95ec9a4278c62ef2fa5c28 --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ef29e1c5f63d98f9e2820b17e9c558a4c5af478438582c90b0419dacbbd7e1 +size 4192 diff --git a/stackv2/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adfa5c5ec8d6f6512e3bd4797c2f2d21d24c22dd --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36fe4423c278310dde728f86169c68a768f3dad6ac36443d0bc34f025a1c09e +size 33554672 diff --git a/stackv2/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3d7299dc468f374d19c5a00e23613fab06c26c --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3481d52fab98238fa85a4b9d7b3e16acf7e79a52e58645d6ad79a168a49cae1 +size 67109160 diff --git a/stackv2/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38d4925e2e9b3943ce28a52ab3e3da0b2beaad63 --- /dev/null +++ b/stackv2/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744679bc2754e2ff9beb4e718770456a3ad4404ca7f2a365660e7c016aaf0f71 +size 4192 diff --git a/stackv2/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4d83fa96e5f2a49160424f633233b4f1ff2e08f --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65727593c90de1fa2dd05c076b361b4f79b686c20918596ca4509dcfcf8032c5 +size 8388848 diff --git a/stackv2/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1929e8f894348d8f2ea7e2d26a7a622aebd2c6cf --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a394b57c3218e198ef775a4add58402370f53482e9f1201e7aab76e4c344822b +size 25166176 diff --git a/stackv2/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..345f4f35bd467a8f0f43587d28f81da7753981a0 --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61164e44cf5ba12ac7499d0a8b92c0b2d7177fda49fed184e7373edfb0d25ed0 +size 4192 diff --git a/stackv2/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebd59be7eeb38fd1d5ec58836bd73c37384b4199 --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f1ff8e82f8cd40b09f006d9f7707304a429d21e6c36bc19a7d0d97ebd91073 +size 33554672 diff --git a/stackv2/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b713be147badb07a3a29aedd207b3ef3dccc6b23 --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b432eca5c857016347ab5ec98ae59d5a15d6a74f164db3ff09dfddb0b6ad807 +size 67109160 diff --git a/stackv2/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0e222cce988bdbc528c33318f756cfc21a1be86 --- /dev/null +++ b/stackv2/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfa024548311c2a7168bba5767b796f97d889ef21c2b2f0de071ff5ce3c3928 +size 4192 diff --git a/stackv2/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a24b43a54e78fe8ae217844cc7d030ac3947942e --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623b825c832f593d9f47e4871e954573694e0b98c7de18e0deea34f0f8336240 +size 8388848 diff --git a/stackv2/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..946bf779c711f4f8943ebbaabde8eae225b42286 --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1ed67191d3faf00fd9a1e6c1cec09eff382681a0c2a9aff494e5168fb4190e +size 25166176 diff --git a/stackv2/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a183acddda1f4b6b2f8365e2aa460aa38cf80a1 --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876c22550515ea5322b3964a0936e2b32dbd8b1cb6d01b1aa049db41b012a030 +size 4192 diff --git a/stackv2/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95c51b6f888e6dcbb3931ad491a715940e390e54 --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd327648030de53c360394683fd6d83d9b87f3c59e379f9815c598984333293 +size 33554672 diff --git a/stackv2/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4844f654f295a2737de3eb289aed4bcdf1457434 --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c4e6d180bf55bad8ae31846599a3fdc75e31136a4dd40317d7efee40263f07 +size 67109160 diff --git a/stackv2/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e71b15abdd9bcbe5e348de09501e06e9a386caf5 --- /dev/null +++ b/stackv2/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870342161691b4310768b571b05c88ba5ba5d351d17a80bc15cb9d13dfe3fd1e +size 4192 diff --git a/stackv2/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80e072db89e5316d47fdb57ca322fbef93b31c72 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458efe30a250bd1abe3db1d6ee25e3fabf92d8b6751e53a5f0a63c261d9525b6 +size 8388848 diff --git a/stackv2/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41c5a5113b8319ecb9f037b1cf254e7672a205b8 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0db8b209f4d0cc3bcc6b67990bfb636206b3326cb1c3b2f643c26e4018fbe5 +size 25166176 diff --git a/stackv2/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83c69465a40933163f4ab6d3941ea01ae5d76758 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac4b6e08519facb6c388b6585152e9594275f7e1bf8903c4afd481089f75795 +size 4192 diff --git a/stackv2/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aeaf6242dcb6559e3f6375445872f5af05cc4e13 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f73a9cc3ced8bb3713dd016ea99461e248a2e8bed03e831c003392130429ee8 +size 33554672 diff --git a/stackv2/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d875792410285c4f58ecf9900d8e741ba6a524b5 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565715669113df60b508b5dcfcfdd418d89139e4d89a700d781fbaf0f36e9bb1 +size 67109160 diff --git a/stackv2/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99eadca0676905c316aea08c8019c395cc6f4318 --- /dev/null +++ b/stackv2/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:543e2188f774dda2eea026853ccd07a44407a565809e77c66d1f86cc367b0f2c +size 4192 diff --git a/stackv2/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3a4242ebeab7799ad98da083d8f28750c90a575 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e00ba1459d7d7e82b3d4636ef48695e20c6d3a4cf5b49076ff57e60710af8e +size 8388848 diff --git a/stackv2/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c7ec5e9c49ec040a5b875d49b08de9826531db1 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a8383ff62b805f80201ef4e80b763c4571a7bd578e827fbc8dd89cb3328eb4 +size 25166176 diff --git a/stackv2/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/stackv2/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e405412973abee1871bafe85bbe0779486d69f4 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a2193fd059f0be973bcb0ad94bae3570caf557b84a7c1b084daaab3e4793a5 +size 4192 diff --git a/stackv2/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6eb156eee2091e10f7a7b064d3222df41e6ff51 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cc4d10c26522ef1bd1420262663d232a68ee8c2a63563f9a0a697681fdc57b +size 33554672 diff --git a/stackv2/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c5467bfc30abfe1c0ee0ea0e4ddc5e79325ab74 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8fb46caa2abec3f59881796a5af82285c0fc1e5cd21e56b35d5cab15c97dcb +size 67109160 diff --git a/stackv2/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/stackv2/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbaf412fbef8d38038e1711ee85f116e4fd63f56 --- /dev/null +++ b/stackv2/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9342b04ce555e8a65fcc318393f6cb1c1e772aa4f8cb2b347c5f53fb1cfb0df8 +size 4192 diff --git a/stackv2/model/final_layer_norm/pp_block/model_weight.safetensors b/stackv2/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13dff265eb6a178145253b5dc32a22e94395194f --- /dev/null +++ b/stackv2/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d9fd3ac064a4a61ad03e104130b03c9f7eadbb7638ea6864a6f7c1cc9fdcaab +size 4192 diff --git a/stackv2/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/stackv2/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f875436d5ad1d31778f3fcd4f1f910ab0fc474c8 --- /dev/null +++ b/stackv2/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328cb4a51ee57255363e5c071c22a820243506551886b821576d3157a76f15bc +size 205914352 diff --git a/stackv2/model_config.json b/stackv2/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/stackv2/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/usgpo/checkpoint_metadata.json b/usgpo/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/usgpo/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/usgpo/config.yaml b/usgpo/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60adae446ae48b267424338b5d9c98cb29440ee9 --- /dev/null +++ b/usgpo/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredusgpo-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/usgpo/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92b19a760749e6798cc3bd99c59234aff6b40473 --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadfd53dc6e7fe282ba175616fc319dd201c496e1294f7ddd5916f3123f61f58 +size 8388848 diff --git a/usgpo/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..829540584a27b1eb6bdbb37f19bd60f1014c7a86 --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9953e787375ca640745ea19f1019c037b04b006a8db86878641a3cc0b1e5b7bc +size 25166176 diff --git a/usgpo/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23fb79fa9d5689ae4cc830b3701d36c27f03cb16 --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e2b46913b5440c9fe52a65436385e754821cac7af068d1a395ceafd4503a3e +size 4192 diff --git a/usgpo/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c80deb0b280c1f3af4075996fc5f1977297851a --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b6d3a50b124b6af66a2a39852014a44af3264da96c06987abb24be1fadb3dd +size 33554672 diff --git a/usgpo/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65e7de5de8ecc0612432239de984bec0d1fe9431 --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe079a145acde3670bf4bda5abda3942fd5a09ea07a3ae9c8adafe1606acf38 +size 67109160 diff --git a/usgpo/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e47add2f915a10cddc5f5b4ee48d139dcf763eb --- /dev/null +++ b/usgpo/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ae7b98345cb3eab068b1d79511ac98df92e3e02c0c7892197e5faec86ad02e +size 4192 diff --git a/usgpo/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db1001ec32df450992b2a98871bc79dbed6f160a --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c48b9135d0cc3b5906024497361454b24aa15ff3e98c27510a1d9f2b7db855 +size 8388848 diff --git a/usgpo/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1af4e386ffd33f62cbb1b51055dcc353476378c9 --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598a65cb96c5765c7f6089edc688a5569bdd325ef4c57dbe22237b5c7d4f51d0 +size 25166176 diff --git a/usgpo/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1f602d059bc11abef73712f854d5b7be2b23be9 --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1071177b9883bc30fbe9c804529471643df821721878775716b7dc3215deaac5 +size 4192 diff --git a/usgpo/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c83671c8d42b7cbfa735a14deed1be1c2361820 --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658208c6c159061098ca02ee316bad50ad3e348ef88be23716883d50f169b847 +size 33554672 diff --git a/usgpo/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e831f18733e5f11bf970bd18ab2dafcbd3dfdd7 --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72904279e5b015aebccc75bc07b4f6de82b589fae733a3f1186bc92b6a7a168a +size 67109160 diff --git a/usgpo/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a6cb7c96a4893c5186918ed522c1664ab7d3582 --- /dev/null +++ b/usgpo/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa06777ebfc4926680d32f077519cfa22d87c88217cb0ae6e59a605438f5aabd +size 4192 diff --git a/usgpo/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..002c3ae09430805b1d5aa3a0018c5075d317560f --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b048d5d5a69abf03f42185c35fe9dab93014dde528ba825c02490c8836019159 +size 8388848 diff --git a/usgpo/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07ee19c78dfa0ede41480a6c1969686af0d8eeb5 --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f6840a783b86a216971b4798edc215de64c33869a1875ae4f39f90028ab5f9f +size 25166176 diff --git a/usgpo/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..593efaed72236ea87c0a4618f51e1c4a14a58ebd --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc83545f07369cf86aef9c53377a03111ca76abbf27f91c7606fe30aaad5e66 +size 4192 diff --git a/usgpo/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f58ba010ae3cfb97a3895cc8b8d1ced04520256 --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39aca47adb0d30a228d9b0e46de3087ee231f1d3640b29048dabeef3944a6e4 +size 33554672 diff --git a/usgpo/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fa015d9a45393b30686c4e7839f3bf3ea8d6198 --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276f840ccaf41805704014469a546182a7e2d02af68e1d6c69cbec555b95ed97 +size 67109160 diff --git a/usgpo/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c7752148b554bb8aec9d54ce8358a8521ee519d --- /dev/null +++ b/usgpo/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf4a62385a9326a5fa3b2716a9a27d78b35494545648d8fdeaad34a75dd6131 +size 4192 diff --git a/usgpo/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3195d5ff7e270309e28647946e134e7f63ffaa8 --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d32005375e75dc7f3d94a7eb9aefc9dda31296e1ba76ddebffda353a7717e0 +size 8388848 diff --git a/usgpo/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58dcfc5ccde08624a19d4c86ac7187f4d0bbcb6f --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0faeb8825f3828713ea292d9c6c11130177054933167990fef628e1f61f295 +size 25166176 diff --git a/usgpo/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9eec80c6e0fa071f74fe7a68b760d8279842e58 --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482b5d3272f0f8c81f41a0a0496351a4577cf6535faa500f826fa98161a13d86 +size 4192 diff --git a/usgpo/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f462e08153ba120681e75deb57525883eec2844 --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee7b5b3a039e304efb9d4e18b690a3f821467bb6eabbae7701c5276f8883703 +size 33554672 diff --git a/usgpo/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..963fc522546e1119b0f41e1fb0c91bf4f44a41c1 --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969a35f45b1e0b305b343c32bed9123402afc33d943496f0229ef0dc8342f9fa +size 67109160 diff --git a/usgpo/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53fa07ee28eaea16b6b1afe8d728a63548bfec99 --- /dev/null +++ b/usgpo/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0bfba335163e6b716384472597a26454129f95f5d5788b142af19d9b727e62 +size 4192 diff --git a/usgpo/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5a59b9cf84f5e46be932c17bb4b92b5b3bafb07 --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ddd8f10d51d5b963ced8811ecdf182a18924ad07dd6c9e92e1aa836c6f9923 +size 8388848 diff --git a/usgpo/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20e352e08c32f436ff9b52bdd7c555538d07a859 --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecec7c057de39755039bea0c808e7d821a1b235ec4afc7dd812b30d3516af0ed +size 25166176 diff --git a/usgpo/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bddad6885eae4a235999e60ef3c9b4428a2ddab --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a0f73c9f2ca9c5d2e5e73014fc997b38006594729c51d3da2136aa6ce2a673 +size 4192 diff --git a/usgpo/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c1988c1cd1450886aa23b116055b29de177028f --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8605949a1c03b444c88dad34504938b598dedd15d38d773d365144082de140b5 +size 33554672 diff --git a/usgpo/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b00a1dd5eed1ff14c2854101e0f9164b1d7cd38 --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc56832f1872a784318e5c2fe335030373efe5cfafc5229c29c729b700f64027 +size 67109160 diff --git a/usgpo/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3aed88213362c3c61f1acd5ebfebbfee0ce28d97 --- /dev/null +++ b/usgpo/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3de98925ba66147e5def5cbacb510c0b00745c06429cada0e3469e795af44110 +size 4192 diff --git a/usgpo/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6e5c55a1d541e32bd41df21e0e967fdc801b040 --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1becb3867af4547e7bed25a2f3372e2112c9426cf6a5aa0d5cfc63916a100195 +size 8388848 diff --git a/usgpo/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..863ab637af10831a0cfdeb98fcd7445737f19608 --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9ca3e845a895a40c25cde28dffa0e068c78edb657f934eecc01c3451ff26a1 +size 25166176 diff --git a/usgpo/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3e14b007deb5d3ac744bcd59a289ecc7694b69b --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb2fada1663d398eb61f9693bb2f4bf1aed109b1421a7a621433eeb8b3d8d78 +size 4192 diff --git a/usgpo/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd5632516790116ac62fb6a9d357acfc4b5cb39a --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61322a8c96b7961a85dea1a1edb36657eb79dcecfcfa6f8124bb1a792f06ed22 +size 33554672 diff --git a/usgpo/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7df491ff22b43f8083557264ee36231084ff0c73 --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61393c8fec7cc26bcec552a1f6b08eb499381aa93f319374ab5aaa8618a4c2d +size 67109160 diff --git a/usgpo/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..461844b19170b20262577abc904686ef1d7d29be --- /dev/null +++ b/usgpo/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c682977757f25e3940f633d140e469cc606d41ae760bcd6d0559ce39e193a8 +size 4192 diff --git a/usgpo/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69d4412d0f64e9be2cc56ef5edbaf72e41b503e3 --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ffd53ff2327af577b4ef9963e8c36da94d58aca299a203084a63ad4d56c3cd +size 8388848 diff --git a/usgpo/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2352b06fcb0afd1406f0b28f06808e53e035ab5 --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3703b1345d889f33783aa15ae4527786b961d03484c7a39667685be3c431a768 +size 25166176 diff --git a/usgpo/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2510d5956ef2383103798766177d81f270fa15da --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3140981b58eb75b0d9a81709f26bd59a27749ad243c2450a70c4f4bdaeb40a07 +size 4192 diff --git a/usgpo/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ca59542846c5ebe6d14114fcdc0629684554fac --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5902ae8bc7411c3d70f192fe49b78f4b273ce9922205b11a7a7ddfb68606f3d7 +size 33554672 diff --git a/usgpo/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12f92224e54260a988ada8bb1e2485b36193f9b0 --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976e932d89d4c142f6a220ff8f4dac6344da83e3c9228f0fb1bd8afed3218588 +size 67109160 diff --git a/usgpo/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..669e107caa9e700e0d4c042db68f2856a663578b --- /dev/null +++ b/usgpo/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17527fceb0cfbba6cf4776c61fc63c122b46a02fb3ef82034812fbe83fe09d46 +size 4192 diff --git a/usgpo/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee7aecb8bee5a4efb434c33959eb5a7d4f890b99 --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4841cbd93911a2c9bde470e8350ce135162a5ddab82a5cf86f4f0bb303bde522 +size 8388848 diff --git a/usgpo/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..335dfb92aeb37fff1c6475c3e14ee44f26c33983 --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036878f4a701565da0a243fe41be5546d005349a906648262de83af3f35bc196 +size 25166176 diff --git a/usgpo/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93321195eeb2098d68e217a5272d49cfd5bea066 --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59846021edef5d5acb71f44ee96418da8518b69318b57b1a027d202cca3de85 +size 4192 diff --git a/usgpo/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24e36f31dc08c1766d9b45bcd3b72a34c754447c --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acbf2ec1dcb03df67482f1c4c47131b86df06cd3a2a6b5eeb9740002aeaa73db +size 33554672 diff --git a/usgpo/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be4ba9723eca50ec25bbc31884809f5473d24d17 --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999a95f8b597e8906d8770701e2e18e670a117cac2a170d23f01cd5cf627fd84 +size 67109160 diff --git a/usgpo/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adf1fb159628f3fda0487f7bc1e3f485cc6bbc1d --- /dev/null +++ b/usgpo/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4644dbc41a592b245cdbef1f362778bf17e13cad0831638481a93d7252395db8 +size 4192 diff --git a/usgpo/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8c72f27b11844bf42951e07050949e56272954a --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42dd2bc2d4de5deeed94921587829b58f18386e05ca31e7058b9dbc453e0ab3d +size 8388848 diff --git a/usgpo/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c44c140cb57e4b942eadd7e3c1ccdc58379bd33 --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481a27f567ec61fb715ea19394d15ffa7b874e683dc43b2595ecd6357f81d4aa +size 25166176 diff --git a/usgpo/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ed0998e7745d026b39a092cfd3f6fc9b250c162 --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e673cb5cfb7d80cedab3a22b8169af47348876c6ef53c0d019a1cbf2e21330 +size 4192 diff --git a/usgpo/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0736ca308cc7a24bd2c21ad02bceaa94cf3a99a4 --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68f0efa9c7f1306afced26987d1ae625929afda424284f90b2999a27991c78b +size 33554672 diff --git a/usgpo/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afdf7fc655d4d644bb423dc139dcd902374cf759 --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e96884ab207949b54098dc8565c626bd235e69dd145e6420ed7ceeab4345fb0 +size 67109160 diff --git a/usgpo/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb3bd286cf4f09c4fc41a1cbc5438a0927751c31 --- /dev/null +++ b/usgpo/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce45f5a115a665542c24df4d0398d5cdc9d9ff6ec7baeedf39ecb0222be516c6 +size 4192 diff --git a/usgpo/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fa3f11bd1b4a501bbc3a900a2f0983946371236 --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ef8df9b04f1e3511a72476d1d7372066aacfe8e581098aa4d5d68208c08df3 +size 8388848 diff --git a/usgpo/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1377614c2ff1e3af500e7eaab8758247482d81f --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142a3fd35859e562d760f0de84da2af090f522e8dbf5ea6568fdf6d6884b4af0 +size 25166176 diff --git a/usgpo/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac1de205bad9440507ed56f1c7963eb0d6bc9279 --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14aa24e690b316b7f1b05a975217356423eca4bc4b6718453d6bd18fe474a44 +size 4192 diff --git a/usgpo/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21f2448e9eb2e6833880e000970af8723fcaebe7 --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6a69c0e664342954e3e220e32b204e2aa789b5b8a36dc6bef08ed902e52fea +size 33554672 diff --git a/usgpo/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b81a22dee452c96102b04e8df6ebad490d24dcd --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077686aea61c6fe0a40d124c9fab0f7920410be38a1686f2beb7deef8c07dcaa +size 67109160 diff --git a/usgpo/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f44bd8a4a347fe26580d6ead361322b06e5cda03 --- /dev/null +++ b/usgpo/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebb9d2972a904d66ca4c6c33f4384cb46ea431538ebae70c73c92e4664eec69 +size 4192 diff --git a/usgpo/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce5a7a82953a160b44f6aed195babedab640ddcb --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a60b9eb519275cc9fb09c9e730d52b422e5575afcbf09c7a53060c0398196f +size 8388848 diff --git a/usgpo/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aab75ccb3e1dc0983f81444a7b05000fd9096c56 --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86f4df8c1ecf360b47f25017f3905c6aae3cb53600cb0346b5e00c22187b1ac +size 25166176 diff --git a/usgpo/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3ef503e5e83e3cf25c9a3f2266f8b744f56f585 --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184a2cc5254decc4d8b2cc75c8ebe73d2a08b7b69b52d8c82c31b1ee58c34783 +size 4192 diff --git a/usgpo/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb87bdad675d018522b9b3fe72f991761222ad04 --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bcb7e8caffc59eaa56b1d3237a44647142cd312e8b44fbe52ce7c488b1c1bd +size 33554672 diff --git a/usgpo/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..132b861abfd4bb0b5f820c6c3af9cac79eaace34 --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6150c8616ec6c0a30bc1959688faea879e6b23c4acfc2697df962d47c1911e7f +size 67109160 diff --git a/usgpo/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e863e94feb7f885cd9e114ad2ebbaffd97fcf380 --- /dev/null +++ b/usgpo/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552f47680b73301f58ad95799d2555358734382dc6190df25fef5d61c03b3c6 +size 4192 diff --git a/usgpo/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1233e3a21dcf343c2c02a6b760c79ccda9429391 --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db40a2817a483ff4e8916ec1b05613f4488034c4da61fa54b194d964e5ba99c5 +size 8388848 diff --git a/usgpo/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8f304d8ee5a817846f25e8abd7f7b807f87f595 --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fb16cebe1c218dfaddce74a58c83655d4caf1ba32aa2a9da1177363bfa8dc9 +size 25166176 diff --git a/usgpo/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82f34afb17d9a3d6a81e0ccd16d203068e6cb42a --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd372dff244f0cf9c1b622a3bb9736a356bc1a8a3c4f4369aca825d0a530ba64 +size 4192 diff --git a/usgpo/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fac6af83bf60755ce2612602fb8e2993cdbb8082 --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90984ff788557811455f9d1e8c56e52b4b038880945992dcfedccc9ba161229d +size 33554672 diff --git a/usgpo/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b61a827fbfbbe936e8e075232222b12de73f91a --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6b3c616af38e6c56d5c3bb5908649ca060de712ea27d9628828eede2755ac1 +size 67109160 diff --git a/usgpo/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3fb81f0e1edadcf7af8256b37533ff41a3b88ee --- /dev/null +++ b/usgpo/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa34d6c7cd389a90898171647b29b230cc77f5c0e4b1969668b83ecf621797e +size 4192 diff --git a/usgpo/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc5bab1afa365b7a317593950a31bac4dd7af2ea --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fe6543e0e8722c75b251cfa116623b1de5d2206fcc6b00b5046ce08df48879 +size 8388848 diff --git a/usgpo/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d87891964d2eff869068a23533196527ccb18028 --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900b83cd5a0f932c6bb1deb3745de61c86e58e804f421eafec13a128c9e319cc +size 25166176 diff --git a/usgpo/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f81cf89129799c2a86a4bf29c89a226b339bbd78 --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccdd98d1e954e81230efaed8b46cc2f7253163d4666f2017d67ecfc55f5bdc57 +size 4192 diff --git a/usgpo/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdfc050f3c36debe11f095042b9f3291be7965de --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b503b30709b6c95b8c7c820039474e420f99db4cad0f59036ce1d4edcf4300d3 +size 33554672 diff --git a/usgpo/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1fb2d3f22f1c865e6606da941dfb2583a00ad70 --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b253d73b8de94d079ef80bd8eed2aeb3cc57f8bd4ab9068405fb478d39eae6c4 +size 67109160 diff --git a/usgpo/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..905dc899fa7a3abb534b4c75c3e4ea92420a302f --- /dev/null +++ b/usgpo/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753deada10197db998c729b0ace0a280a7954903f97f1d72741175fe9f3786a6 +size 4192 diff --git a/usgpo/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0d148c43201c03727917dcf29385dc59dc4f399 --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e977f968e8eadc64f23099c5a2a2023f9372aa578c8ed28207ad9f47c363312 +size 8388848 diff --git a/usgpo/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ca8698080745b1c5a750de506ab39ef722d190b --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec9db12ea17fa11cbd2235feba681debe57ad5306445cff3ce58e29a6c9df2c +size 25166176 diff --git a/usgpo/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96bd4330b7707f4668a18e7759d9d73db230dcf9 --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f88d2b14bf7c0dfba5c661ebf7b0ae1ba06535931c4807bb709e07487c3332 +size 4192 diff --git a/usgpo/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47f05dbfca8c6744b316d14a051098f88c1b55a5 --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8f1246727ead1c841f94bdd3059c94ef8a6db7c28f8ef04df077df1618a24d +size 33554672 diff --git a/usgpo/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..077c279fb3deaab8a306e6eaa285db3b905a9990 --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a170def8d140c277597c57e75cb318b9e07bea3a09f96399581e5b5496961bc1 +size 67109160 diff --git a/usgpo/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44765a42e09788c5f9f233a4ff9e71a5f318597f --- /dev/null +++ b/usgpo/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0215b74dd15f264ca64e9e690b4b837a19db3ece1ddb6b9c5ce01904c5f21c +size 4192 diff --git a/usgpo/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81dac50ec958e46e1af7b89f10dcfbb77a307e00 --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7bca65a2c8d68f111e9054f2858279239215fc0b0ca461244251b8562a1079 +size 8388848 diff --git a/usgpo/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..004e96617c2cd3629c73d821c4a2d2ee256f3bab --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e5894425be49b9e9b708cb47ab2b3ee4f59543790f507cd7caa7ab19c081b0 +size 25166176 diff --git a/usgpo/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e28448058b5b5f0ed540d1e0e4ba1d8625053ce6 --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebca7219fbc3fe5ef6810698ac2d6a1cd81903daab0952ab4ce8f238e21cfec +size 4192 diff --git a/usgpo/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0288c61e8bc8ec8155c5f79bd10ab32864dfda11 --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c468ce720838feaf73aedbb44151085b063e4701c73a00efc33656acab2c22 +size 33554672 diff --git a/usgpo/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b700f62c49ab67df9e63e37d2a8b81814d90317 --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6be65d5cd957d0933e2e80cd580ce8e463a1e8cc203929f97575a15f00905fe +size 67109160 diff --git a/usgpo/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..407d1aabc402190c325642154a6d758232f64be2 --- /dev/null +++ b/usgpo/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f911bc30e53d253b2183745021916a910e1f19d659f7afddb685eb752052197d +size 4192 diff --git a/usgpo/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff0ac463b0084d66713e904726985c1d3ed009d6 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2300d0aa9faeef1ed3c03c35e1f2b497cffa1be5ef173f3d975d0a0bd5b8be29 +size 8388848 diff --git a/usgpo/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4df6559bbfea24e80e15cc0483c2893ab21b57c3 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41519ca62ba5459b1b42b7b31da7ea7e9af3cb02335fd8f101836f648d28304 +size 25166176 diff --git a/usgpo/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0427633795261e1d9c67b2b331b116bcd9131425 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8316692971bb7279d70f571065eaddfa11ea8ed64d7c403afaffaba729a40a0f +size 4192 diff --git a/usgpo/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c7f8d62eabd7fe89cd381a11de75de952953489 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab80f701a54e092843a1591a53ee8e7d3373c46d6380e1fa7765583cd52cbe2d +size 33554672 diff --git a/usgpo/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b17a4338974065ac3b785efc9d347db8062b1659 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53443a0f8c4fc84204fd1ad55c8d7ad151b797256bcf8db6f81b67749f16c7f +size 67109160 diff --git a/usgpo/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0a580efefa740a6dc49e80d168b90ac97deee43 --- /dev/null +++ b/usgpo/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5982f834fb7344ffe0ca166955f9137a788e9ebc3ad11544f80ab9318887d98 +size 4192 diff --git a/usgpo/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a40c6ee2b0776eb7d0f32e30e9a055197cd6cd42 --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a8dccc752c4ef8a4ac46abafb96f3ce243b68e9bcd6bdca5c163f73da4a7eb +size 8388848 diff --git a/usgpo/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07c1b313d43fd7548dcee31ecbbd1517bff42521 --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8cf117aef65fa5475d86ce3ffe36d63ccb51567f3222652508b201b1294474 +size 25166176 diff --git a/usgpo/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3227b180245dfe0a2d439518a23d9bd295e0d6bc --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5227f587c214f5b1316e0cb05bc53feee63260e5bfd95c84711d4a9792605b0 +size 4192 diff --git a/usgpo/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2898cca5f50729006756ef5b9ed432483af585d --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef0f2ed0921ab78fb67e347cb3f6e9377ba1d780f75f42bbb3a4f59afe03497 +size 33554672 diff --git a/usgpo/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f9fb516cfa1be0684b4459ade01187183aa820a --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dd25a080af2c8077fe990ff25ba42b37642d5055b5f29c480ac2345e2a1c58 +size 67109160 diff --git a/usgpo/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b55ee402b55cb641f3ef4fa4bbd5292137abd12 --- /dev/null +++ b/usgpo/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4272c793239b89094eb1169029ef95df095a49e5d28b1984fcbf3eaead8900 +size 4192 diff --git a/usgpo/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0312ea23eec13dfce254a54e5d1b04078d5b8899 --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc87d97167db6cd5d2c2dfaa82eb985e8ced22f4d887e4dc1572b0eff016303d +size 8388848 diff --git a/usgpo/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42666a1d383a7f6652474e51b0eb46ae1c5ae9bf --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10f4eb5f6af43e7e4816237927b832c9838f42ff58adcfae1c9bddcdd1276d3 +size 25166176 diff --git a/usgpo/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f63829060caa8b640d86461fe2f4543304478d9c --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2bdcb406ff03c061997cee1c9a5d305b7940ced9897de1134936f9f41f67ee +size 4192 diff --git a/usgpo/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2668c4aa3586fc821a8b2c457578669764ae02a0 --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1c7b2abb6a4467e306455d4925f4a7d1c999bb85d818c7cb92d5dda23b14cb +size 33554672 diff --git a/usgpo/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1882f67f79ff82d2818ece69b9880123a3a0004b --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b50814cc0a1e30e6f8eca7ffd10c9895087c4af384431ca46f800c0c90d586 +size 67109160 diff --git a/usgpo/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..823f77f37839e957f0d4f2fbf3f46f87ed5f0b81 --- /dev/null +++ b/usgpo/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0154adb2b2718f6d2bce49d2991b6e7e607d3c40c5191750c6d11cda7b6dd9c +size 4192 diff --git a/usgpo/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c24edc79098b7a3fb2a24fc0ca98bcab6509912 --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba95b83112ce51f868841c7f17f0606bddb7d821e67baa1009cbd0bda7013c93 +size 8388848 diff --git a/usgpo/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d57ee01b9e45423aff9820073afa88d0c645390 --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22e1a63b8afaa73c3944adb00c280c46c10cb6d0670283700084d944f26ad94 +size 25166176 diff --git a/usgpo/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fe5f6d3bf12e5d1fd7daa81d926710e919b3922 --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fddc55eab0c7ab00a08a44091a043098bcb112f6bfc165b30f97a6989378c8 +size 4192 diff --git a/usgpo/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..002f8c58e592b0e5a451dde35c9f30072763326d --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f118604f946c6fcb825d54465561e0a83cbbf668c65dd5443300c1ef2e36b16 +size 33554672 diff --git a/usgpo/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c07d68c03568e91f774a260d907be3765b75988 --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0fff4333f888ab1ccddc8d72cf9addcf390fa17c5e74c0d2556ec0032f137e +size 67109160 diff --git a/usgpo/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3ef8d55ae1b6c9937e6429eefb994b469c4a977 --- /dev/null +++ b/usgpo/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d0af6201d569a77a4e8a8fe54ae833a3b0235c578dd054af983689b6467e42 +size 4192 diff --git a/usgpo/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14d15009be97f49a4a4bff24c8c43de34ba3ee1f --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5bd31d8c45c7354550131dae7bc2184e09ba9113a9210bed198c459f0f6accd +size 8388848 diff --git a/usgpo/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c5bae9dae3c67141c9679f8b1fdce5ec35cb289 --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97002cb8a3f1bc0894cd64134a097c5c4bdcc35d87f0957b6380aa7070bb287d +size 25166176 diff --git a/usgpo/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41cfbda584e187a80acd67e163ccbaf745add606 --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc436751c54117143f018241fb5ff01752fb917418250297bfdcc16fea60600 +size 4192 diff --git a/usgpo/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1962e82a0cc4460a039255ccc8e8bca93442ead5 --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495982a18f90d770d9b18491084fee329872a6927664ca6bc801b8ef7582b2d8 +size 33554672 diff --git a/usgpo/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..715b0b7bac1561ac95f0b7a3e691e2acc12cf6df --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e4bf8b79559c35652d6eca680ff12ceb5f8fb7221dd02f6d81426006a4e713 +size 67109160 diff --git a/usgpo/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83a089ef68b86edfdaad9240a1a2c991fed60431 --- /dev/null +++ b/usgpo/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294cf15f2e7a5b1d74d37aed09ca02497dd4f54a13c964a88d5c1c0f079b78a0 +size 4192 diff --git a/usgpo/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..002d85e88d2c9ec8d94489145b3b9e5ab7ea4404 --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7212a677cfb85704d1a9eee255a3ed486ad3988aae5654b4c1751af7fce292 +size 8388848 diff --git a/usgpo/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e902a34c81c83d833c0d6dbf41226d39a38ecd5f --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37755f2998198bc9dcc09a8f0c966cea5f6cf4bf2e9fb57f0c8a28d7d7edfa8 +size 25166176 diff --git a/usgpo/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..353795af55815ceb40659445a342a0222ead741e --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a92f5a17d7fbf303f84cea184e3c33b2cd4903f87cfea4cbf18ea2995bf101 +size 4192 diff --git a/usgpo/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e35f0cdde9c167adb23aefe21a9ea94cfcbddbc --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47cf9ce95beb0abee8b54018c433b142a14dde1d331fbe0411aa53da455fee4 +size 33554672 diff --git a/usgpo/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f6d60326c00f98a3fd70d8a53d06bd944200339 --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c3b8684e799535957a659d6f7ffd70661e1b3d33eec88920593266e8a2186e +size 67109160 diff --git a/usgpo/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..456b84a78d09555658241ac124f8f09dd5f35a7c --- /dev/null +++ b/usgpo/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc53c3616f39bdc8abe0bc8187cb47fba65cb8d2df87f072e9df3640277eae7 +size 4192 diff --git a/usgpo/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5be59e3252c5d623da18b96d38d55b25a00f670 --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d071207cb0c5fcefac6185e6168266c448077bdfed9ddd0411d50a22ac367d5c +size 8388848 diff --git a/usgpo/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc224aa8b7594fdb8d8d1d7ea285dd0c176d250c --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d43b61ff6c3dae1b0dc1f5916e13b0b9cda2d2a167d884c13826ea8016f7e32 +size 25166176 diff --git a/usgpo/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d6be0a335f15c3f8c6b4d88fef9b4c4d89f1204 --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b6dc614e2161e33ddcbd9a4cf3f0a3ab37d36c03d7de6b65aa64c0064f3b68 +size 4192 diff --git a/usgpo/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3e5b3f7da48d824dc82aceef66d0f43d2442b96 --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ce2a15388abca35388d530e605627c50cc199c6079aaa958c4ebf7a554d71b +size 33554672 diff --git a/usgpo/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68b5ae4084f9137c74147fdb30a69b14d6b60d2d --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c4256812750b7f1d4bb62d8a388e6c4d16698daae7ffd980614ad1a85d7119 +size 67109160 diff --git a/usgpo/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dabddb16ba545c79ea7b96937135bb1e50d0252f --- /dev/null +++ b/usgpo/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529493bf927af1ab9d389c44c761da872ba8d87c65c4a52cd0212c546eead679 +size 4192 diff --git a/usgpo/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88bf914c784b8aef806106a672babddb280ef540 --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b458ad3d0cd912f4de1977d675d42b8da31e0f6bc386b2e0d922aa52075aa13 +size 8388848 diff --git a/usgpo/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9518ceefd9c959bea575836adca6dd93d26397a7 --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4010a3efe3611e553e9f7077ae1b22113000fdcc7c5415bd1f74309d26f423ff +size 25166176 diff --git a/usgpo/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc54737eaa2488c2c2468368e3e9bcabfb5f20a1 --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88929792d551fd29162c0f249c9cc370f16f70c90e2f5cca2b03b26af642c6b1 +size 4192 diff --git a/usgpo/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ad69dcc074de3bcd7a7acf12f660b5a013bb9bf --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc79c824d0e14ea3720335051cff5d520a354c4f09f9e7d6352f8c8a4fad3ba +size 33554672 diff --git a/usgpo/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a704247e721d18c572a609b8a2190d82773b3904 --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3c23ae387d3e0eb58cada239c29e9a41e0ebca20606f77de4d61c49b6ee181 +size 67109160 diff --git a/usgpo/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdc9589d13ba602f2aa7829e6a09be2fb263e623 --- /dev/null +++ b/usgpo/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb88d3ac2b9b3465c72af78028118a27521ac24dfa3c333dd2f36aa594d88bd +size 4192 diff --git a/usgpo/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa927e89abdf3f26097f2c2e42feabc80857cbd9 --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b400a93f54a99e177b0a2156a5f6459c0a1e37c1ee0fa62ee49f7a4d46cf216 +size 8388848 diff --git a/usgpo/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a14acb2d182b9a634384dd023af1e5ce9a024f19 --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7408c105196bc90a0a3e35210f2d168cf260c50020b6fc80c47a32cbd2852314 +size 25166176 diff --git a/usgpo/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/usgpo/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52eb284335ebfa077cacee1509fc34496731a546 --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3611bcb6f8997b808ceb82b7f7ca4d32d030a988c58881f06682ed71337ac0 +size 4192 diff --git a/usgpo/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5db717ccfaf6e7c15fc767ccb702992e11fbaab0 --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca76e8b5a9e54cfa838d86d934af523cb9b94c8d561a388476f33fdf3f29782 +size 33554672 diff --git a/usgpo/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa13185ccd4aecb5f7bbb1a9249eacf05fd10073 --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad992c8ab4495743e76a70b43cccdb77a3cca4f2199cb4cfe04a8eb9d9aac0cd +size 67109160 diff --git a/usgpo/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/usgpo/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06be7b169e1dd71f7897b9a6211564b7a45c5dce --- /dev/null +++ b/usgpo/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62e77e998f5f79932a3d9fa712d9dd72c0fb689a2d765e585f19a771364f121 +size 4192 diff --git a/usgpo/model/final_layer_norm/pp_block/model_weight.safetensors b/usgpo/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dc340ed3b1b966ebce7c8b62de2b06abda8d2cb --- /dev/null +++ b/usgpo/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58eddda497d469bf72ddc6a529e7970889b3b948fb80f5c5e7168752d042c3 +size 4192 diff --git a/usgpo/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/usgpo/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15eaadde77b35ba9dd27cc9eeb33d75e3368b05d --- /dev/null +++ b/usgpo/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:966ccde204f952b198eb94cb109ee7cd3343d3d365b55be4fb31e24c38431760 +size 205914352 diff --git a/usgpo/model_config.json b/usgpo/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/usgpo/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/uspto/checkpoint_metadata.json b/uspto/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/uspto/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/uspto/config.yaml b/uspto/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b69307083eef95ee7171d15fa6ce28e23f11287 --- /dev/null +++ b/uspto/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filtereduspto-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/uspto/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a98e391846b597ebffb32513d7f7f430e8c4fec --- /dev/null +++ b/uspto/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f759ca46b79ed776aa014f00663eeb2dae4a9643b447013c7de95393bc3816aa +size 8388848 diff --git a/uspto/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1215af3ce1369d9d5c6616a00ea8261943600d9e --- /dev/null +++ b/uspto/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3983b8db36926408cccd7c746788b74b7c1c78d8b18f5858b6f8f7bad5f028e2 +size 25166176 diff --git a/uspto/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c4f2e14d70c1cbd5d4153cbc84bf36ce1622dec --- /dev/null +++ b/uspto/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be169345d487eea06668469515d79651376047324db7a9baa6b34d1ae79479a8 +size 4192 diff --git a/uspto/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7cefed94b693e4d161ccc203fa841a9e2a19945 --- /dev/null +++ b/uspto/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b573e7cb682f3b35a9401f7632ffc097596472d971521b1b7dbf365b187de9 +size 33554672 diff --git a/uspto/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50920e1583c9e1515254bec66e7edaff6d38e717 --- /dev/null +++ b/uspto/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e7e333e87c736fe9b187b2edd0f36e167f60302cfc0948f981872205e6380b +size 67109160 diff --git a/uspto/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3caec596481e0bcf5f9e927c3d0feb1e0cfda5a9 --- /dev/null +++ b/uspto/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5864c293da042d95bca5f110b9ea895b14edd432dc0f7ef0ebf32df03e043575 +size 4192 diff --git a/uspto/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7233ca6e06a864653e26a0f842f32d2550ed2f24 --- /dev/null +++ b/uspto/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f78023acb4b2aa5a9829ed2b2f63e9198ba86637a0fb84ba10cdf7c0a626c96 +size 8388848 diff --git a/uspto/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3032361a55c83ccec7fb12f273d6b3c492bd43b4 --- /dev/null +++ b/uspto/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c63f223eb771dc59dec5ac25c4ab3d2834629bf0bf5284604d388b8d58b17e +size 25166176 diff --git a/uspto/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3a99f14313434b872f9143a258afce17c56155d --- /dev/null +++ b/uspto/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4bb9a922c3e30f43e6f9373048d935ad3c10ad10e82b51b41b9fd22dbbca25 +size 4192 diff --git a/uspto/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caefbdf5d95b40a2d91584922f1c8ef0aebc89a9 --- /dev/null +++ b/uspto/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2dda42fabbe5e075b97728af5053a590a69466109bb8417d521c01017f279b +size 33554672 diff --git a/uspto/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df495fd0639f3a522db33460607c18553aa841fc --- /dev/null +++ b/uspto/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df60db13ccd291581aab21d3d156e6421f1201d1b2768ff0ee6f2db7f6b8f203 +size 67109160 diff --git a/uspto/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d07d891d6cdbf8e4cdc6f5e683c1b3975c36a81 --- /dev/null +++ b/uspto/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea22da4b9315560e6bf3dbc45b197905b482a99356f98689c688594231af1aec +size 4192 diff --git a/uspto/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..613158db7bfd98bd5f1a88ee947aa71f64339b28 --- /dev/null +++ b/uspto/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a76a85fc08dbf4c02ff55595085a7a1f9dd056fa819dd592bf83ff03eb327f +size 8388848 diff --git a/uspto/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e12072079b2b5a107f63496fc82c23465b17ec2 --- /dev/null +++ b/uspto/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8a7985d42bb016d77ef6a1f32250a6ce6a4e1c5138f65ddd8f63832a50119c +size 25166176 diff --git a/uspto/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..495758c337120faaeaf7cd460ddb6369f85dae7e --- /dev/null +++ b/uspto/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce090d8ebf2c408a9988c8708b0e558820f83a6aa93e70ee0407360c36a8cba +size 4192 diff --git a/uspto/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f54e10badcf6a5a4744312e8751bdc74ee42141a --- /dev/null +++ b/uspto/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0468bb69a564ad25327dffcfb25a1c53f2ad925d6ae7a4f21cc83a86bdab76 +size 33554672 diff --git a/uspto/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c7066856fb21b9e26cc3566226b74ee32a69af1 --- /dev/null +++ b/uspto/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58170242c53d3626cec593b09ee5aec707a796310308820bc6f117248c7835f +size 67109160 diff --git a/uspto/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9de45f8a4d4cfda7b3b4b9207e16ff114566f3db --- /dev/null +++ b/uspto/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263a84808d3c4986a97238cb79f89a58f78ce2eeff2ab70a8c2838dc308989a9 +size 4192 diff --git a/uspto/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..212ea17c17d9936b8bf83f2bc2e609975771ba96 --- /dev/null +++ b/uspto/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9caa14a392f458ea361c4c341af4bc2ca1b6b41fb346b91c1c8891f481552862 +size 8388848 diff --git a/uspto/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..660706a14736420519129700031ba6fe8af5bdf8 --- /dev/null +++ b/uspto/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8e9d56ca9ed7fbbf58ad9fea8c4296dc0cc6f69cf34733ce3609dc2e50f16e +size 25166176 diff --git a/uspto/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5eaedece0df7f84e3a3609baba4993ec8a80db63 --- /dev/null +++ b/uspto/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff50da48b02ed405757c74e0d4975e691263ded94accf2f368a2e6ddccb8a21 +size 4192 diff --git a/uspto/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..058b81b14bc160fc4f1e9458ef201cfd1afbfa0b --- /dev/null +++ b/uspto/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d3a225acbea4afaf77d98d4edc51c359bb8bb4c099565f96426d33951a55489 +size 33554672 diff --git a/uspto/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57f7a330a7bbe269c21217f3913a38ae6e01587b --- /dev/null +++ b/uspto/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3edd2bb263a90ce27ab0cdec5766f2257ccd713304cc523a724146b102941c27 +size 67109160 diff --git a/uspto/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75a0603be239ed60b68495a343f33e4a8b8b1a41 --- /dev/null +++ b/uspto/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2bb766bfbd5856a9be018aea0d701daacfad8f2d5934ecc29e3b7c84b376078 +size 4192 diff --git a/uspto/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8c38e89a55a69072272be922170b52e5d1a4f01 --- /dev/null +++ b/uspto/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfcb9b798a7f6e5fbb06364104b090236c7868e3b282f1b3f1390876786e6bb +size 8388848 diff --git a/uspto/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..724abb125c939e1e5d8911edaaa3572b060aee16 --- /dev/null +++ b/uspto/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:144845469ac403e209926f48eba6ac6ddfb3416952816d87564aef6c9a82de5b +size 25166176 diff --git a/uspto/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0512972410b4de2013a887100092fb7a12637dd2 --- /dev/null +++ b/uspto/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0057e2e4f828d37654e7db2bf8e83f6a7ebc33a64fa06e0545c947f9380f965 +size 4192 diff --git a/uspto/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..799fd6838aa17d74c4730ddc1f915622adfc2ad5 --- /dev/null +++ b/uspto/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9771448652a18d3e936fa278266c57c57965b7413e88d66f349bd0638c3b4cba +size 33554672 diff --git a/uspto/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ad137f366fc8b58d473d3866daccf91e3e6a3db --- /dev/null +++ b/uspto/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f68e882d10d25d368c5be3b67705a6d39e0ef3d70dc23bc8f64e272a789ec0 +size 67109160 diff --git a/uspto/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3aafb6b3e644ccbb2d0032bf93fc27b5efc1e65d --- /dev/null +++ b/uspto/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d86c54f575be0aaa59c8648690661f682a4a1208b1320bc0044cf46495eb99c +size 4192 diff --git a/uspto/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fc94d537cb225c5f3d6eacbed456199aa2a1f18 --- /dev/null +++ b/uspto/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce67f577c1341d8fce784baa636eab823dc05375b5f41131c226f20811f68e8 +size 8388848 diff --git a/uspto/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..591b7f8bd0704e7fd4aed2724cb7fae261daaa89 --- /dev/null +++ b/uspto/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b82012a97de9b67d628dfb643188b3358bc7c290a87db659150f5c6f87d4705 +size 25166176 diff --git a/uspto/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d6063e48da73ed9a48645128b272eef1bb6cabf --- /dev/null +++ b/uspto/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4335dcc1f4aca3c1e3aed5366ee566dda00cd89bbf9cff06ea47acf51e7c9f +size 4192 diff --git a/uspto/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b14366b29a187fa115967a07d281a0945a759d87 --- /dev/null +++ b/uspto/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ae5697d0943070bbe2c6ec2eb0f8e288ee6fdab85a377cee247aa68209dce4 +size 33554672 diff --git a/uspto/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fd32d10cf8979de721df3236af2fdc0d0df0a47 --- /dev/null +++ b/uspto/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0246e772e554f48eac662609fb44428df29a10f49229d37e0bb77084c51c32cf +size 67109160 diff --git a/uspto/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1c7e9aa3ade0a43bef3d8098e98e3ce8cfc00a7 --- /dev/null +++ b/uspto/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a969c0b509e3982e09072bab4a40bfb035975f77f88d3e0e3102ca087d7dc7 +size 4192 diff --git a/uspto/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7c077ba779f5ec3afddd09c3afc6f8a7ebe726b --- /dev/null +++ b/uspto/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee69b9deb154e6f31c4e2faee5fada7380b020b8508f513e35aa57dd0fc9860a +size 8388848 diff --git a/uspto/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebf2a05ffb8d13abb409a45184aba0b46c176e20 --- /dev/null +++ b/uspto/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db412c2fe4ba2f6ca87910995efa25ab35def471f53e727d2548598e4adf296 +size 25166176 diff --git a/uspto/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b987b951daac3e34ec6cb8894ba1e18bc50d4db6 --- /dev/null +++ b/uspto/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1eafa4b115e0bc119088ce6d259a677295b828c6ef250a3a2ac94a083450acc +size 4192 diff --git a/uspto/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..253f3394e43674ae2f02832788426c5ded06f23e --- /dev/null +++ b/uspto/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff22e410668ea02befd9536033c3713848f98551d3c2b058e529c703c99a2ce +size 33554672 diff --git a/uspto/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b02f7b0cac246189e26e71f4796b1eb8d048ea27 --- /dev/null +++ b/uspto/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cddb582a80a27dec281aa3f6bdca65d3152b26c1fdde8b283e8826dadc9c786 +size 67109160 diff --git a/uspto/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee50d56c3a0d4c319c75e3008bf2756415ab8c9b --- /dev/null +++ b/uspto/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d3acbdb609deb560fc73c078e419370f3993deed8158de73268aecae5996ea +size 4192 diff --git a/uspto/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0aa4046f075459fd0d1fa39c5b509a0d06aa79ba --- /dev/null +++ b/uspto/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaba99cad7f389113fa8630ffdd341be7bba1f1dfeba0c32b640caf9f103691e +size 8388848 diff --git a/uspto/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b35b799b42f788e1be6155d78d4d9f37fd3b925b --- /dev/null +++ b/uspto/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:738f71af9c8457b8f87d3bbd83044f5aa385fdd97421d48eb19ba8dcf9b54969 +size 25166176 diff --git a/uspto/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cce9149a7cad1f6c4e958bc2ce6d650296c2b88 --- /dev/null +++ b/uspto/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c4136f655b1d2dc510846e189d98efd9f012010d08ef75add93f4c3f2c9faa +size 4192 diff --git a/uspto/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3578ce9b5b38b99321343cd62ad4c4ff1940494 --- /dev/null +++ b/uspto/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbd6ef37274e7074ceb4a7958cf8da8ea0f1ae1efcb0ba63020d7cbe31ed4be +size 33554672 diff --git a/uspto/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff280614927c5341f5f441a7b3bc258a444b6a16 --- /dev/null +++ b/uspto/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb2b1b092bc704dd7a4b809928b5f594287f36664a0f0f8d414196ef9227fbc +size 67109160 diff --git a/uspto/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b99b3a66baa50db400f4f60f1acf3bde8b9d5db6 --- /dev/null +++ b/uspto/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f82df0707dfdc0c3e442469a578efcf33cedfe7ab568b2eed475a58a9da00cb +size 4192 diff --git a/uspto/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6a8b2c26d053fdaad0fda1c83c0f272c683417f --- /dev/null +++ b/uspto/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdf49e338cb73cccbaffc4b0eb713e4d96bd45fee5e441680ec14ffe0b4134f +size 8388848 diff --git a/uspto/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5873f64c2a7083e03fa565d0811f49be6e1cce7c --- /dev/null +++ b/uspto/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b50f300bc717a670a59e1435b9c44caa00f12ae82d3505db0a1b17dd4945a9 +size 25166176 diff --git a/uspto/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9f278d5dc4be6e86ad6b5e55b69ca9cdfd6a3c8 --- /dev/null +++ b/uspto/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b655e0b766cd37e9ee1bb39b4ceaa3213605ca0fdf9b60a428334863373e703 +size 4192 diff --git a/uspto/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f4717f870460b3158a248aeed9542ffb56c0923 --- /dev/null +++ b/uspto/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e605ec549b70e685a5050c443915007d76193c7b6733520615cc24882dae1a7 +size 33554672 diff --git a/uspto/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc496039c88e0c7165069a6697f2865f8b811fd3 --- /dev/null +++ b/uspto/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32812e132799ce6568639e8ce8d703cc3bc32a93226b5cb5927d68fdae4180ac +size 67109160 diff --git a/uspto/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1300d6b9b9d260e37dd31784a3d7731f81c5a11 --- /dev/null +++ b/uspto/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6090f995b1f76c72f0d6bbd7c161d2adbdd146a82e3e3fb543e2dbe40d038a0 +size 4192 diff --git a/uspto/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f031daa8124ec891fcca61460a645b128bcc7f37 --- /dev/null +++ b/uspto/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a47159d436fb67a49b0c420c7a53b50c159df9d96fcdf224ce2dc4c206e855 +size 8388848 diff --git a/uspto/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b2290bb7d1d9d9d8aee7a38ba04eadc1ecea001 --- /dev/null +++ b/uspto/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8034632457c653dd9dd28003716b9e21e423c43a815de44e3bc573d8fcc6f0e +size 25166176 diff --git a/uspto/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f2aa63ea74cda5eb2e41d1ee99a1a4e02f6a348 --- /dev/null +++ b/uspto/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d038c57820738e850e97f0b04c54d8026b71513628d2c1289b31fa2775b6b6 +size 4192 diff --git a/uspto/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae481cb9c0bb7e481857184bcefac96278fd5eed --- /dev/null +++ b/uspto/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb19a2821d274fddc4e947a8e80447802f541cb09fb487743aa17b1d00747a58 +size 33554672 diff --git a/uspto/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82fbca1fc8fee682abcf226283fede04914cd4df --- /dev/null +++ b/uspto/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361f944f0e7c4be26632c8752b39f6cf6e543989308d85271f3472a289cc145f +size 67109160 diff --git a/uspto/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0df920920cc597ea161041612d5fd47b291743fa --- /dev/null +++ b/uspto/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12515a27ac11b309ac014cd2e9767911286fe447f6478e91fc253a1e512ae6c4 +size 4192 diff --git a/uspto/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba5c18dae28877f2cd3401ed22d668bb182436fd --- /dev/null +++ b/uspto/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09648b4faf959202649fab7be2b5d70025c5011be1d05e93c2e8b4b2c5181e75 +size 8388848 diff --git a/uspto/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db8b627282f9a14999cdd985a76507bffc875857 --- /dev/null +++ b/uspto/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b943dbf2d6cbce75fc8efc0853961e9b55a48020703c91699a753366403cf17 +size 25166176 diff --git a/uspto/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a365562f50aa57f116ba0e854cc4470c0369b1c6 --- /dev/null +++ b/uspto/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2da50af4e1ed9247a1b45841981caea98960b7d7bc87a8ed08712ff9628bb4b0 +size 4192 diff --git a/uspto/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0300bee8396f6cae397ff0b2caaddac9fb2c1e6 --- /dev/null +++ b/uspto/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b125f009b3ac34e6d20d78ef3072bee411d6694d84380341fde4bf0429df1bb +size 33554672 diff --git a/uspto/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32b81368df7265c2529ceb285cd9ade52e3c0f34 --- /dev/null +++ b/uspto/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99edcb5878dbb7714cfb6706aba253eba39badc35d64a052bc373d845d16e67c +size 67109160 diff --git a/uspto/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90949a07ef747c26da0efdf4b66248ca420d20fe --- /dev/null +++ b/uspto/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59833d9e1885b423abdb49b915c35ea7c2420c64e7414cdd65a7b13085b68c2f +size 4192 diff --git a/uspto/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33b531d45f0376d79c24f5fc0b8349f839d169af --- /dev/null +++ b/uspto/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6226f9095dd18e89fe95b898597ff45e49101610c75674019a85273615509fd8 +size 8388848 diff --git a/uspto/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f970d1b65f0341c48f32864fa568b6db31ff586 --- /dev/null +++ b/uspto/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10aba3b287383334af3c2c8e56edb59c2b72aec8307ebce2a1f9823da7c429bb +size 25166176 diff --git a/uspto/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f29cdae8b83a7a271bcad0dcc789ae19d8acbf1c --- /dev/null +++ b/uspto/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d5ec0effcdba8cc64cde209fcc17db67e914cc6699f847fe5d06e5cfdab637 +size 4192 diff --git a/uspto/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60026f8b708f4ab130c848cf52b5dbccd0f3a06d --- /dev/null +++ b/uspto/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e65f72591c15ebd6029ca4a482f120b798c683fa6626a5696d4c50c1aaa11fb +size 33554672 diff --git a/uspto/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90d2e928922c6e5fd7f4f1111ca5676b998dc12a --- /dev/null +++ b/uspto/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c52a07e599adffa469502868ec507d5cd29cab0351d61df3167122d797a1a8e +size 67109160 diff --git a/uspto/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35818060d307fcac4345c687b811d34f3b373207 --- /dev/null +++ b/uspto/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec6add1cd765cb1f0200be5deeb580120d9381a9c2a998b111ad1b31e7a14f2 +size 4192 diff --git a/uspto/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d237a3cb8cfa42c9c79aeb4e516bb3596bd6f7e5 --- /dev/null +++ b/uspto/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8cdd360aef6c80a86b77b161acb317e914ab20421c7f19296d2c1cc1c87b06 +size 8388848 diff --git a/uspto/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de0fd16ec40ee04693befb813d1ef1b1be3bc794 --- /dev/null +++ b/uspto/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1488c66e306a37689f897cee73f387f49b38d2a6002aa2b5aad2f2e10be40e +size 25166176 diff --git a/uspto/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ef4e4a9c6a420bc4a3465747bce0170c454059a --- /dev/null +++ b/uspto/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003bd89b8b754349cb5bb95f0d53724109c8c4d7b10f311fdd3546e2b032f44b +size 4192 diff --git a/uspto/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc182f149dce08fde5683ae0e168184f74d127af --- /dev/null +++ b/uspto/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964d25fd65dd69fdd499d6aa87a775349b87abe3de17702be03074e25c8a4c25 +size 33554672 diff --git a/uspto/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2163395507a021cd1b28b3047bd2cfe686eb72c --- /dev/null +++ b/uspto/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387387923a0e2e597c64522bd9c0c5fa64895b1be4e99e53656c647d34cea99d +size 67109160 diff --git a/uspto/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56b8c4b53d1d228d968c3d6cd6053c049c8e74c1 --- /dev/null +++ b/uspto/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cbf52a36c2e26e819b192c7122138673e677084449afeb88f0720be94c53ee0 +size 4192 diff --git a/uspto/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1aa0e187799a480c85a6efef68a80d326559e2bd --- /dev/null +++ b/uspto/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50deca90c1a5093f4fb6cc994243af9fa26a04ab7559fac3c59fe6c320bb420a +size 8388848 diff --git a/uspto/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0309e5765979a9ee719c92811d8cc5b55f0ecb63 --- /dev/null +++ b/uspto/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dbd9a078166bc0eadafff0bd6700ffcc8a1c42caa5a815abf8bb68ab3c7cbbd +size 25166176 diff --git a/uspto/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e99539d5eb1758ada4a519296bb654f34a0d89e8 --- /dev/null +++ b/uspto/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65886212d4d2eb539bd29f610bea60a812295e5c750ac00187ce7701d0d01ab +size 4192 diff --git a/uspto/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cef62fc376ffebfad16b412401e3ec2c27af1417 --- /dev/null +++ b/uspto/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6d7dd59f495d0eee5205325b4caf212d4bf5b605ab1e0559521a6f1ae63545 +size 33554672 diff --git a/uspto/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dae8d39ffc9f869fd13c9717570c78f434e3f52 --- /dev/null +++ b/uspto/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688340348e623567c58a0a3dd5eb1d4e057d772538835a66721981d0babc5333 +size 67109160 diff --git a/uspto/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6baf31633c32099501f7be154ef3503c20a18500 --- /dev/null +++ b/uspto/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ef8f5ed9d665f01e9679068776ce90adf9a9eb948bfc2072c21ef45d654a24 +size 4192 diff --git a/uspto/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d48528c4d980f656ac2ac87fad5a562b768dec4f --- /dev/null +++ b/uspto/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab523cf446d56787374ef5f9d93267651c74959f1147d834ed3150d9e6478e63 +size 8388848 diff --git a/uspto/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9deae811ca100e27036532f9d1e462ecb3706e1e --- /dev/null +++ b/uspto/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e900071804306fbeef419d55064f39df970b5f002b7e3595360ef31dfa971cae +size 25166176 diff --git a/uspto/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01c8b63e3b0c27dcf4249a5d4b15bf9bfa14aa20 --- /dev/null +++ b/uspto/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a46f136aa17395489291ec8477ffee672f069b211c55baf304ee48814a511d +size 4192 diff --git a/uspto/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..044ed4245d4dbaab2ddfa03198ccc81ff7a5a4a0 --- /dev/null +++ b/uspto/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473f3b024ebc6bd51460eef064a36b9721242ab7baefedfac40971959666d49f +size 33554672 diff --git a/uspto/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63ba4be3fc64af231cb5dc512239d08aa4b46275 --- /dev/null +++ b/uspto/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecd8196792002516de3600af8fd4414a8dc80c89ad9c807712739ee220d1e7c +size 67109160 diff --git a/uspto/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..064ea5607d58021dabd6c584ed01ff8cc972c5f4 --- /dev/null +++ b/uspto/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fa9dc90fc46ba82b3637f8965606fde89aab218f9a7c2dae6b2edec32d0b96a +size 4192 diff --git a/uspto/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7634568758907c53b0e04a75ccae10360b3b057d --- /dev/null +++ b/uspto/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681cd0b720cecd529f5735e0a996770d874318d186c1ffa661263922b995a066 +size 8388848 diff --git a/uspto/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..651f748e4a1f79f34a87fe1baa2b258a89261585 --- /dev/null +++ b/uspto/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e3ea97ebecf75f1e983420e28eba221e0a4096123a14eba57a813dd8a9ba84 +size 25166176 diff --git a/uspto/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0247915ccebd0a91e107508a3d918f2261e7947 --- /dev/null +++ b/uspto/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4a2d6b967aefe943b6d85a6e4e2e206e52d6008def06443e4f226793ae7ceb +size 4192 diff --git a/uspto/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..185420a36dcbd2f5e99bc798469a7bf2b6fa00c2 --- /dev/null +++ b/uspto/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d4d44ec1958a30edca18e1b1b1647b3761d221e5d86c171af1a808b21e8cd1 +size 33554672 diff --git a/uspto/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7430a0bfe264a00a04ae9cdcd7c01bd830761765 --- /dev/null +++ b/uspto/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442f9cac07d33a95ddeec74d8961966933a8b198e903e78f08f5f5814b82b0af +size 67109160 diff --git a/uspto/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37435f3457d0ecb3f8ea4620e27d53fcabcdb528 --- /dev/null +++ b/uspto/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44adad5047702dc94d8d5a798a32b46aeeb4d0bd8d181b6852866dde67d73755 +size 4192 diff --git a/uspto/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cdd50718e0736c022eead08f9f890da6df19ba6 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7af678a2d5a84eff30f90c819b0b1b81fe27fb6910cd1550d768777eacadd8 +size 8388848 diff --git a/uspto/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..baf3a51f2b3016ba914e8bb5d3d127f251a8d183 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa600c1b0d4206c43669b8540cd85349ecd54981443ceb4e86331abc34afbdfd +size 25166176 diff --git a/uspto/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5b0edc9143960edbf9fc4e7791d6153bdb5e8b0 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4acb1ad0fb622b9f062caec125ae21add4d4fb559d7f70f8bde191105509f05e +size 4192 diff --git a/uspto/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a07eb8d42d5a5e26cc5ca1e1955024f80915006 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbebf7483ea4226bd457f0c49e06bd6729de4cb8ff6e1320d2db3744d26e029c +size 33554672 diff --git a/uspto/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..868ee9dee0f0a6bcb4c9edb60ea2389cdaa88163 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a793b7ce66a3a4922ae01e44f0b31ca8171a711b0accbb72ea9ba54d5e8cc127 +size 67109160 diff --git a/uspto/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eeb95d5fb2aa83b39c7b8ed19585bf16198f839 --- /dev/null +++ b/uspto/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d039209828c3bb968c88ec87afcab0ae62fe42f4fd1be456aba2263e5a3049 +size 4192 diff --git a/uspto/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42c6ab4b0b53ecad4323fe88e521bc275f10ab27 --- /dev/null +++ b/uspto/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a9d8d1fd45a6fe6b1ae43fff5c7f423c856bb8a0707f6a575d0dab48742e90 +size 8388848 diff --git a/uspto/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05e1eb22e6cb8ab2d883eac25dc19de4f84d856a --- /dev/null +++ b/uspto/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26564b8865402db6adc6a98b0fed27c653254746f4a21fba9620dfbcca7b3c19 +size 25166176 diff --git a/uspto/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b9803e747768f6e17dcfe5345703435950cf827 --- /dev/null +++ b/uspto/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc9b5a981240e8372409252bf426cd927b18ebf0990da4fc8f83c38cb765e4c +size 4192 diff --git a/uspto/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f531ff0e1a3bc5daf10e59b5c8113be65bea4a9b --- /dev/null +++ b/uspto/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaea0ea069020f43f1c39ab17e5ef153f4bb4bf46eccac7feadbb2e40c3e1015 +size 33554672 diff --git a/uspto/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0e063ecd2a40a5a30eb493a95cd5443dcc2a474 --- /dev/null +++ b/uspto/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aae2aac6aaa706a3d93dfcc8c11b1ad9ad89376f577ce3cea547fdeb474d6ed +size 67109160 diff --git a/uspto/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..113a6650a7a2a158689fc8d05494688ba6d9591a --- /dev/null +++ b/uspto/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:444f7fbd0985f8ae5c16e1eda840b5f04a1b644e7babfbbebfbf438eb6e530cd +size 4192 diff --git a/uspto/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41e6c035398fffed91a2673855a0130282f1e47e --- /dev/null +++ b/uspto/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12e413db0e0ac1634f9c3840be7cee6c06fc60edcb4158255910d50d026fd54 +size 8388848 diff --git a/uspto/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8445993acee7636ee91e1e9f0c77b4871bd073d5 --- /dev/null +++ b/uspto/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa684cf648e7f9a86e6ec972cd42ffbd8758a7153c23f5b9e142755c4ae81f7 +size 25166176 diff --git a/uspto/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5f9be5075ed27389c02c165d0e4c5db4a2cde56 --- /dev/null +++ b/uspto/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a3697062ae0945a7b985ebe2ec994dfdde11ed9db98becf2105acd738ae1a1 +size 4192 diff --git a/uspto/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2753aaf9c83a2a5f402f09f00be1d54f9b26e5b3 --- /dev/null +++ b/uspto/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dadc7fbafa293b3c0c6f24b391769dc3449b0ac6342cd2faf2422a617a0e374 +size 33554672 diff --git a/uspto/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c89b40a10b82228c826533dc21fd0526b611fec --- /dev/null +++ b/uspto/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4cec439a58f6fe7c288245d02fbe89f0bbc1c2ea4663f5b4d0d4fa3c261ec59 +size 67109160 diff --git a/uspto/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4ab0a6709bd7179cc197513e0126004801ae3a6 --- /dev/null +++ b/uspto/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57b9d580e947dae8c7ed5d4ae6377d0fcb2170ac08ff0b10c0c7582930771a0 +size 4192 diff --git a/uspto/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..563e8275340db0da4c1cfed1d56762465d342ee5 --- /dev/null +++ b/uspto/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b299e064db04ce9fe58f81d4f3db815f59e97c9491c6b941a6f572712afc36b5 +size 8388848 diff --git a/uspto/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99051e32023c2453046667faf45501cebab166de --- /dev/null +++ b/uspto/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b169f4b10d3459f2024958efbaa514b41839dc0ad12e45fe2bdcfb325e50076f +size 25166176 diff --git a/uspto/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37ab9c5502e62e0060f74e68ab2ca4b939f1027e --- /dev/null +++ b/uspto/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36f5fc6b10459a7a0ddcbbde9bfb28a31e7615d0b9fa82b9d40ffbd9228eef7a +size 4192 diff --git a/uspto/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6f75ebe75898e243fd5ffed6b45d12bf812a6ef --- /dev/null +++ b/uspto/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a57881758c82475ce7ce09d043025e5e06b0711f5c52f4ca99030df5a06aa0 +size 33554672 diff --git a/uspto/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f7ca9a15e5222a9e3a240c8a6a2d25d6d23d532 --- /dev/null +++ b/uspto/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d7580637678ce221d20053ee0f5a1828c61fc1a2b5d9a72d88b7fc471d9ef1 +size 67109160 diff --git a/uspto/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca9aed5564c01266b5faa46ffdbb90b899692414 --- /dev/null +++ b/uspto/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dba3efbf187ef00048b969c7e1f5963b384eaac5a7ac6a595d35e29ee25c422 +size 4192 diff --git a/uspto/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caf3d77b91f35f8f340f673f513bc4c1cba30d8c --- /dev/null +++ b/uspto/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d5ac4083f19cbb67d2ca82367ad7360e9283eb4c015d09cac770f667394f2d +size 8388848 diff --git a/uspto/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..601624ba240a5e8d6d34fb3e175d8eaa1c0553a6 --- /dev/null +++ b/uspto/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2275922d57e4a9e00fa66371c0af5f5458df5fcdf986e02f6618a67c8f2afc +size 25166176 diff --git a/uspto/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f324e0c5a172a69eccf4a9e9164e4df4f6d8fe71 --- /dev/null +++ b/uspto/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e875274f2fc015f3a17ee38a12712adb028530628ae6fd2d6c7587c4c9c69901 +size 4192 diff --git a/uspto/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9d929d7cde9c643ec7b2622b030f8e07d43c918 --- /dev/null +++ b/uspto/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9622a29ca3d02e6ef31d0e19dfa55e389e595094d287abc3ed82eea4f050bed6 +size 33554672 diff --git a/uspto/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd14e60610ace21933070152a0373928cc292e0d --- /dev/null +++ b/uspto/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3060591dc81fc106a5de6c373a3eaac4bb365b52c8690903c8e8a7e877c61fd6 +size 67109160 diff --git a/uspto/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f5db1358623ec584e106dc2a341353d58bcd409 --- /dev/null +++ b/uspto/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21eddb8f2c8475f7dac3c0d7d46b77473411639a965cf929727791260dfddf +size 4192 diff --git a/uspto/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27315119a5c21c632c48dd8b5d946f893d9c3568 --- /dev/null +++ b/uspto/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c661c5112a5551e689cca0866085e0fa920a3685fdd4450367b55483d1b4d6 +size 8388848 diff --git a/uspto/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66261f97752de65da1e569c40ad3a45f1cd7fac7 --- /dev/null +++ b/uspto/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9174edcdbb3f4bb6d7753019763cc6ffba07d451ee892a75b97335b60d66afe1 +size 25166176 diff --git a/uspto/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fdbcca65b085ba637c3ac07891cf06b06fe0886 --- /dev/null +++ b/uspto/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce095291998431a6ab4e9a9b03d248f403b2401086765bbbec716a8ee5411678 +size 4192 diff --git a/uspto/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea829d30f763e81ffb5dec78acb340796982bd3a --- /dev/null +++ b/uspto/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c897982709f81e6e93d3ab606b17cf42d6703cec7efe38a19f781fb4f1d9b8 +size 33554672 diff --git a/uspto/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d27fdafaef18407e21d014ceb0a78955c2324a7 --- /dev/null +++ b/uspto/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a4e1f9214abf2f510cd9b3b042c336a0fe0efefd90bb6d2f22d0b41f52c209 +size 67109160 diff --git a/uspto/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..700c9703cf29a030c37804e1d518b7d3d5c6ce67 --- /dev/null +++ b/uspto/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85bcf4a2346f49330990058ef3b5ee96746e635202d997f3ccc36b71eb523b31 +size 4192 diff --git a/uspto/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a5db5d067bfae74c04ec9b0e2a9cb17939e6c5a --- /dev/null +++ b/uspto/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc11190d1e61979e679c842dfd8102eecc022651b5bd73c9658d60d3fa343ed3 +size 8388848 diff --git a/uspto/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6291797238ddf13382c743f2fccfcc4e233b1ca1 --- /dev/null +++ b/uspto/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795874fb355146746ad1ce90f367508f77630293f4a63b2474d10e428487d5bd +size 25166176 diff --git a/uspto/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78cd5396ae0a1dc6327e6f965f207cfbb25ebb65 --- /dev/null +++ b/uspto/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd2cd797498ee8e718c9399a0e010fac3ac4b5880d92947ed2b6a419992088d +size 4192 diff --git a/uspto/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c985609d9c3b251e0e0c9ca1a553e0a87d3533e --- /dev/null +++ b/uspto/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8814cb2efdd583391846968c7be8a98bcd3efcab20aede7958f6450580e6ec +size 33554672 diff --git a/uspto/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0728ac65a56e7f5ce797601b99d365bcf62d9d9d --- /dev/null +++ b/uspto/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e934cbee89ae7ba7089631e77fb76efe05aedf750f822ca5437e79e5a95a9cbc +size 67109160 diff --git a/uspto/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a13591ddc0705011a8e14a37c2047f8ed74b780 --- /dev/null +++ b/uspto/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71735d417664d5b3f44da8864896b3bf03178938b93a8ce65399b9108d3181d +size 4192 diff --git a/uspto/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..906deb30a1b1db7484c002f19ca1f158bef8b213 --- /dev/null +++ b/uspto/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4483b3cd78e54e8bb7315a31783c9f13e8a314322f02645d8afea30d03a24c91 +size 8388848 diff --git a/uspto/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..935d2d524dda15f60d92759ba600fc2141358ede --- /dev/null +++ b/uspto/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ee412381eff513619bfb3f5e4d9e1bf71c0d3937f0a680a8ddfb92478b740c +size 25166176 diff --git a/uspto/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/uspto/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1be8756fba6c4f484565d6fffcbd68a0adf1a24 --- /dev/null +++ b/uspto/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023226a77631180749a5b79421693bf0c3f5c55343e20e08b0fa1a31359e2593 +size 4192 diff --git a/uspto/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..151a40626c8778a045d4e9c01c52a2e5ef15b8da --- /dev/null +++ b/uspto/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d67777d90851983632297d4c3123059e76bab828e5351681310bcbe2e6dc5e +size 33554672 diff --git a/uspto/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72337351001209ac7e034e90976de54a19fc63c5 --- /dev/null +++ b/uspto/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df91a73a1972f4dc6496e5170abba3f20d23bf94db07aecd2077b327d242d90 +size 67109160 diff --git a/uspto/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/uspto/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80880650eeb80cecde537060fda7793f0f6eeca4 --- /dev/null +++ b/uspto/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95beab2ca346d26d24b196cd3b5611b5b84fb886a9f7286a49d1576581a2c976 +size 4192 diff --git a/uspto/model/final_layer_norm/pp_block/model_weight.safetensors b/uspto/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..899be08613dac7535001061f9a1f2874cbe230ef --- /dev/null +++ b/uspto/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332f020110bc4c02d4ce912d37d10e472c71326d6982edff7caec1b0d47444fe +size 4192 diff --git a/uspto/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/uspto/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..212560dcfe6fd9507ed10ac0cefe08e1c3c385bb --- /dev/null +++ b/uspto/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843abc1935e05036e2cdc5f2af8bf33caf0edb5bda9e46270a7c1c3a48aeb539 +size 205914352 diff --git a/uspto/model_config.json b/uspto/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/uspto/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/wikimedia/checkpoint_metadata.json b/wikimedia/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/wikimedia/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/wikimedia/config.yaml b/wikimedia/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..498b10a5c6beb82179fbc74810b872ad95f413ce --- /dev/null +++ b/wikimedia/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikimedia-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/wikimedia/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e3a05c1b606314fc33bb687cf20c1c2ef695e56 --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510533b7bd0e321881ad822e24b4063619d94f6b2c70d367ee08390eeefd3b0d +size 8388848 diff --git a/wikimedia/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaecfbd831d55cf61b4fa79b8576860fadf2d4af --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe39f5dbe98a2d3047d28969027636c1cbefcef7fceb5532bd3a7176293f16d +size 25166176 diff --git a/wikimedia/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2309181cbc561439c7f52d30be03765bca53727d --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecc6f13f9226b0c7eb443f21a5cfefe7ad5163a9c1e37f3edb90566e1f3b0ed +size 4192 diff --git a/wikimedia/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dec71576dfb31dcf4bdfc3a626c73ec5c7df48d8 --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2545bab653f0933230c69932fa9f3857097f6a35e7649e38a38a0340addcaa +size 33554672 diff --git a/wikimedia/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14096c103c5094d291e328a2c64bd47a84bd2a5d --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50e288ebe9fe8a4c6c16c57490cf09c042600b91025b0a818c574f476da2960 +size 67109160 diff --git a/wikimedia/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a89163c4d171b5bcedf1d322f9c7f8fe1d0753a --- /dev/null +++ b/wikimedia/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:368e1fe5cdfb7b7bc0d955f462e77ee462bc33260a42f6034c27ff2887c91096 +size 4192 diff --git a/wikimedia/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55248e0d84383d5a6c5da7a82cbfce2f532340e1 --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d70d29276ec28d1cdfaca3ff4698e2a6c6be3865ffa89052ef5a3f75561a8fc8 +size 8388848 diff --git a/wikimedia/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cdbffa89112b1abba9eeb6e6b44361e63d4f265 --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83251fb5298343a404dc29e1df654fd66a1e7e292da32d3068f83226c2ca02fd +size 25166176 diff --git a/wikimedia/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9770baeb7084902bf4206184f2ff03516bbcf30 --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea1724af1225c062ee99c335e5f3ab83751b6784c7893786c794fe16d68d215 +size 4192 diff --git a/wikimedia/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64279fb76efc17da9519b6c383299d137178a5ab --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0377f092be62768e1216afab9b36c7bdcedd9addc13951c8325d01f1f0c47b5d +size 33554672 diff --git a/wikimedia/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bc8500e62293794c334364112a57a38249db466 --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbeab350f9c1153014111b0a47e8a63b1cb5b5b201c05ec8d011d49ca9b7dc38 +size 67109160 diff --git a/wikimedia/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28193655015f1bc0d171002bddbd499cda6060a5 --- /dev/null +++ b/wikimedia/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614d4d86d47ccf74ab2a4439d72235e7af44fe9f5164d55e3fee16f97743795f +size 4192 diff --git a/wikimedia/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15ca50f3252de5ce45df726a75f4b0b4667be6ed --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d06bf8ba3b65811dd73d97c676fc409bf24b1d447df373bbcd0614dd37f1924e +size 8388848 diff --git a/wikimedia/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cce6d01425919e7006fa3bbc57beb536c4b83377 --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad4059bba7f1cb8ed71f243dbfb9a4c461b9fc1c0f64bae0ad5667bf4d39182 +size 25166176 diff --git a/wikimedia/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..404217dc42ce5a1a471fdca45646afed0d610f7f --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1745076a3254881a6528437f69422a4972e77340b35aedbc5192cab0e4dc2c54 +size 4192 diff --git a/wikimedia/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fecde48f836174e6985ee28ca48df106f9253b54 --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3178c0b1c8cd47e7bceadf2411addeb242017d33e9cd7a863d72508f3faf9280 +size 33554672 diff --git a/wikimedia/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cf5aab9553016c7372b5810bf9409da7c95b386 --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977c2529ad933bc8a5f00a5e564d182880e56e967e9ded94eee0ea03a814a953 +size 67109160 diff --git a/wikimedia/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..595074664febda98ebf309701006c8e784c1ba27 --- /dev/null +++ b/wikimedia/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da001c1749f9100bf39c98d5e4392de1d28411c6187fd2f0e017f7bda0d51b6c +size 4192 diff --git a/wikimedia/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b01a5165a808e8264a2f5734f2d11a19c17ff586 --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7620844f22f5ebafede1e3ac1899496dc9202dcdf801983044a58ee343386dc2 +size 8388848 diff --git a/wikimedia/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3af4927ed5580c0fda63965eabb9a31b678c491 --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5df7cf3d5767433d59bea8e9c094268e1bd98a21a1edc4b85f28369003bfed0 +size 25166176 diff --git a/wikimedia/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fee2c4c6e1a8ecf6173c477fe00c5768a10898e7 --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2638a9c941c48754f212f121c4dae18c6b021b61fc78a378be5ff4077b2044 +size 4192 diff --git a/wikimedia/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed755ab07d506e65bfe68e030a0de8816e04e06e --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a199e25214976c3a17aa6e016efe544a8f1b5c41cdb2dce6411399fee4fcbd4c +size 33554672 diff --git a/wikimedia/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0608e1ec312cdc79844b6a81414936122f5cb26b --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b21f8d43ca50ee2a3c36cfce843936dcb1291a344c1071c6f5839b9c07fffdf +size 67109160 diff --git a/wikimedia/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f0a612918d916296eb5fa0946ca37e0d924c019 --- /dev/null +++ b/wikimedia/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44889d9bbae2c681c209e07ec9054072879c1e334d8f4130d5100f90e3b48ecc +size 4192 diff --git a/wikimedia/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14895e0d595952fecc6c5baceb8954527690c770 --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4065308de5393f935c448aff9601ad12f2f94e70aeb02bbe68c27873cadef3ff +size 8388848 diff --git a/wikimedia/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c53efcb251857f624ecaf1a18080a5cd93a0923 --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b84a0fa4da0f575144bff72fcdc37a2c3f8a1865cdf376eb21cd5e5a8ad619 +size 25166176 diff --git a/wikimedia/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d312245cbdcfdaa3ee15e61263c20a4b282f479a --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c81a9b98a6dd15ec1afbe03b797bd565c8705890d4e929938198f65831f565 +size 4192 diff --git a/wikimedia/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c556bdc2195cd1f7e9729394e688211f7771b999 --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156903361867b61492ae900f52a9224efba9b9fe5c777adb9cb280c1df85c2fb +size 33554672 diff --git a/wikimedia/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61f5c54be68baa48a289759bb60a31f404f02ef8 --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:948c940e79dfd128e4070d06ffd39c9c025b6180fdd3231f3aefbc37ea7e7b24 +size 67109160 diff --git a/wikimedia/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e2974a67b94123a7dda00884c5410bff5979ab5 --- /dev/null +++ b/wikimedia/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e20e1d45a25ae78b26406e34d3e96bb6a3f4dca834e2aaa41118106074275e +size 4192 diff --git a/wikimedia/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4d471608e2404923fc847ed9c5d1e5b38c6ab7a --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617c1ecf79275d9e65ff9fb0c053f4e0bee7368a95e80724af411a4cf2b0e1e5 +size 8388848 diff --git a/wikimedia/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b036b257baf5a5a85704bee5bc74e167a94252af --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a3b020b6d51d6d71ffb426dd87b629593646cb4f9a02ec787d621e8eb1b6e52 +size 25166176 diff --git a/wikimedia/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88fbde94831dba15184de6d55db93e642bc4e13b --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039d1a301d9de97c655d1bc92180b289fe1381feb645f30c38b5716319248627 +size 4192 diff --git a/wikimedia/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2caf6261f4851fdb5300993f36be3fa5ff34d572 --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a4d4323f8f186c365ce13097c258aa03173366e3b5ebf1a4fca34e60fd7b3e +size 33554672 diff --git a/wikimedia/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..752a4847fad31f3e9301f921eaec180cf50d97f4 --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0af5696262a50566fccb515284403cb00a8ab59d4b9d53aa28a3bb118ef73a +size 67109160 diff --git a/wikimedia/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..081b576f557dc4b5b6918261cfd1f6ac04b9a3f3 --- /dev/null +++ b/wikimedia/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d791dab3155589ae8d7bd1997a96622962eb022f72684a2bb92950d3f865f4ce +size 4192 diff --git a/wikimedia/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f53239450c9773cfa2a08ef4c139aa2bbcca208d --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d67006c64d594d22aa34ec6c7959803fbc82e10d8ecb45a33ae5f25d080507 +size 8388848 diff --git a/wikimedia/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eeeff1e8fec5e32608dbc71b195074cd53292c43 --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4a9f79a5620dd4f39138c7c1a4c3dd29ef4ef1740fd861ed64827735175aed +size 25166176 diff --git a/wikimedia/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a90896715f23240072aa9b1f372546be11729abc --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66997f44bed93823aa0bbbbfc51f55afca18f008125789836f54017a2adae6e +size 4192 diff --git a/wikimedia/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5191165129067c17c82b6edc691d969312dcab5a --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d8a63605d50985223213f9921cb3e79721a121e934542e847b005281ac594d2 +size 33554672 diff --git a/wikimedia/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88fd6999ad376a7c57e7caa7585eb89767d10538 --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57bb533bf63a380235a1da1e9e2d3a6558a95545d57ce8cfff43403ed04a898 +size 67109160 diff --git a/wikimedia/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8618bd1a917eab4b7e5e97856bdc9993c2a4ae4 --- /dev/null +++ b/wikimedia/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b61cde81b1abbf373fde67bfcb09765a14370b159ebcbecd4092ff2ae57a7f4 +size 4192 diff --git a/wikimedia/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89abaaa62014c3a28120b4362ce707185f14cf51 --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d77fbf502b6ecdf287a1feee92f3ed9309d01d4e13c3f404192ddb68200869 +size 8388848 diff --git a/wikimedia/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88fc815122aed50eb7bca482cd4c18a96279b48c --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96142af8ad9be20beb9a5e8145339cf432fa581056b0517e3c02eba755853a58 +size 25166176 diff --git a/wikimedia/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd780db7881345b97a49c6e22d1ea884fc50da53 --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61708d0d6cb9007115e7c55bb33b2efd576df15914a3086ef68553edb6aaa41a +size 4192 diff --git a/wikimedia/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9144749cbe94001c96469d74f75172f1ed9b80c6 --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ade6dcd9f1d26c0dc2e861aa330b6079beb134ba17e112e60d1f4863fab7dd +size 33554672 diff --git a/wikimedia/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc7d3e253f9525d9196016c88b9a91cc1b128a6c --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64e3df1e7d0bff007ee09c7efd51995ac31fd1c498209e1ded0d9a439db71dc +size 67109160 diff --git a/wikimedia/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..456cdf9e31c446775ccb5a7916022707f3bc43d8 --- /dev/null +++ b/wikimedia/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61bb034f46b710632d02c04325591c700b260f1042b160d2b531ae25ae4a31dd +size 4192 diff --git a/wikimedia/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d76f0e51118e5801a4a19e7baa2d9b0688065cd --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6887f6b0f6c71e7ea4f8af525445880001a0cd25edbda0e2056211aedaa8d4ed +size 8388848 diff --git a/wikimedia/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b57a1b97b7735fa3ed3ff6a8981b058433b771c --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b048f7f1d5cfab8419a94cf88569c51c916452c9393f561700719a60b8022ff +size 25166176 diff --git a/wikimedia/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e3edd1899280dc6334945ebcdba0aa05c9d53e8 --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246f1ae9dd402091006f1905d5ea70d334784dacc915d9b9fa8295e446f6afa3 +size 4192 diff --git a/wikimedia/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0beb0223a3282f4e9db66ab3c64554799a559e3 --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5112a1c3ff3100d2921fccd5a74ed1096b72a4b44f471eefe80018d198bf3b37 +size 33554672 diff --git a/wikimedia/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73df29a4b771f69432ce6b8b33a9724cd75708d7 --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c35b2e26cc190d1affce3584038152d062985af54119be5ace49982453d3ba +size 67109160 diff --git a/wikimedia/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..250f9d989dab89692462a178fb2aaeda950fb412 --- /dev/null +++ b/wikimedia/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5bf28f28adb865228b71b54fce965fe4c96bb3fb64fb2127851676b4f92327 +size 4192 diff --git a/wikimedia/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecc468d8a237bb21775c824375cceae23dde0506 --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f3519791e78b3ac08da34a488cfa07a1d08bf3de9d9598ad3b076f5c9005ef +size 8388848 diff --git a/wikimedia/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f4197af40d7250ecddb2d43727c46f4d8892da3 --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b90bae76f23f4c848b591272f27c3e2b02f270a23747a90a71b82297b0c4dfc +size 25166176 diff --git a/wikimedia/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..203cdd274f747a2c664b479dc02e00b7ca8ad17a --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f03e508eba27affa0c5d3e787aabf63e370d72f243f8f4f011b255374694918 +size 4192 diff --git a/wikimedia/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..480b2cae25f5e8480942993c0b49124cd08ab1b7 --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1baea7dd828b0b66e30e9e621ed6b3bdc37e72aaaf982018cdd3c33ae73db51c +size 33554672 diff --git a/wikimedia/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a27fdb21c7226abc075bb5446dc6a85add73684 --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d537b1c8b65680034fa2bdf8a8428be0e7c5ec789bd8d487b01da3dd5f412033 +size 67109160 diff --git a/wikimedia/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaf5f285a5276d3b190bf31c07373ae40a369497 --- /dev/null +++ b/wikimedia/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17aabdd1b44355edbbb64dd6940b3e05536586eddaf4a03d093447a30acff75b +size 4192 diff --git a/wikimedia/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cce2b5ea8d527c503993bf36ee4a399f3c8554c --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fa3728895421a6abcc1b4b906c75e39d492fe188fefeb1eca174ce81f704e3 +size 8388848 diff --git a/wikimedia/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..301663e4094a95ef56b360cf49674c6d9b720075 --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233330eb0560cd81c2dc770cbf9121d8760a6bf8d092dc7315fb84dc0128bafa +size 25166176 diff --git a/wikimedia/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eb9b2b822f255d4db49b83619ef3b431e2f5d68 --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d96fa578feac8f94c382576ed88bb4cde8aece0b53fa62bd7bc0349dd759ecc +size 4192 diff --git a/wikimedia/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2176f38d534c4b733162a7c55c53adf2f4bef5e --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81de8fe892de44ccae51b4d9911ba1d1be96597f429f605b5a17c3a77c374b73 +size 33554672 diff --git a/wikimedia/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2094cac562c07531a7c59c44c0e781fa9e5cfe4 --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e579214478d3584f82a76dbd0ea3821e4d854f1022946512c0cc178504cc516d +size 67109160 diff --git a/wikimedia/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e33481fb12bfb80b6a9674ad658d834771eb410 --- /dev/null +++ b/wikimedia/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42292b2a3830b1ec6bf2a1c65637a0c3f57bc54442db048afe89cdfb4b4cfaae +size 4192 diff --git a/wikimedia/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad6b40c08d74b322fd548d075f8dee3396ebb70e --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067db98dcdf44602500361f7ca4c93828377368c4b80a28dca6447ac732c22fa +size 8388848 diff --git a/wikimedia/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cbda267c8731dc0455cffeda262a35aa256e9c6 --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d50ef3b72ebac1e69ef5380192da79e1ad38afe33565bdabe03c179bda1617 +size 25166176 diff --git a/wikimedia/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2495d11382a2388e4c5b9e0dd98006756d3b5262 --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5312bce576b51738849ab86095d2c85fd997a3b3dc07bd5788b24c3a8ba0cb84 +size 4192 diff --git a/wikimedia/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d687104583b74593c44f9d4f168f6c04673e4d9b --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3217481f2452a2683af39307068316a3731134c8504eca407177adff86e8c7 +size 33554672 diff --git a/wikimedia/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06375e5a92d194d5ff7d8df3e0b74dcf37a20fae --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04d40b45491f2cc65e04e8da5845291f02895e4bde98ba2b57b0a4bd9e49c8a +size 67109160 diff --git a/wikimedia/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6543b6e4840ef03b21ecdc4134d0f25fab3ce1b7 --- /dev/null +++ b/wikimedia/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f170cb041798f6e182594c215ed319f1d7b1cd7363e12a57eb4961eb49da7a +size 4192 diff --git a/wikimedia/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39a8ef8ca2be055a4feb3451a333e4ef7ef2ce31 --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296b8595f7fc0f8f62d5358a4259fd680d018575fdd3017f9574cc4839bd18c0 +size 8388848 diff --git a/wikimedia/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5181f2ad6fdbef160330a463cecff2d00936fc22 --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cc3bb7c51cbd3a579f35fa7207d0cee9c2f161bb0f06c84a4bc2eed451a1c02 +size 25166176 diff --git a/wikimedia/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ca5fff23a6b49c280d40311acbb8454e3595dff --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851799dc62e64b253c1b4311dd3430d902df6c5446a31672583497198bfad359 +size 4192 diff --git a/wikimedia/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e69a5f5b15abd925fe7c3394df2c29ad9b5cea7 --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a482fdb123be48d5f1a1659b6d74a30fd080384bd386294433da69aae1df378f +size 33554672 diff --git a/wikimedia/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2597d98a12cce8d583829a8b49a5d5b97652751d --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a334ecccfd7c61d2183579ec44f30748068c07c3728cc4924efeed049a5ddb +size 67109160 diff --git a/wikimedia/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a269cf09d066062cca78c4f3be17d05e36c4c80 --- /dev/null +++ b/wikimedia/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155364be43cc2a00e38b1d02ef0900c52f6a4f93e02beaba7e703f93c132f6d8 +size 4192 diff --git a/wikimedia/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c63de47bdeb7b2a2ca8a2dbb853d11cc96b1d26b --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b74a5b832398d9e0bb91403d0103fc6cfec4759351649db4dd57aea43c383a6 +size 8388848 diff --git a/wikimedia/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57d1d05da3e3f0f9b0a5e6d32136618c8dea5b18 --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8cdec7b4482d0a322a6981c310e214537da724cad65aaa784466081463d14d +size 25166176 diff --git a/wikimedia/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809e9dfa0a2e3ed148af58987a17d7a0c8ec8a5f --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b5d809f73e699186c2feb79aa834d9f420b4c7ee6a1f514833304b521af36fd +size 4192 diff --git a/wikimedia/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58b622b23c971e791ae31ca63d97806078147ed1 --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0953b0ecfdfd8f0cdd98257d80933fe6d53742332c406dc1628c400377afa3 +size 33554672 diff --git a/wikimedia/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9222ce492f124d34680e4d71f4f93520f85bd06a --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de98d2e4f66ebaa05c3bdb949aac367d3ec46d9aadcf30e60e665b8283472232 +size 67109160 diff --git a/wikimedia/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf680b1e3f8f26f7cfacd73d736d021075d6b21b --- /dev/null +++ b/wikimedia/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b5002366dc2c637082b6c21dd62253f46bc751b318496a2383e64c8b88b6dc +size 4192 diff --git a/wikimedia/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a1e6a86d2bec700a9178302a406bc8beb866c36 --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9f05284d7a9e953fae43369713b08082ab2ce047567496f23def2a105cd18e +size 8388848 diff --git a/wikimedia/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce60827a199c222def390692610bbe60efa90df2 --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001b46f8064c7b15252408d0bb58054ad64bc6abce9759d6a347542b5e74d0e5 +size 25166176 diff --git a/wikimedia/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7897af1b67995b5c492fa610fac2399e2e0cdb57 --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464012e82b29c27512c2d5c45741ebc55471225230524da2456b751839f24031 +size 4192 diff --git a/wikimedia/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5abc6d25b3bbe6750c73b4eec68febea3a8ab735 --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b7e16b8c7680012d4dde20e6242da7b517a38ab8974189c6aa7a47cdf6c547 +size 33554672 diff --git a/wikimedia/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b299fd05addaf909642d349473242a819df104ef --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34dbf96a15b492b64a04286a93dc8ebaa8af7de90dd02cf09c10286e40460635 +size 67109160 diff --git a/wikimedia/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0c263e38a6559023c7596109e9a7761de991327 --- /dev/null +++ b/wikimedia/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ae935183b66aada715d2263b34e8939652be415d91b6a76a702ece9e61b0cc +size 4192 diff --git a/wikimedia/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e916699bf8748218d77a8f8f7fb325350e64988b --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48742b502eecb8b13733d475f1cb9f1feff2681dcadc7261919b5184442027f0 +size 8388848 diff --git a/wikimedia/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1bbb711360a01a1f676c31e8144c2d0507a29ac --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1837ed1d84c4bf6b71a3ad7777f524acc18a8d6fa57675d77a677f925dab4f22 +size 25166176 diff --git a/wikimedia/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18efedb5f43644981048613afd78d8620ff20edc --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e7152e90c428b78c0fadcd0820cc5e2bc5f6afec3ccb2eaeb8fceba6d98609e +size 4192 diff --git a/wikimedia/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c60af15e99ac7cda53f8aad9a0dcb069d06c307 --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470ffb975c54461c9e092d728ca68876bac377ed17e027cdaf55025e12e80aef +size 33554672 diff --git a/wikimedia/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc65b67e6743330e4ea8fbc6000f0f2f03cd66ac --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c03318176964ab953367d2f622a2a6e28f3677b637a46ed43e0adea38c3580 +size 67109160 diff --git a/wikimedia/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e1e40d9014435d58379ddc290ab3f480bcb5440 --- /dev/null +++ b/wikimedia/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae90bb1f9232501afd2670be4e0ca2c43337e51abc1c6995b4751aedbf1424a +size 4192 diff --git a/wikimedia/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..352127ae3c5f0dd236a954d4cfb3b6177283129c --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e54015076f88b303e70858f12cb9c6150b82ccaadd135ac0c67ae33edb78b59 +size 8388848 diff --git a/wikimedia/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2feed2d96fa82355a7c68fea0c5ada8b5a583daf --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e8ead3860f691caf8085362407e5e2eb01ac1e440f1ccefa9c244795199e2a +size 25166176 diff --git a/wikimedia/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dcc22509519a7ab924a85628147e9f34baa11f4 --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5befb15ccb1a086412686338c5e69ff0c1b4ac70f318940086fb89ad3d50e0 +size 4192 diff --git a/wikimedia/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76bdb4f7d3755b37a1f8dedd5da863fa4fcc986e --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744306bd13aeda855b7e0b5b2081ac05266cce5578331b6336a7e56e0596aea5 +size 33554672 diff --git a/wikimedia/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb9ed2a12c34c281cdc6012538adf0ad08e5c8bb --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902451f11a20c9ed3c91755ac60a2461f2ac428bfae7d2bc53db6cca18f1e214 +size 67109160 diff --git a/wikimedia/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af07a4cda48233277e718afeb85ea9c82e73c0cf --- /dev/null +++ b/wikimedia/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33749a5f4e2b490f6e72db4e364053b096944308fdeca8f799b5c8c7174940e4 +size 4192 diff --git a/wikimedia/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2924b8105f5f8040fda438ff22bbf484c9aadb7b --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd3d05a91c57dd3408efb11c5a7be725056f45c9451b23cf2f4db0426d44462 +size 8388848 diff --git a/wikimedia/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6b71f908dae3fa4a522635cd86ce729b8422ebf --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eaa3907b3c896b549b6b56355741e1979a3e953e968dcf17ab505c054aa51c1 +size 25166176 diff --git a/wikimedia/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d61fa4dafbaf9bdb8e4bdf77dbfd95dca4691053 --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9db9b950329d5112f8bbd9ed1db23e7291045123b35bb0120ba649942779bfad +size 4192 diff --git a/wikimedia/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef9bb2ba1e4dbe6c6e47b87d29e08be604063721 --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4bbde1c5cdb1141b3fe1ea0ac051c2b8ad4bcfa68f7ddbaf74ca08c2afd617 +size 33554672 diff --git a/wikimedia/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b911b08a2a98ea96212e74ee4542011eccb9205 --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09dee3a7033401b9eed230c5b73bce3746887d84dc105f8a6a791ad13719b2f +size 67109160 diff --git a/wikimedia/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81de11932aeb5e5d23bd051ec2b4ff79cc1d98fe --- /dev/null +++ b/wikimedia/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78682aa3ad0bff9d0d82a956a32f99f228ebddd8fc89aaa0ede38c859cf9e21 +size 4192 diff --git a/wikimedia/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3db24c7087fd966a07cec995a429e02dc47ab80e --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35636d088bdfee49a76b40a66755f126d7666c01be1fbee89b0d04be8936210e +size 8388848 diff --git a/wikimedia/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98d4f862c1e3049e2174b9b8dbe7973d9ecb384a --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0074f673699267f48f4ca16cbf8ee8c79cbfc43f8f071b18da00c92d785b795 +size 25166176 diff --git a/wikimedia/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ad88030607e4b74ff5ceca8e9913e6f3fd6a74f --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:285603ff5ed6cb98e387147da20b62130fb2ec41203b573f14f088d0c6921050 +size 4192 diff --git a/wikimedia/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cbe1d6a06ec1fad18f00b120f4aff68f685fa1c --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc35b0184d8a14532d5657fd3eb08d9842da31751235d4266f4361a006f1b26f +size 33554672 diff --git a/wikimedia/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60299face65e6e38413f5cf4dc20b99dc615a7ba --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08979845eb417ad0b4caff5032dc2bd40a06c55b0c66582a84f4f70ea33d04ab +size 67109160 diff --git a/wikimedia/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b6c977ef2a7461a76ab34d973cd5dc2ad2968eb --- /dev/null +++ b/wikimedia/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b286cc1813701f5a0a43e9c82e3b6b8397f99dd8367115cdb4e383a6100d23 +size 4192 diff --git a/wikimedia/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3b1e00972ef9d909a6b44f3fef7c4f94172dd82 --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e23b9b3ab4f867d48a0332b6c36039782ff69b0cd31143ecafd3967156af0a +size 8388848 diff --git a/wikimedia/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12c3a889c5f93ff5370d74f984b0b4d54881d3aa --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c608bf62653fddc9b0d770bac85c7e433527493e6c378e7d3829d6ba1eef31 +size 25166176 diff --git a/wikimedia/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53daee6ff4fec3d08ba1f644864fa83c54d28c93 --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271d4c350e176b093c4cd95476870c29c2ec0cc9e6b95b939ca5144963f491d8 +size 4192 diff --git a/wikimedia/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b17750cbd1168baee8b1de84e41d66ea6ce7c83 --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:527783381c1e9881c65fe178dd7b8df644889601f936b6969283a405ca86d24d +size 33554672 diff --git a/wikimedia/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92e6025d3836d11ecf2067ee7e26ec5f7df79e95 --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d14e65328212cd23c6cd7900d1a846ec1eebcecc65b1583e875c1f8e2e767b9 +size 67109160 diff --git a/wikimedia/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78efbcd13b70e479b76def7d86ab7ae8e7332c8d --- /dev/null +++ b/wikimedia/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d07a00af2acbc5687a67dbf429f076f6d6eb405fe1efb99c29c1fb1f2cc3b6 +size 4192 diff --git a/wikimedia/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaf634116e4d97ef97d7e7704b737d4b98d499f8 --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7afa9b49d6b3f37f6e8a7fba3b322a82e550d920e7ef54fdcb897d88da1a5fb +size 8388848 diff --git a/wikimedia/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8095df7852d9327b33c4a228eb0b12f293b6c10d --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9df27f67f14ac4227e8f39be6fa399d00490c31569f82744ac2f749b6d8c62d +size 25166176 diff --git a/wikimedia/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09e5259d464b7d1091f5e1c033e6e31f901b657e --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82878c74d712dfe797c6bc4ff62b623b228e911ed3d7360c517ef91ccf270d00 +size 4192 diff --git a/wikimedia/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..777986a9d9901c279a1d2b8c1bb808c56a4d1311 --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537e397e81dd7d415f1b89269d7a8f92c856c72567f11be514d53b98a8478734 +size 33554672 diff --git a/wikimedia/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bbbba567cc2a7933050b509fe56bdc3c88dfc7f --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6321a422ea372f2a6f6b1cf1a199c84b453be616bcc2d0ca9b9ab4bd5ed43dcd +size 67109160 diff --git a/wikimedia/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47e5cfde579dda4ca07ede0c4c693522a39a0b9e --- /dev/null +++ b/wikimedia/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e3de46aa20c546da14a246b7de8fd7e510149c9bbe29e74a2056652cac008f +size 4192 diff --git a/wikimedia/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c83f301682dad88a3e0344819f2edc36dd7ff81 --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e328385d2f7cae28e267a41b98713923a0b980435d1447d84c370aae7b760d +size 8388848 diff --git a/wikimedia/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3039cd875a27af460bcda9c95aa7d0a008ceb62d --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18cb92b8a9c418340c691e7fc0645941ca0557ac5be010fdd51b6ec4a170d6f0 +size 25166176 diff --git a/wikimedia/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..801dbaf901f3774fbc6109f177645e0fac65938c --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b2dea157c39ada9c44449abb4135971189aa94d52c833cbc27d7b96d7846ae +size 4192 diff --git a/wikimedia/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52485c746ed1f7c974a8c6f45fc47ce1fc811238 --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1f5d3e37997243327b26a02c92a97d8b5beba8fb07a37185c8366604e7fa7d +size 33554672 diff --git a/wikimedia/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ee2bdfc857e34f11e69c3f7bb13c6946e9ed920 --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850a032cd785977d8bf87ff50884db738c8bb5c6feaf54bf145817ac43e06954 +size 67109160 diff --git a/wikimedia/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24dd04b5bb8526a3f8fcc0b713a8f92a1d6ee192 --- /dev/null +++ b/wikimedia/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71aef34d405b923419702f8c5bc294f0a3d1e995598a0e7b228f0d15d5d6c87a +size 4192 diff --git a/wikimedia/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0cb5ded834b451cacc9553b877dbbbb47963afe --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d9024e3e5a0d7f338f5e2e9c0631b73f96580825f197c54147364df6d37261 +size 8388848 diff --git a/wikimedia/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bad17cd70e6f63d7cf1d816de61e6c14025e29a --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c76b1ad92c1aa0844c6fd0a6a876ad9704b98fd2da9dc166b8bca431f1a359 +size 25166176 diff --git a/wikimedia/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..804220c109cbdd268ce59f60367032f4350b63bd --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac85907c6de2908c816d5416192505490b47ed1366fb9f2bb38ba9169cf8556f +size 4192 diff --git a/wikimedia/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c60cb19739de8bbf3f41efb216b7c760438f6ac9 --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f5a34ea7905ccdd1bfb9b8afdcd16e73fff60cd348c9385974805fc07f180f +size 33554672 diff --git a/wikimedia/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c56fdd91fc3ef7e9d28a3d8cb1628b4c473646d --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d252704033b3e851c09e493eb7c924f92ccf1aa0850ffee0027e9d2bb22b0626 +size 67109160 diff --git a/wikimedia/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12606025b943a2990eb2433841b980e0993d18eb --- /dev/null +++ b/wikimedia/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc6296aa9210b67018566dfce9b0b12b0b0df9abfde6c649f25ceb89efe9ac7 +size 4192 diff --git a/wikimedia/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60c00a4574f9a592c85a48ef03b16dee1f5742f6 --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378d53baa4e5ada91a1b117b72d20582b29bd8eac2dffb6b842b3311cd6f10e8 +size 8388848 diff --git a/wikimedia/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0abfef91d656baf706f5c75b1297edb7a65f4eb1 --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e2b3abe4e04115e08f58346fdb56989587a15b2b9e54ca1c7b5c797403f5dc +size 25166176 diff --git a/wikimedia/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/wikimedia/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a3c2d92785e8d62cfe2dc14ff4e8a94cb326c11 --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e1fd0e3b9be9f6682872b2d3deeadfb2047d93d121b90c613f28f6117ab325 +size 4192 diff --git a/wikimedia/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fe209d3300660f9beacfb6ebfd46a604977558d --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c1272813731817eb09c47b09fdaf3f6efc28c7769ba5de26d25094b437dea5 +size 33554672 diff --git a/wikimedia/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2cf135f209ec5aa56c9d5ca19dceb7a0ae8412a --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f880eb9b9814cc9d3adca6a7fe982eeea0b60232ff849e09e4e7e514784651a +size 67109160 diff --git a/wikimedia/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/wikimedia/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f18e8f307bf64d1505390de1b2a3c9427bbd76e0 --- /dev/null +++ b/wikimedia/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55656769904860363db7e8e219af4cb2d3cf843c55c859008b69d9e4c2c87835 +size 4192 diff --git a/wikimedia/model/final_layer_norm/pp_block/model_weight.safetensors b/wikimedia/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..053ea85449923e9767d72d85c718b0a95e4d8fbe --- /dev/null +++ b/wikimedia/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cce4f04e0cf4aed82b4d63ff4b22af985cef12c1473127c37956db5979db719 +size 4192 diff --git a/wikimedia/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikimedia/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc43bb58b74e4c7cc3702691ad5c4b08fa57c2d1 --- /dev/null +++ b/wikimedia/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8735446bd856165febbc25753c48d2f15120bb7b514a4431cb58c982470e819b +size 205914352 diff --git a/wikimedia/model_config.json b/wikimedia/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/wikimedia/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/wikiteam/checkpoint_metadata.json b/wikiteam/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/wikiteam/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/wikiteam/config.yaml b/wikiteam/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8eb5b90162bb328a5142c5627c03fb2fb0343eb --- /dev/null +++ b/wikiteam/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredwikiteam-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/wikiteam/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b46df626ed5135343824ea3fa26c4c64f2936353 --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4a041b80e8e92388039e055e442dd4c69921ea6a5479168eb5c0bb35f9b2d5 +size 8388848 diff --git a/wikiteam/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb3fe3240fba4ad75003ace810822e99326e2907 --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df97f26ba4d6f19d632b373c8fda4ad87358f7639b4a4070a87a9f80faaa4368 +size 25166176 diff --git a/wikiteam/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97cd1107322a60b6667bf7f69d3d3b4530663d1f --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f188c7c2ceca15954f86b83eee3d17d25692d998ca92858999c0cee2b5d82f0 +size 4192 diff --git a/wikiteam/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e52ad07e7cabe336794ee683dc2dfc56d8028d26 --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a62864cbe11cade2fd67c43c0acaeedac591dc48954b88406592dfcbe3833bd +size 33554672 diff --git a/wikiteam/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b85a6167fb54518cafb4f9108b6a776607c5277c --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88dfd71de1a9e0fa4eaffd0e055d59458b68702c8637c77dab3063ec22a301fb +size 67109160 diff --git a/wikiteam/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b9112480db9077f0fdb591d3f858682880645c4 --- /dev/null +++ b/wikiteam/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c38edf06cee01455aee4c424b1c3bb53490eba5ed4c1478f34e3956ccb7a2f +size 4192 diff --git a/wikiteam/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c82789f91a9d47fc9c7866e003d538e6d350d8fd --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800629ee62dd134629dc685f4bef1ce11a0ffd1ff5eafc44e7868171754a1b85 +size 8388848 diff --git a/wikiteam/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f5cccbbbe754b821c8422f8143a8aabf75f22b8 --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1901c59d183cc2b98e325878718f332595597e087ba345f1ccd9f58b40f6442 +size 25166176 diff --git a/wikiteam/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a391ee2d489d8fcaf97a5bfd0b50fdd2de860f8c --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1def240296baffa1938ee5618addd7551aa354700c65f46513f140d26e5666d +size 4192 diff --git a/wikiteam/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50c30b9d94101f1473412fcb5b8dc5a5e8e969fc --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c52c000f38aa2dbc1bf0c65f38f45f36a520b8c50c01c024deee9a82ffd838a +size 33554672 diff --git a/wikiteam/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abc3052998bda7158f882c9b412c24817cb94cb8 --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8779ffa3d22f969def6970ae02ba367b46d651fd05913a9920b7b9440f9b3a +size 67109160 diff --git a/wikiteam/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbc7df0e75568e3faebbe8ba16b4e7b5dd658861 --- /dev/null +++ b/wikiteam/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:159c3ff0646c1bdbb8230afd1af67d8c2e1eea82798dd8a1003917dd9df96a77 +size 4192 diff --git a/wikiteam/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b0e11467275d291e76b09f41a9eadb5d4500a35 --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210536ab1861ab999cedbf9fd14ad118323a978a2d45040121ff6e13ebef5867 +size 8388848 diff --git a/wikiteam/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfad563c0ef206273519b29bff6ca8bf7ed88415 --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d5199cd1464c3e9f622ed012c264cfba99827c45a73b048de7c74d9e306c44 +size 25166176 diff --git a/wikiteam/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..411cc058c634b7b02362bd7651fdea638656ffb9 --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e97b7daaf36ed4cfa65c2c03085a79120844c1a34b59698e9b86190da089b30 +size 4192 diff --git a/wikiteam/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c7f26de4daa942d4868ff907f6a4253a7d3d0fc --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5053dff75d5021f1e463a0158db4eb74746f05d1f9f45409a3c2ba5c2e90ed6c +size 33554672 diff --git a/wikiteam/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5836ef1086f072a29df8a10ce69880df87393e5 --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393bfa0bf500f80be46b61030ff453262b11bc385b3c21d67bbd70a9876ec137 +size 67109160 diff --git a/wikiteam/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42e5a3d20f3bdc91aff9333158cfe43abda182e2 --- /dev/null +++ b/wikiteam/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9edbb84d62693758d1d7a82b3f919b53b62996a535eaa280266b3f715414fa +size 4192 diff --git a/wikiteam/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbac0e12ef8bc66844fd7938fb2255d3a0a0e015 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d78822132f337d4abd4def98e77276c3f343d95a8298091cc5c84a570d2b63e +size 8388848 diff --git a/wikiteam/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e8b9f6ac1f07b9690169fc1ea21a9abf85ee495 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837e6fc0c346ea7fe7300370b9cb45590d427e56fcdf9371c7c17e101b022525 +size 25166176 diff --git a/wikiteam/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a6fef07bb350e1771e9cd35b0cf418d0f326a42 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add747bd3c1f4dcd04965f67ed1c9ff253578723d0fbd7b6ff64330db9543d43 +size 4192 diff --git a/wikiteam/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd0161b013b1b2017ed4910cd19691383fc7ed00 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7dd19f92be067107181b0d5797d46ebe428963782cd44684266e1065ddd944e +size 33554672 diff --git a/wikiteam/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9af0863cb0bdafd21d87b7e3109ce4230c821a14 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2e7a4189a0bca05b5000444417f1fa5a999658e7404d33627e77a1296af685 +size 67109160 diff --git a/wikiteam/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30d1b8b23145c8d466dfb8a0197985b3f58b7b98 --- /dev/null +++ b/wikiteam/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd3f7b48192b5832d4ae05c99c51c881079a2f2b0511fb933efac20cb9f780f +size 4192 diff --git a/wikiteam/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f42f4a9e7a9f3f416e44533195fff87189f7ccda --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded0b461b40a404acf589a516bbadc27dc84dbd038f5fbf5d4468c8f3ff9f54d +size 8388848 diff --git a/wikiteam/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3cd27bd385ef861ec0fe47ef1e1d408b846c6d4 --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc8938fdd1b6b85d7162f864276d037535c030dc24d948773cd1b463ad4e6f0f +size 25166176 diff --git a/wikiteam/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..109c9067c3414be5c69984e00ef694e06674073f --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb77f72d17e6a851ac06c77855f360a1e7e9590bede57fba3062f57f339c2b3 +size 4192 diff --git a/wikiteam/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c909d7347a687f26f46125e3438196b8d1408b90 --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93e5dcc7018684cba0b329fadfc55bbd28eccfd563235756f5f198cbcb16ee7 +size 33554672 diff --git a/wikiteam/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee4776471ffa27bd6317cfea77b106991d40bd70 --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30dd5fb4f6509253428643e54eb4e2aaa7b5a730af2be82f1cdcef2bf1499eb1 +size 67109160 diff --git a/wikiteam/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd6533867cf10802dbf9ff8bc2cb8823608a1048 --- /dev/null +++ b/wikiteam/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab2bdd54770dea32e1dd1bc78378d68abb292bff5aaee667f122d38acee4deb +size 4192 diff --git a/wikiteam/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2668eda1d0d3e1e4fb8ee6f9ebdc2e96669311d2 --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56016e1853503c321f915467d1810ca4eea68c827c03aad0de9f97bb5eab01d3 +size 8388848 diff --git a/wikiteam/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d3a403285abe5f00e39e191d1187e2a9766bc26 --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aadc17dcfd61fe754b25e53e038605046524c8b44784eedf8cf551abb854d9c +size 25166176 diff --git a/wikiteam/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce3c02ab75efa885950d910dc802b0f2ec0fca5a --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b96d7c3ca4126a9264f3a2dbb30d1905ce347d0f736d78596e9650d3e334c9 +size 4192 diff --git a/wikiteam/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71da879d825988bf3b5cb508f279dc15071286ef --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd452c40b467b0f46ca4c343614fbee69301f8d0bf02c12e55136ad5866c30be +size 33554672 diff --git a/wikiteam/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd015a6cf68bdf1d07e1c549fe4ae9f6ea93daee --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674cdb0b3a839f271d522ad20dfe9d3e6f7bb71cfc7d0c55899a7416dd937739 +size 67109160 diff --git a/wikiteam/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13c9d66c422908612b334504a9d2d844491006c4 --- /dev/null +++ b/wikiteam/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9b410b096a84c72b1436f7bbc4e8f33eef195e7a06683f53957f10bd9888b8 +size 4192 diff --git a/wikiteam/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6a0e70183b7c86b9cf93401e1e5d710d39e5edf --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b81ff91d144fa6ba52ff8f8c7b5b50f18ecfbc7791a2aedeb09e16f69bbf798 +size 8388848 diff --git a/wikiteam/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef0451d8bb70bea1980e47cd3510b05ceef088f9 --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000b6fbc41a829999ccebc39031b2711c593e0cc1dc109ae0e2bcd6578cf4f4b +size 25166176 diff --git a/wikiteam/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ea0adce0b8b6cf3142415257d4774eb96d3dc66 --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d78a2b55d14168f3e8c567dd5e1e92584d6a7e8aad16ef05c8c2dbaa2e6a2ab +size 4192 diff --git a/wikiteam/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d41eed9ad08f72cc73686e61817a130e6c3c0a3 --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf511758e086df2daed27b21e131be7c299d0d798a7bccc0c7123ea1a9c3041 +size 33554672 diff --git a/wikiteam/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18440e067e79fe8623d23cd3b8ae535da93a7edd --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfbb2558f56cdd5696c7005f103f39df209e29e1ce9542d309570a6f27239854 +size 67109160 diff --git a/wikiteam/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3d01a81619a9f6157b92bc825d236d91d8e15b7 --- /dev/null +++ b/wikiteam/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c870070d5a17fa11aac44230063b4aca9e544bcd786acaf9025d16523c6a641 +size 4192 diff --git a/wikiteam/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf65bf076ea1afd1592256d03b979de6f5dd139c --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99d09c04000f39d118fb91058e9091bf9152a90ebf23e18f6df1f39537926ac +size 8388848 diff --git a/wikiteam/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61feacce78dffb18ca6f37c7d57ee8bb2ba8f726 --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b93c9ffa4de3ee30c00a567cc4e98e09a041638f90fc6afcf752da67345c49 +size 25166176 diff --git a/wikiteam/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9de2de3a1fc2be1068040bc2d26cb50e0221f274 --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46341d097d607a94322965aab6657a5ba82fb2367aa540ad0035785c8f439c94 +size 4192 diff --git a/wikiteam/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75cf8a24841e823a94e217c85505ac449cf56791 --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69944b702d66f7176b5af4fe7be467759272689e2a7523561fe7afc91aca030 +size 33554672 diff --git a/wikiteam/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45a0f91a0a3732864c62d3813a73ca29c32a7937 --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f0066ff02f101f143113dbfca1952407669f278e3faa3c5f27ee2a9ec6be5e +size 67109160 diff --git a/wikiteam/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82223af4efe2c7838661a41e4ec424e85d0c85ce --- /dev/null +++ b/wikiteam/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6c26b33b116ce73d5aec1e1cc91a48b3ab98480c0072a425af7c3b288c88c2 +size 4192 diff --git a/wikiteam/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63cfdd1ca94a8639fe23e38b188f37e6ed695a04 --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4854455271bd28b7fcf6219f0153556329d983b82e4cb2471111d3631fa6cce2 +size 8388848 diff --git a/wikiteam/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd845ebac312e9fd22b9af9b6586599dc7824f61 --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2492c47e3c55b7ab987abc10205346b84e183eec4996549232270141415fbc3a +size 25166176 diff --git a/wikiteam/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f52646198179341a487b6b664ffba6534860e53 --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9768b01f4eb31fd8dbe2994fb2322d0cc9e38cb0e35db9be6b22514fd30b52d2 +size 4192 diff --git a/wikiteam/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19532cf1c7bbf311d6c457cfe8d8641a5a80a7da --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc665b6a4d38ad6b6af2dbdf694df5f8a0c36d467eeff6cd3b31d4c30b0c1ce3 +size 33554672 diff --git a/wikiteam/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cffa92367792726895158d38bf1a96cdb943cf9a --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d663c2e5db9a11df4ddea10ec34a14043df63bb5a779940e407fb81637835531 +size 67109160 diff --git a/wikiteam/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..006796554f74bfa82abfb92e47503943950ce8e2 --- /dev/null +++ b/wikiteam/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcaf215191bf79445ffc7b7d841bff86ba1f506e401641b49916bc7d798caaa +size 4192 diff --git a/wikiteam/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aca786efc9a2f8d2961cfe569580920d3625dc31 --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72953240cf77fccc033b9ab14e5b5efa63f1f8e8e180e5f4c4cca769405441f +size 8388848 diff --git a/wikiteam/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38774c4981d908599af8db05551906aeb3a660a6 --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5afa6e8648c731a95d263f9ee741fb9ec1578382b2b0af50c49743036994f02 +size 25166176 diff --git a/wikiteam/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1db5eb2d0d68da3ce48bf0e1d09d2684fdc57685 --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac02646d3b21033a48ad310d0ce6579cf3d4780f30644f4c35dc7dd28ada95a +size 4192 diff --git a/wikiteam/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f43b99966810eddf901dc1ae5e2a6daa42c2d7cb --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0963c755df688ac2991fe09da59a681029a191bda73faf0375a5f7b7a24cc398 +size 33554672 diff --git a/wikiteam/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e2d41a27b43f1321a12f9efb7bb3356e0af7ed5 --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa92a3ae98b642270306e4da52bb0881c5becb752fd309a378367f2beab6f33 +size 67109160 diff --git a/wikiteam/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b7513657f54086fe26dc548392c13f75ec354ba --- /dev/null +++ b/wikiteam/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873537a443f515ded7b30fc0ec48c9df6bdbbd0651d7cc3aa559bf0487c047c0 +size 4192 diff --git a/wikiteam/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e60cae72e083ef516dc8b13585563eb3808d70a --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6504c5786ae034a38fdc2370f3ddcbc9a11d10ceb95c1c0caf44f7a0b9301256 +size 8388848 diff --git a/wikiteam/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20ab66b63d0c6dfa7eea3f624c2e9432e293e234 --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a4db8cf3c5c9de0287ccb357340e5bf957a5b7d41fe6961d4fb330e2e9646f +size 25166176 diff --git a/wikiteam/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29a943f895374316fcced2698e4f1969c9c61f73 --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d7f410f9f8c7bda3d848177f086b15f5e2f29127409bb9f962bd83099666de +size 4192 diff --git a/wikiteam/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07cc5519af998b6004cf92ace68b85864c8b946a --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c76c7a677acfe28ba9e45e5443ae3847e23fe27c2b3a3aac204659cdfb928ff +size 33554672 diff --git a/wikiteam/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb5e3ce083041b14099e248c3fab29c5e230df9d --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19e69a51193d1abd5a9fe8b8845de4f69213591656b4d3d7d51ac3cb1125dba +size 67109160 diff --git a/wikiteam/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0ac3e409755577dc7852739458411965f68bddc --- /dev/null +++ b/wikiteam/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbb9aa9b83d4d345a992588daf3b2f218f94eceba88900da923cd71eaadcaa6 +size 4192 diff --git a/wikiteam/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f4f685cee0baed7c3636e9d947bfbe2923bbdea --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbccd5ea3cef4f7468c282fb2ea3093df6380816026deb817ed1d9dae2eb1c44 +size 8388848 diff --git a/wikiteam/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9810f2047652e22f47eb01a6b47387a0a326084c --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f239d4eee56f8f7491f98b30c01dd71f852b49be98070c805116aa3a76c64a4b +size 25166176 diff --git a/wikiteam/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d920734036d0f9989c1af604ee583ec771d4c07 --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781b9bdde1fd6b635454fe00830296e1e29f15fa00e46e893f166c6c739eb7a2 +size 4192 diff --git a/wikiteam/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35186af7300545f67cce42f502461bb214ff9ff0 --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b23d7833a3035d5c1df02f773737aed8f7b1c18a52ce97975a185092bc49a1 +size 33554672 diff --git a/wikiteam/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bffdce9b8d097022d3aedf1fe0f804216dc9a57 --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c24206d6a2a5474c7b675fdecfce406f05910c744683c3555df72c7336c6804 +size 67109160 diff --git a/wikiteam/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fa7b5861ae02ef66cc9bbcaeb5b8d1fc3d5aaf9 --- /dev/null +++ b/wikiteam/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976039ff920cf365b35590f865b5392b64afdb529794c85c818b6ecf1cd58a25 +size 4192 diff --git a/wikiteam/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7310323865996b9995eac3ecf94f2ec782898a4a --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae883e1079a81269bf57f07a0196e3fa36af820ff152277ace8db4878cb63e8 +size 8388848 diff --git a/wikiteam/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..648e33e409ebf564fb11717580135cf65764d0de --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58089b48eaf4ad5c3970c0e465ad14544bbdcf720ab7009d9266028aec2fd0f +size 25166176 diff --git a/wikiteam/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00fa066d10467225f95f17bbe09e2bb37b848f6f --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94886657e82e7be502aaeea9ba59c719f25c288da71f6662a94eec51e9ccf954 +size 4192 diff --git a/wikiteam/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21ecbc05706bb14f6b509ccbcf3c44637665851 --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744feff40c5bffe2a197942c7b24388a939272e8fc4f48207f349a9feb7d00aa +size 33554672 diff --git a/wikiteam/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a233df09db1df0cd44df4fda55ffa2525bdd960a --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9012752c0df8dd410ee858786d80451be5f9a8a5e73a30331bbf3dc0f230698e +size 67109160 diff --git a/wikiteam/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fe68f270f52d4cbdcc7c4233fd0b10f255d0118 --- /dev/null +++ b/wikiteam/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4824fc46f845c4053681d28399a8ab50cb0dcb57fde92132d6d20c4730f2b9 +size 4192 diff --git a/wikiteam/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72a78acfc3ff4cf6db553d9a83a4e1c9bd168b36 --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66de216f1dbb464bed6a43464951951c7dd36457d328155b63ee4dd6912e5ea6 +size 8388848 diff --git a/wikiteam/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2c6b5b4f352802541f81c2b22dfa44740c55b0d --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8fade4fa902d81aeeaed6b689a719ad9c9c6a14360ab12de91f1289b986fac +size 25166176 diff --git a/wikiteam/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25eeada160e6ee96c144f75ad0e1e90697868a8e --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28566bd9bef2c0e3b6bde2b6b3f81ce67ca43cdb1b94f79555c6997ffbb3b081 +size 4192 diff --git a/wikiteam/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65408030ccd0389c06e4ae91aba62021611f764d --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5baee018f1778123d5b71c1fddf2b2846649268aafa9670094f1872e28e201 +size 33554672 diff --git a/wikiteam/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbb489d781c0623426470edcbdffd013d1d1a96a --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7500e3054e8eb8f7ec0c6b1faaa318f5b3022a978d724f28d93d84f5d3330768 +size 67109160 diff --git a/wikiteam/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebb4c1b5049216f19f8c10f22d1feceb1aeaa5d2 --- /dev/null +++ b/wikiteam/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1597c92be516d3d3bcaee32ca00c3f03acef13b1dab91e90707c2b532ffbe276 +size 4192 diff --git a/wikiteam/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2afd755044e79eac7e74efea4b328c2e564b60f --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6213230e6f7dd8471db6ed6d048387a1f682b938a9324fa1cb72c924dab650b5 +size 8388848 diff --git a/wikiteam/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ccb3b957a97f2499e31851cda613a94ccfd259d --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98142d79b48543dcc41cede77f3c3641f6a59920952ebc35f0a550dddeb805d2 +size 25166176 diff --git a/wikiteam/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..147ee0cae6d9828738adcfb62c4db8cc1f8b6e88 --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1510b255c567fb64fac29db9c361a004f1ae0b5550268715b45d1c5bccd2d397 +size 4192 diff --git a/wikiteam/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8c787f5f11d78997c547df8b9cea8ddfa0fb087 --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0621522f0848e2419110593d52db7e120ff7b43dfb92c1bdf63e486681f0e0 +size 33554672 diff --git a/wikiteam/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6519a689f19d71c57370ddd9b24471729ddacf1a --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fdcbc6eeaf47b4b5a7f8dea6c804507c36574742bdbce0c78e5b70c3cd8a6b +size 67109160 diff --git a/wikiteam/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73cb685308336522ea2ec4e9b77d2e1a27d1ef15 --- /dev/null +++ b/wikiteam/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4953a28c9e49f68ed6a116f35e805bc0bbc8d5f6ad7a1fb7e20151e8f82466bc +size 4192 diff --git a/wikiteam/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5711bb138cb2c1501cdfd9e08f59373c48746815 --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:728a7275b6e16df8b03e63e3dd775411901688faa334fc8f19980bdc457ae310 +size 8388848 diff --git a/wikiteam/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a031d1a2162815beb4e077ffcecd29406740ec48 --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:841811fcd9ff74628cb2439b19db2c4a2435f4eb3a45353bc02f83b199a9929f +size 25166176 diff --git a/wikiteam/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4c6facee431398e12e03306b22426af626a67f0 --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:108261b4cd61edda1b8eb420c67ba7f4f336c67ef367c22a87d74023224a6166 +size 4192 diff --git a/wikiteam/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..690ea7232531ef1644c79220d21c8eb5d11e16af --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4a4080a2570dc737c945fd85c6a92073d449fb60370ed01356ad40a96803c1 +size 33554672 diff --git a/wikiteam/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31bddf98127c6dd12f8dd4a85fb59feeece2c2e8 --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:722e83b7eec789977d42e761367a62eeb7433d7c8fdaeb910aeb70a0a9c68725 +size 67109160 diff --git a/wikiteam/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b8fc6bf306b4a03c3a6f5840cb97595a8274e42 --- /dev/null +++ b/wikiteam/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cedf05879cc0ae90ded66ad622a62f40aa5e4deccfd584dcdcfc6a3e10519b +size 4192 diff --git a/wikiteam/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..967255834d205c2d7bbb9a7d63f55cd1dd9cedc6 --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f88c838b4643f86e1d765e1b192dc8c60d46ea0052d2ed8dcddd197f001a9c6 +size 8388848 diff --git a/wikiteam/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec9d1e60784d3a84669dd1b57a0ef3aade3a319d --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934c14f690e81716a1d0453fe29e066f4a306d718650da5ca99dea685ae4950a +size 25166176 diff --git a/wikiteam/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..531dfdf0886a7bacfcaed5867e01c4c18f88c129 --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd289932a07f718ca414bd81521fb0e6c157bbf2ee3250d36f21a47c5d38232 +size 4192 diff --git a/wikiteam/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ece2596a2c57a89bbc96673e55cbd70b88e8dde4 --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6433bfe3b5bffb170a741a270bb906ed827075710d021f5b0b76bdc2ccf22c25 +size 33554672 diff --git a/wikiteam/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40874614394b4c4c97589b03d701f041f1b87068 --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a94c986cbd1961d1ea74919a3292317dd20bca61c2e48800517de4c72844f1 +size 67109160 diff --git a/wikiteam/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..257d3dad6f472566f8e14d119ca3f2b30685ea3a --- /dev/null +++ b/wikiteam/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb74c460ece812600c82bc9644102da190f2651cb498e51c95c4f42981d1767 +size 4192 diff --git a/wikiteam/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..792ca294e5681f85e653e779a28fbbb76f0d67ef --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d248e3ad594adf1d02279b1bac5dc7ec27bd4f67b949dfff4a4c59a50e94ab +size 8388848 diff --git a/wikiteam/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d4762eccff612110aa514c0927c6d8b6b9a9345 --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f331ba1c9bc400d1f1cbd663eb79ba8b66570d54e30989a239d84859138785c7 +size 25166176 diff --git a/wikiteam/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26942df089dcabf42113309d77dc7b7f3af948aa --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb1056ec7703a1785cc493807b9a57f08f7646903c58e6bd8c0a171827d3fda +size 4192 diff --git a/wikiteam/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82db9be744399857b374cd8db6d17edf4613cb87 --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3304f38348fb7e3812c1d323bd3c93d4d98bb5ba9f1f890e50dd921779a5dcb1 +size 33554672 diff --git a/wikiteam/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c96ed34de31cf0770c5b3ae0207db5bf84b68fa --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93607df270403c35d0ba752ea9414a1d43f10009ecf6ad5c09740ec61f31d3d7 +size 67109160 diff --git a/wikiteam/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20217cce752d6021188a62df661729934ce5f376 --- /dev/null +++ b/wikiteam/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f32e353440aa5c5f1aadc63a7054fd5500019d06c4f6e3ae48abacc2c6f3f01 +size 4192 diff --git a/wikiteam/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e2604c78b8722e22d03acdecbef74892f4f78ca --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e0dfa5343b5d53857dfec12d8e0392e03684c842afbb24e8f8b555c54a425e +size 8388848 diff --git a/wikiteam/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ce5a3a6ae8300fb0db5abe6b52a19c37107032b --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ccc7bb2b417cea0d189498c91f83b1cd5ee1a57f0882959688cdc3db071230 +size 25166176 diff --git a/wikiteam/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..483d679edbb587267912226bfc9ce0fcbce14808 --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e2810da23d30c69105d70596bb8ea5d70b14bd0b0468bb0071189e03a45807 +size 4192 diff --git a/wikiteam/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..953257409e8ad0a73e71ed5a44aae6c7c40cd297 --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a178e36bda0adc8469b86c9bd4d98c6e5b83ae7cf348bec34a3884b7d0b358a +size 33554672 diff --git a/wikiteam/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..765e8d1eed066de86058ef46d2c9a59c06b9bcf0 --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78f8244d9b02164b6cffa0ab94ac4a86d0d4d902db13cc2ae2b5b32c50c97f6 +size 67109160 diff --git a/wikiteam/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36b34f68e3be5e8a67b959c51a55446fe0b59efa --- /dev/null +++ b/wikiteam/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf36995fee0d384152db78c44328825985c883bd99834af7602edb8424bbde0 +size 4192 diff --git a/wikiteam/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..735f7c7725fe73d852f00d0dc9b159436886199a --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432d272f643635ef37fbe9a0b28eafe167b06120c0c2de598b85c2c091090350 +size 8388848 diff --git a/wikiteam/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d6a821a6dcd1c25deff3bb87d417c7a1f52ba16 --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3549377a906c02ac5f8fe53e88df17e7c6c8a83cfd4fda823969e34f88fb6aa +size 25166176 diff --git a/wikiteam/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d89e7efd3ac6506e2ebc3065143b62439039eee --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56d0d367a70029afe7ccbf821d3aa1dca5f80dd09d5a3af6abac7f05045f19d +size 4192 diff --git a/wikiteam/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9aaf947c316194c9d1395496115836738e60bf91 --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be8c80957f0d6de04f8cce3c2785b3a92f6da1475c1a9971885309030d3a163 +size 33554672 diff --git a/wikiteam/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3173b0499da0966c011547d58d28491d29d7dbc7 --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13a2810191567eef3b7756d881caafb7032bf1bb8a44ec7fede150f459759d9 +size 67109160 diff --git a/wikiteam/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7efc16e3cf5435822e47628461dd8fd76da2df23 --- /dev/null +++ b/wikiteam/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:257c72b87ee9be4b4152bca19b8f32b621a96ae8301ad55d542b7f117db52b8b +size 4192 diff --git a/wikiteam/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b8948223e4f7b13016ee657ef7d7366ccd0daf5 --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d585923985e55460a125baa42de96cbf169c77ecedca4200b2db55bf64f8110c +size 8388848 diff --git a/wikiteam/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec6f2501abd85c155257c6e0e0ba35638acd91ce --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c0cdef775e0a473f62d1978970e79967082567edc88532b13f65f9dcbb36d1 +size 25166176 diff --git a/wikiteam/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76e1fd2b0689c9e99aaad44b43d059cb118e7a21 --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bacb9cd7e8edcbc0627d9a889d15023433185a3f90ec2379389369e2add2636 +size 4192 diff --git a/wikiteam/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91d607ad5db0b01ca7a34a3a1fb57e5771cfad92 --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3109aadc2c1a6620eb81c32d3265f937c094a9d8dbb1bb5717a14456f907ce +size 33554672 diff --git a/wikiteam/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a4911a03332f9c4519c4adc2c8ac06a1c252d2d --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe9b787c67f8ee00404200c34c2ce65f83c68351401d552da9a827e560355bb +size 67109160 diff --git a/wikiteam/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd580385d9463ca423d19a2e0c15e8cfffd194c4 --- /dev/null +++ b/wikiteam/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c3784a03c89d370262fd99001a49565aeaa87923257e4152da94c666f2d854 +size 4192 diff --git a/wikiteam/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5eb1a219a935b65f9ef6b6601ecf303041d5ac84 --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40c4e5d0c6fcc205d570ce7a3ca981e0f459daffd5d48de094e8b27723af91f +size 8388848 diff --git a/wikiteam/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a41749c327013fb0105f48a25af5f63e0ff8cde1 --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a419ba00546bcbffc8080ed1976774aa4efe3efb349d195a12c98a62aecb92a +size 25166176 diff --git a/wikiteam/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39f406fd764e707b352389248cb8016f9b95b7a0 --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfe20b72a9acced89a453fe101a7abfd0cec6fe4b6fa14762907b0d3ef01e20 +size 4192 diff --git a/wikiteam/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19207c1627d645cdcb4b5346afa2dbe976491702 --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff00a0bdb6f45121ef873c7d0db5af94538b064ae2409b561c679620ce8b836 +size 33554672 diff --git a/wikiteam/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eef60e06a5dd45955faeb95197210ed2f6bc9a4 --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4779a2dfa3daeef48607351344b8cceb797111bf885dda78b3ae732e7d69eb +size 67109160 diff --git a/wikiteam/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d7a1e3493aa032031af818e626fe2e42ca726be --- /dev/null +++ b/wikiteam/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807267e8b320b28bc0f5258686cbdc0b85c5d12d08732b236b17f2c656ae7a1f +size 4192 diff --git a/wikiteam/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6a83b71d1bfe7784af7804c9f958fff7ec4bdc5 --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f58a7f764620b092a39f77886cd47287312c52686eb63f6fe7a2b03e4fccdc +size 8388848 diff --git a/wikiteam/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5a26b9c46c04373a92491f350b819d58d682dfc --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa77628f32281282424ee5c910dbcd1dfb35f3ed1ef6a7937737a50885c5628 +size 25166176 diff --git a/wikiteam/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e5d8043fd2cb2a551ee9f56a3f24dcbe2f7fa14 --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bd43de662e8c3a48144fc36d0dfce466e625171fd5761d5e2584c10c6ea14c +size 4192 diff --git a/wikiteam/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49b9f11c76885c93f0efab447d88173e4115beb --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a867e918ea87c2a05784bce9a6beda8db4ea7baa4ddb5ac90db339ef9946c5 +size 33554672 diff --git a/wikiteam/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96af99904faa59b6958ec965f30df3864fc71dbb --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b30938fb756a6c3516b2ad342383b761e5234b0b90ec2f8138f2f7a3e075e47 +size 67109160 diff --git a/wikiteam/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a86c0ce19484e16af21f6735a70af68c571e57e4 --- /dev/null +++ b/wikiteam/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0fca39e84b968d1dd1cbfdb07aa9444708ebabb45baf1413f24c0c4111da6f +size 4192 diff --git a/wikiteam/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08aec9d206da61baaea65b34f9de312a6c3c2204 --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fb4b0846adf11f875074b7469097dbd32c222a2d44c42e004c9265b1851cc8 +size 8388848 diff --git a/wikiteam/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4086fa796d9c6d0bbc4dbcc4543c77eb74e54008 --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c9a497db88587240b06c40e7715857dc43090f49310f48dd288bce335e1614 +size 25166176 diff --git a/wikiteam/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/wikiteam/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bba77b78aa9384363e267645a2667231b0f33dc5 --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:054fa648aba3978fc1892887e910cb395f7655e3171479bb8e89a17ebd4ae54a +size 4192 diff --git a/wikiteam/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab67c43621ac1269855df3372a15be25ba7a97bf --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0cebbbf070445b699f9ead4bed1d643dd6f89e73a546cd598d97bbed5616d9 +size 33554672 diff --git a/wikiteam/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb3fe8ba2bb9614b89622338db53a9f9d8c853f3 --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65abc73932d2e1e973cc1c1f2a3553352244eaba855da005f602f89952f6a135 +size 67109160 diff --git a/wikiteam/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/wikiteam/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2d2619dc009d20b6e39c6944913dd76f665619b --- /dev/null +++ b/wikiteam/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9d0c8a40b1e26516760eb6f705e9180ab2f5181da168fdf8ef513825b31a08 +size 4192 diff --git a/wikiteam/model/final_layer_norm/pp_block/model_weight.safetensors b/wikiteam/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cf0250ddf7a766fac402f40d5fd9e658bf56106 --- /dev/null +++ b/wikiteam/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0e78ff555b3160578637a151daba6f976ee4a22df4ba74293b6d2484db4eef +size 4192 diff --git a/wikiteam/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/wikiteam/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f297d249236bae4cec24ca615345761e238f3bdc --- /dev/null +++ b/wikiteam/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a409610c61bdd609673f34b489ac22ab2343f82de2f4f3c9e472263f554ad770 +size 205914352 diff --git a/wikiteam/model_config.json b/wikiteam/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/wikiteam/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file diff --git a/youtube/checkpoint_metadata.json b/youtube/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6676045f375558016b4c5f58970761f426321807 --- /dev/null +++ b/youtube/checkpoint_metadata.json @@ -0,0 +1,9 @@ +{ + "dp": 64, + "metas": { + "consumed_train_samples": 14336000, + "last_train_step": 14000 + }, + "tp": 1, + "version": "1.2" +} \ No newline at end of file diff --git a/youtube/config.yaml b/youtube/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..417a41cbf426011ff9a9364c5e70b930fc901f67 --- /dev/null +++ b/youtube/config.yaml @@ -0,0 +1,144 @@ +checkpoints: + checkpoint_interval: 500 + checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6- + checkpoints_path_is_shared_file_system: false + resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6- + save_initial_state: true +data: + dataset: + dataloader_type: single + dataset_max_tokens: null + dataset_weights: null + datasets: + - bits_per_token: 16 + filename_pattern: .*\.ds$ + folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6-/ + original_folder: null + seed: 6 + shuffle: true + skip_tokens: 0 + pad_samples_to_global_batch_size: false + skip_in_stream: true + num_loading_workers: 0 + seed: 6 +experiment_logger: + tensorboard_logger: + push_to_hub_interval: 300 + repo_id: craffel/commav0p1-ablations + repo_public: false + tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations + wandb_logger: null +general: + benchmark_csv_path: null + consumed_train_samples: 14336000 + ignore_sanity_checks: true + project: commav0p1-ablations + run: commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6- + seed: 42 + step: 14000 +kill_switch_path: null +lighteval: + batch_size: 16 + checkpoints_path: null + generation: null + logging: + hub_repo_details: null + hub_repo_results: null + hub_repo_tensorboard: craffel/commav0p1-ablations + local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6- + push_details_to_hub: false + push_results_to_hub: false + push_results_to_tensorboard: true + tensorboard_metric_prefix: e + parallelism: + dp: 8 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: false + tp_mode: ALL_REDUCE + slurm_script_dir: /fsx/craffel/train/eval-scripts + slurm_template: /fsx/craffel/run_eval.slurm.jinja + tasks: + custom_tasks: brrr.lighteval.evaluation_tasks + dataset_loading_processes: 8 + max_samples: 1000 + multichoice_continuations_start_space: null + no_multichoice_continuations_start_space: null + num_fewshot_seeds: null + tasks: early-signal + wandb: null +logging: + iteration_step_info_interval: 1 + log_level: info + log_level_replica: info +model: + ddp_bucket_cap_mb: 25 + dtype: bfloat16 + init_method: + std: 0.02 + make_vocab_size_divisible_by: 1 + model_config: + bos_token_id: 1 + eos_token_id: 2 + hidden_act: silu + hidden_size: 2048 + initializer_range: 0.02 + intermediate_size: 8192 + is_llama_config: true + max_position_embeddings: 2048 + num_attention_heads: 32 + num_hidden_layers: 24 + num_key_value_heads: 32 + pad_token_id: null + pretraining_tp: 1 + rms_norm_eps: 1.0e-05 + rope_scaling: null + tie_word_embeddings: true + use_cache: true + vocab_size: 50272 +optimizer: + accumulate_grad_in_fp32: true + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + learning_rate_scheduler: + learning_rate: 0.0003 + lr_decay_starting_step: null + lr_decay_steps: null + lr_decay_style: cosine + lr_warmup_steps: 500 + lr_warmup_style: linear + min_decay_lr: 3.0e-05 + torch_adam_is_fused: true + weight_decay: 0.1 + zero_stage: 0 +parallelism: + dp: 64 + expert_parallel_size: 1 + pp: 1 + pp_engine: 1f1b + tp: 1 + tp_linear_async_communication: true + tp_mode: REDUCE_SCATTER +profiler: null +s3_upload: + remove_after_upload: true + s5cmd_concurrency: 5 + s5cmd_numworkers: 16 + s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd + upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredyoutube-seed-6- +tokenizer: + tokenizer_max_length: null + tokenizer_name_or_path: gpt2 + tokenizer_revision: null +tokens: + batch_accumulation_per_replica: 4 + limit_test_batches: 0 + limit_val_batches: 0 + micro_batch_size: 4 + sequence_length: 2048 + train_steps: 14305 + val_check_interval: 100 diff --git a/youtube/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67a0c39e67f2092066fdf2fb2441d34b36f2f068 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3811f7f9e96d9e5601e2c53abb259240304850d5b5f8a271ff4dd4da22ab83 +size 8388848 diff --git a/youtube/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcf050a4df097a81c8a0e75739ada1e91a360e77 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79600c1e85042b362b51279cab717ad611c3d630d4f60442a41b4c9117819113 +size 25166176 diff --git a/youtube/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb2536affb58b6cc73d431543e14328d58236c00 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a565b6ac7ebb4be88c8038582f800ba13bf914614a1ee0a5491ab8e22e00505 +size 4192 diff --git a/youtube/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7efd6d35c840c6023131bd74b3333a4dbce46ff1 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22095028180c08eba4553c38ed1931f3ce6f4af3989f49432729b78b2ac70b4 +size 33554672 diff --git a/youtube/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b715807c575f1d5ef4eca3809e2f90d614a3398 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7668fd28d1307cbfcd5f88858afc27ccdca54a13b1a88b7a6883636346bcdd9 +size 67109160 diff --git a/youtube/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd3347f720a64f8eceda34803796b123fd1f10e6 --- /dev/null +++ b/youtube/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90baf9bfb632259cb6c4a7dd1ce1dbe570d0a8abdf5c9e0d2c89e6d4734dfb5 +size 4192 diff --git a/youtube/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d86b11deb0b99e090887b71fb3077485493299c --- /dev/null +++ b/youtube/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f6f5a90cd99da4f7b5b5edd31baf0e4b59b2487f3d638ad72459b2a62c1398 +size 8388848 diff --git a/youtube/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d98de48e73e873b09a45dc9eed1bc2e1aaf7b836 --- /dev/null +++ b/youtube/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58bd66ecf5d5c7787e44b9bcf99df346ecfad61a8ecb8b85b78a3dc332375bb4 +size 25166176 diff --git a/youtube/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c3ac534314c14d291c57841dcdb4518e2721476 --- /dev/null +++ b/youtube/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a4f44f6e05e6d09c567773f0b4ff87689f054e5cec4d632ed46fd969159f95 +size 4192 diff --git a/youtube/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43f99e51f47480c7fcd4a16ec42de99fb8a77ef4 --- /dev/null +++ b/youtube/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3337058ae231d72ba50c47734cd0aeb5c818a3a3a6c5aaa4b0026b4ffa849ea1 +size 33554672 diff --git a/youtube/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2aa6185a37c46c9d461970fa2dcb256cf62c2830 --- /dev/null +++ b/youtube/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb561f78bad934680780bbc0b9c9e44d0364f3a4b140fc346e188e11f479ba3b +size 67109160 diff --git a/youtube/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e665ee4c3ddbd0344de1052d92066f153dc6f1e8 --- /dev/null +++ b/youtube/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9758779feaf1da20b68657cea5ced47b394305b2bfea5a5347ad740357eb61 +size 4192 diff --git a/youtube/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69d3ea80bacaf3c1adb5c2dba0a56a9d84fb4575 --- /dev/null +++ b/youtube/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1686fd406a41af2c9d75a361bb70722b2f952f7ac3a36dbae9de8f718cca790 +size 8388848 diff --git a/youtube/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea60f9b7369fbca058569d873b0111917755d65a --- /dev/null +++ b/youtube/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8eec709fd34382cb2564b3dc34fee352c93d67b3b596c84834f20fb14cbd7ad +size 25166176 diff --git a/youtube/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b5d41934904fc6b4782de74b1131c5f34714ef7 --- /dev/null +++ b/youtube/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39cae8fb5fd534caabf3c6f20a058082b6f0d576c0c760a34694e1616664c82 +size 4192 diff --git a/youtube/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e73262e761b278c592563d696ba181e2fc2d315 --- /dev/null +++ b/youtube/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f70fef62a8a3de0adc07d6a9db458ba28214f90a7552b8e10c05c42c992de7f +size 33554672 diff --git a/youtube/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..265d56c391d31bf11984ac42fa0d0edbbba10bb9 --- /dev/null +++ b/youtube/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f7467286787d77f8863c9ea6ad0b759e1df0c2020323fd06f3f0cab846fd34 +size 67109160 diff --git a/youtube/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9784afdd611886ed20e6b355db94f15926dd251 --- /dev/null +++ b/youtube/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735f06d68ba9d73f3f12050d3e8340a4a3c7fcea902004673689aae0d0322db1 +size 4192 diff --git a/youtube/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e220ecee9e6ca5a0b47fa1622a87d3713660741 --- /dev/null +++ b/youtube/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56525b2ae0be38252ccde47a2a530b528a2e4b220914e6b2dfffdf829bc97303 +size 8388848 diff --git a/youtube/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ccccb44a1f727a5c589374ff6319cf11349a9d9 --- /dev/null +++ b/youtube/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbaeb1581c1f9c87a06173c57a408e4ac60de903332888e12f1037e8dd07062 +size 25166176 diff --git a/youtube/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2113257a599cd5869c5648d5107b597eff492b0a --- /dev/null +++ b/youtube/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7323444bd92e184cd700c9a771184fe3defc1900a247b45709c007b2e74173 +size 4192 diff --git a/youtube/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cbf0f56e954d6a2faf921247c56ccb569f52934 --- /dev/null +++ b/youtube/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788d240f8958218e1a4b3578a5971b7fbfff510b5dd6eacd69abf5fa407b59ad +size 33554672 diff --git a/youtube/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f42bf9eb1f28753dd297907aabc48aa36e65d22 --- /dev/null +++ b/youtube/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e219b20db5917c897346237e0c6b3e2593075abe865fa69c22ad81956b00e7a5 +size 67109160 diff --git a/youtube/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa0195bfb25eebc6cc7ef21245e7530df77d3f1e --- /dev/null +++ b/youtube/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcdbcff7c34462a60ffe9863264be044872e83b6e5f56e20ba2ba8600031fd0 +size 4192 diff --git a/youtube/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..559cd73dcdc4c69934d680175ea5c98abdddbe4d --- /dev/null +++ b/youtube/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66c1fe850d57e2dc71fa7b9a6b3e1df9080cc147d353384a9cd753b14777b0d +size 8388848 diff --git a/youtube/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdb759f5d1f160915ef7366580298e76265fb0ea --- /dev/null +++ b/youtube/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099abc2650505fd0df140e928a98ea8dddbbc4f9ced5e171dfd7ea4fe84afe22 +size 25166176 diff --git a/youtube/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d96c8634ca48b149dece1fbe6b5d8ad24dbddf15 --- /dev/null +++ b/youtube/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629c6af7774f21a8b9413706e3ca317433d38ded7df598b164c791aaba67e606 +size 4192 diff --git a/youtube/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..200d9ddef67d26e0ef76ba806404b16f6d55c4fb --- /dev/null +++ b/youtube/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8147095761c51ff5feaf8982deb850770d996adefa2a5cc93963dd15c1d045e6 +size 33554672 diff --git a/youtube/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c5264909c574a63c12573d4b846fd33db67ce06 --- /dev/null +++ b/youtube/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d49738c36a0260f7a5adc5e6f384701541b9ce9761492418858a1222c298e4 +size 67109160 diff --git a/youtube/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28081af34f631f731b6c915a56f6f5bf981f344a --- /dev/null +++ b/youtube/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b58261974583c9ff9fdf4faddc33fb8ddc7e8034960a116b355001ceaf4323 +size 4192 diff --git a/youtube/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caef290cb0171950fb912d8315297ea446dde36d --- /dev/null +++ b/youtube/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65bcbb7ac1f8d71e54926f1313b362557d14d924fc6b83df8e9e3e89a4b6d91 +size 8388848 diff --git a/youtube/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8f2dc4f089cb7b0303c73b0cab65069be0924bf --- /dev/null +++ b/youtube/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d87da2962a9c67e59f2ce7408169695d56c4eea4004c7f061cdfa81edfeb47 +size 25166176 diff --git a/youtube/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c64eebfb33d10faf65b8af23468a64fd343e8dd --- /dev/null +++ b/youtube/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a16b5dfb903c619dd576e36fc9bae694ebedea6b8146051b64098f8bdd61652 +size 4192 diff --git a/youtube/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f39a92ebf9731ccb74e42871419ab117844bb2bd --- /dev/null +++ b/youtube/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7b94a60acf048793ab3dbf5cbea475823151b2e5f0837657eb4148f82b2698 +size 33554672 diff --git a/youtube/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6700b0879085aca7d18b9dd4feee373a083d2fcf --- /dev/null +++ b/youtube/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b079ba79032e2d7556e2255d07b38973f2690ea9e0249cd1ee92eb9cea5baf +size 67109160 diff --git a/youtube/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2adf47aa96fecf8e25b1d849d1781cc40dd72c3 --- /dev/null +++ b/youtube/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e7998fe8bf79570e6f4b8627638a2d70379bbf23d62300de96fbc910967bee +size 4192 diff --git a/youtube/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74182da8c252e256276b4f153e795201b71c753d --- /dev/null +++ b/youtube/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f543760e8474e26b5a9e0213f5c09eb679ef80738d469a2c871d36e4e311d1 +size 8388848 diff --git a/youtube/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4d97ddd3ea9f1d81975b5ed1ebb4258a7180d86 --- /dev/null +++ b/youtube/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ec6b70b85bb20b43ea36efa256a5edb08dcd3b1a2f82bf7c2959704a8bb7af +size 25166176 diff --git a/youtube/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01f54581b240f43c0a408746391d52d935bef0cd --- /dev/null +++ b/youtube/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b98d995b420f5ac42e67609e69e1643577fae480f8180cc43fd2e2c9b446b5f +size 4192 diff --git a/youtube/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd6092d897a5e7cbe6f2cc90ca6fdc5c76458ebb --- /dev/null +++ b/youtube/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7bfb85277c93390d53a2f2324c61e57816a59f8f9733df72179cd880bfebab +size 33554672 diff --git a/youtube/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..255e67a481f260be11f3c3f139a0a5787a5a20e5 --- /dev/null +++ b/youtube/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719fbfed9fecfdba683459e82a31528a7492776618d731528dfe11816b4c0078 +size 67109160 diff --git a/youtube/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c7d8b28eaf3b5c5d16acaae1f0e260c1a718d29 --- /dev/null +++ b/youtube/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faac7b1e7d9d272408f91ba6bd78a4321ab4c204bf25637cd232aaf3fb892547 +size 4192 diff --git a/youtube/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50b53d4f580ec69c09482c8ad39883e06064ec50 --- /dev/null +++ b/youtube/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3411e5ac30ccae973af49f208fb2f6079e0e129d242303305f9a86a6fd58181 +size 8388848 diff --git a/youtube/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b14cef89bf7cccec0d0d73b8e400c4b20f3d934a --- /dev/null +++ b/youtube/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2867e0bc2d0320034aff2c81b192b69f6367b22a28f896fdfa6376b253d4b038 +size 25166176 diff --git a/youtube/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3f1519ed65e1f5b4a975de665180291a1fecdac --- /dev/null +++ b/youtube/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a790d8ab51fce6d9bc092d849ae53e5354c57b2ec21c35be53af17ccef2b3d +size 4192 diff --git a/youtube/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0cc2f006435d813e2e495d855b954dea80025c4 --- /dev/null +++ b/youtube/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc8f2fa2875a568a9d9b1a210c52fb25ae8546add50dd46a5cc8b97c2d15508 +size 33554672 diff --git a/youtube/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b693c1cf742cb4e3c9330be36377b43885f47b4 --- /dev/null +++ b/youtube/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98216138cfc244a3455fdf69bbb5460f297bf0c27cdf1a5247b7622150ec660d +size 67109160 diff --git a/youtube/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b50803eb591f7a14a5702c283c9416732e6528b9 --- /dev/null +++ b/youtube/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e75be49b76f6f73aa39812a5ad1c50cbaf0566a4012a5fc93a5ec0921454bb +size 4192 diff --git a/youtube/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f52ddcd0013978be9e4cc601dce5f91d1f10535 --- /dev/null +++ b/youtube/model/decoder/16/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944003aa4f4c834751573286e81e851be523fb9cf8edab8f15a24542e0b56ba3 +size 8388848 diff --git a/youtube/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16eef7655721de502dea913ba41e0c66915143ae --- /dev/null +++ b/youtube/model/decoder/16/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:542e690fd213b28930d0ca9e834d6ec6b85a808b7345c619c01e07491198059b +size 25166176 diff --git a/youtube/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3eac05730734bc7ffb9ee2ff9184dd3533847c1 --- /dev/null +++ b/youtube/model/decoder/16/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2787acc8139d7be0815b200f9b1c8c3320449f4bd45ead3b4dfff1d6a10161 +size 4192 diff --git a/youtube/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfa728888cb57a188a2bb316e9f8ae975e061f98 --- /dev/null +++ b/youtube/model/decoder/16/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63cc97b83c6a3f3a6f34e3487505ea1a53d8e526d6003da213b295c324b49a0e +size 33554672 diff --git a/youtube/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ffadcba1baaf3f840602919ecf38bd154855f92 --- /dev/null +++ b/youtube/model/decoder/16/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9542792f78d64326193386686b0aed762ab92d82777c2947ca852e28ec7229 +size 67109160 diff --git a/youtube/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc63d276292ae80e329310d6b579b6c6c4612538 --- /dev/null +++ b/youtube/model/decoder/16/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa86e56818da2c0f16f47d51632edc9779ea96e2cef55ebe757b3c27d8c6a37e +size 4192 diff --git a/youtube/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f465142dca0abf824c4c76109ed85c24c1f7f36 --- /dev/null +++ b/youtube/model/decoder/17/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8057152e28f46704c6fad5b3319b0387ebd43bd9228ab9893bd2796bac543fb +size 8388848 diff --git a/youtube/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e796b1740dc24cc7be0a6b352a819d8d4cb9c744 --- /dev/null +++ b/youtube/model/decoder/17/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb5d2c35fa72902f9a3a65a23ea6fa988ae98d97be3800cd33a91dff42a99ca7 +size 25166176 diff --git a/youtube/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa963ac697d369fb2a7ddbc953fa24ce7156481f --- /dev/null +++ b/youtube/model/decoder/17/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf4897a9827ae4b9c80107f76ed59351f5e27859b2565d94fa477181b40a18b +size 4192 diff --git a/youtube/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e75f0c3dcf067adf096f03a0c6b4eaec9d47686 --- /dev/null +++ b/youtube/model/decoder/17/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60a21b19c2a780afe6be2c9cb534a46fd77550e7dd69d74cafe2a2022b0d4cd +size 33554672 diff --git a/youtube/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..228a7f3d1527bc0dac2ec84ed98612f7af63d821 --- /dev/null +++ b/youtube/model/decoder/17/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a48db9b6b1d74c6a6a7a4e1d19727c9e5aedb00d0cc26fe0cebde9166dfbc56 +size 67109160 diff --git a/youtube/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0d75351a436ed7d37c5e7662586be3eb6331573 --- /dev/null +++ b/youtube/model/decoder/17/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ddeb90a1903524aa54049b1644eab69da079ac4533ae1b51da532d8d868ae07 +size 4192 diff --git a/youtube/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f907e4905921cf8b964e8f9f7b3a8c17fc18872 --- /dev/null +++ b/youtube/model/decoder/18/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e26084dd153ce91a6fee58c59f98c5f457953f9443b62be86615d3bfceeb16e3 +size 8388848 diff --git a/youtube/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56de99b7a7baedd939275eb3e3c722e04190c6dd --- /dev/null +++ b/youtube/model/decoder/18/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25c19ef9c5e39ba1f4a64412fe73076c6d6a2afe6b79abad30a3f10aaf617ef +size 25166176 diff --git a/youtube/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a675ee75863cee9bbfb66f156da80b5ae0a320cd --- /dev/null +++ b/youtube/model/decoder/18/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68e81eddd9eb582a575ce7b16e5396e67b7e364ead9320bc737b793711e886e +size 4192 diff --git a/youtube/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b25bac39ce7027b3779155814ffa61e728095e2 --- /dev/null +++ b/youtube/model/decoder/18/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32be33950fe0b00135a45bab38bb713940f28158a6216563dc69af1daf2c389 +size 33554672 diff --git a/youtube/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..982ba5d59c124783d40e753c2f11ac1042a55408 --- /dev/null +++ b/youtube/model/decoder/18/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7091ba79b2487739805b4db435e16176ad35df07f649d5230c454bf738d31f42 +size 67109160 diff --git a/youtube/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..296c5839021634a7d72f295d90ddfc10806a9c69 --- /dev/null +++ b/youtube/model/decoder/18/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3275cf25fa68bdaa56dbda0e9d5e1a8fe8d21524a1f334aae316512da01b8ca1 +size 4192 diff --git a/youtube/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43dd1fe709d5011f18bf152248474f56a4955c77 --- /dev/null +++ b/youtube/model/decoder/19/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd82a4a10b1137f3795abc960e675d51cd8867c698eca21caa856260b5c97437 +size 8388848 diff --git a/youtube/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb048bcd713d7f2657daafa16eef7888cbc0485d --- /dev/null +++ b/youtube/model/decoder/19/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3389b836dba2fcbcdb4eef34610955b9509b3aa40fcf6e2ce5ceb2e814aaa297 +size 25166176 diff --git a/youtube/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c854103860f7184f023adcebed1be123d52480d1 --- /dev/null +++ b/youtube/model/decoder/19/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfd634d2af05040f4fdde0bfbad00aef8f7c42f4afabfec5340818df4f89b89 +size 4192 diff --git a/youtube/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85722d693f7d49f40319f675e94126fd3cd9169f --- /dev/null +++ b/youtube/model/decoder/19/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ed35be51c7259121bd1b56b750d377a261f102c6fdf02043ca51c41150a45a +size 33554672 diff --git a/youtube/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48b51fedc5a4facc50ed245ce4d0c07582110278 --- /dev/null +++ b/youtube/model/decoder/19/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fafb7faa644df3464333696e69c7e22b46c408b05e5e555b260f4b7f7324874 +size 67109160 diff --git a/youtube/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5052f56ad65d0a2e8bd79b1843716bdaf2084ab8 --- /dev/null +++ b/youtube/model/decoder/19/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8576b197f6e014645a01fa37714ffcacc0f9958cbde073ead5875f31a5f5cc47 +size 4192 diff --git a/youtube/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60b7e410c10e76e2343a91d03763e57061639774 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac77eeb4a6fbea501f2df5333273988050db5bcb65cca91136100b0467d9d99 +size 8388848 diff --git a/youtube/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba1f58d4c031c9913cc95698d72bcb278687f591 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b45e99c1ae8e0e59fb2718ddc600b8e477c77afb47da26c58a23a34a9f580c +size 25166176 diff --git a/youtube/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdf0cc22c625c0df23e8ccb24af5cd5eff5c1ee7 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fecafef3a24f1d9c41730bab55ecd65749d964807815e12cda6a6f4f09192c3 +size 4192 diff --git a/youtube/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57af741318af80c356db093044f49886d5ae2ee0 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc6a9a56d9e42ac4942689479946d5768ea30efdebe7b91e3d1a083681493c7 +size 33554672 diff --git a/youtube/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01023973dcf3f6ed3c5dfdafa2c7b5377fee5516 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5482a927971b8f28fead61a91772daae190546afc086404198857c425c80cf7d +size 67109160 diff --git a/youtube/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6899688a3c9305b9293c17ec95945798c603897 --- /dev/null +++ b/youtube/model/decoder/2/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5417e89e99b3f28f8e29782e04ecd00174d52025387b7c8c3850ff1612f72372 +size 4192 diff --git a/youtube/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d51e744ebc359ce532f12208d4f93f55ffdc6836 --- /dev/null +++ b/youtube/model/decoder/20/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ff183598dce760ecc15879c38e700d054ae09c1d6d55c30ead81d8199da6d1 +size 8388848 diff --git a/youtube/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42ff4f13cc0a48527f0a093cbbbb71cb0c2c0115 --- /dev/null +++ b/youtube/model/decoder/20/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a77527ab522162d18d09b2f48a286987532ea7c3f8e659f6855ae1121529142 +size 25166176 diff --git a/youtube/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eb38dd721b56bda7f67892dfe8c541d6de1bbd6 --- /dev/null +++ b/youtube/model/decoder/20/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df11ee54f21c3bf1967dfa244d3c2776b7d0b40f6cc0cba444457c6af51af019 +size 4192 diff --git a/youtube/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73903b34f70edeb358132f2cf8e20721d2004efd --- /dev/null +++ b/youtube/model/decoder/20/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b54e12f89cf2bc9598a57b470944968b037bb713789e066b5b5f4b41bd1cb5f +size 33554672 diff --git a/youtube/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d4554551dda381477c0ef3c50fc3ab5db721a59 --- /dev/null +++ b/youtube/model/decoder/20/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ca2d90efcdf003d11d6def799b68ebb253897df663c885bdf67c4281b9341d +size 67109160 diff --git a/youtube/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..565ebac29a9f47360880ff5639723042ebd77da7 --- /dev/null +++ b/youtube/model/decoder/20/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899131ef440bd34f9733510c13da74f9d68e2a60b4469e44abee31fce15992f0 +size 4192 diff --git a/youtube/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4bc4e2e685331990d2dd123d5932bb116029e73 --- /dev/null +++ b/youtube/model/decoder/21/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1182a37e43166482c2ce88c2ba04aefc752feff0d29f2162426bea8729930422 +size 8388848 diff --git a/youtube/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b91a0b775a7817881861d9444ea444f34a78847 --- /dev/null +++ b/youtube/model/decoder/21/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6276a9ba453ab89e2b63fc55b8ba6e70465e129ff71b0f30f73d73fc14650609 +size 25166176 diff --git a/youtube/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea0490061da4c20a228c3cdff9f1c093bb14e864 --- /dev/null +++ b/youtube/model/decoder/21/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:523c608461da8548ed14d69cc8d2411fd600a7bc4a883b60cfc847db8d72331b +size 4192 diff --git a/youtube/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..897fe74eeb4ad02ecaf50ac1d34103d1ffff4818 --- /dev/null +++ b/youtube/model/decoder/21/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f6d71e73d87e6c365249f26221744702f87f93a16820369b6e1d8fe6809e46 +size 33554672 diff --git a/youtube/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bccc3e97f1f2ae67f668f29b476fa1517a65bbf --- /dev/null +++ b/youtube/model/decoder/21/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd71ed252ea27a68c6fb94eb82ddcb7e9e874e5864aea30fef99f0ddff6c1b8 +size 67109160 diff --git a/youtube/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad5fb15d1895fd57bbac8a7d8cb63df716e2da55 --- /dev/null +++ b/youtube/model/decoder/21/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba12a740089ade9d910e8ca280f87519229eed720b7a09692a247d7ebdcf555 +size 4192 diff --git a/youtube/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb9a4c1a04c079585acd2e865574426fdee99836 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ee0d5f9dd920b0bbd8f47fab06d832038aeeff4d9c31b9f43c747fb8ea5cdf +size 8388848 diff --git a/youtube/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcedb4e311a9b816ffad94433f368a7011c8a623 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f091b8772045c98355c778fbe04b531b04c44c75972b21bb3f1194135bd50e +size 25166176 diff --git a/youtube/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e436ac02864e5d9aa71490cbbba03a43a531c82 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42098370a4fc11d634f2d41e3af25ee2199b8f60651d85a6a328386bd1d5f92c +size 4192 diff --git a/youtube/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..449f3874c29659e01ee76183c3744cd53ca207e6 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ce7d76bffa615c8684752d55f33f214f02dbcc57b62709d7f00f950c6de533 +size 33554672 diff --git a/youtube/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef8fe44501c6ec6df0ff2d55dd669c4b162f43e7 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7ec3ac84844a694feb9b1bd18829bea87e74bc95be9d33b482112cd6d79ef9 +size 67109160 diff --git a/youtube/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f58e19e827bf4de04683e44eea36de1048321e65 --- /dev/null +++ b/youtube/model/decoder/22/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e32d5add7cc9ed7ac2889f7a6840006c371850634d3239b7e0a9e1e616a414 +size 4192 diff --git a/youtube/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9920f198b3278de1540a3aec758449592e90ee0 --- /dev/null +++ b/youtube/model/decoder/23/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba882a31babc83d620f7d335c74f63741b4f14b8d67391ece50183c1e3a539b +size 8388848 diff --git a/youtube/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23e56c2eb5537d6d2f2d8d0bd014518662b24324 --- /dev/null +++ b/youtube/model/decoder/23/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fed4bc57104bd7d481420c10225d406123d63a9933e3937447a4f637ef78af +size 25166176 diff --git a/youtube/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9da08371c0ce6e77eb0cc78fc16ab89720f47f1 --- /dev/null +++ b/youtube/model/decoder/23/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59294441890a7432a95bfa0cf07f667ef2c3145c43a4666eca038e6bd4a85f91 +size 4192 diff --git a/youtube/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e136d5b5dac9be3f234adb62ff0743b79e650036 --- /dev/null +++ b/youtube/model/decoder/23/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec37c77e39bb58d1c960bb96133cb805c4a186d69a95f85f4013da397ba586c5 +size 33554672 diff --git a/youtube/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..561e17be2a75011b1db6258260e3c0a52e50db5b --- /dev/null +++ b/youtube/model/decoder/23/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700cc6ef8d6a5a441f2202ad16651cc73b4bf96897a27a0c7784eb6af2dcfba2 +size 67109160 diff --git a/youtube/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f6bf42379086bbc17a8ee4d04792d25185f6e45 --- /dev/null +++ b/youtube/model/decoder/23/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc0e0a43e2aedeb73dcd8ac9b75d0392236ee6d0a808ac0ba0701cb9f453dce +size 4192 diff --git a/youtube/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65c6db83856937fcf61db3eb5ac4a0b09300ad6e --- /dev/null +++ b/youtube/model/decoder/3/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce2c1ab4a565baadc41224a76f8516f4dbdf24bd9e45bce79a1f68032856f4b +size 8388848 diff --git a/youtube/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bda4384a0ccc12eb05876805f767fc2b1d2b8fa --- /dev/null +++ b/youtube/model/decoder/3/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93015fd8edced02ff3b95a2251cc6cffed7b80f5882d66a407dc70893c4f5cf3 +size 25166176 diff --git a/youtube/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccc6b42054a72db65101b56fb57f6daa4e310340 --- /dev/null +++ b/youtube/model/decoder/3/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4abdc3296eb282420bcfa587236d347ab0986da646538b364a7c7ee479f6e6e8 +size 4192 diff --git a/youtube/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8ff5e24688e4c6ac17276e1a9c292fa0cd632bf --- /dev/null +++ b/youtube/model/decoder/3/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b420c1f3e83f0884f851098fcf3dbed81ddf82ed3f938dd3a87b7d567f2b0e4e +size 33554672 diff --git a/youtube/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35695709bf787e56f22b3e0f38b64a4b084b1e6e --- /dev/null +++ b/youtube/model/decoder/3/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0741616da15e7e8e81d21ef33827e51adbe7556a5e412323042a5ea583231665 +size 67109160 diff --git a/youtube/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef23f3b50541687ff9a4111b8cd90f9e1d8b3d78 --- /dev/null +++ b/youtube/model/decoder/3/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f12797277e71bed3fd24cdab50e2bbd0500f564bb16b893fdeca152db89281e +size 4192 diff --git a/youtube/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d056ceb030e33f2bea244362d4b7606f3cd7db4b --- /dev/null +++ b/youtube/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3962609fd0f7c40dc70dfa6718d3cbb71ff625643fc8a4f8d44bba291030008 +size 8388848 diff --git a/youtube/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e81c4de7eb058b4bc49ada6f07b468a25029127 --- /dev/null +++ b/youtube/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5151bee0a130cc1167a7ee2caece6649d088b1b3881588a113a28b3dc1d410bc +size 25166176 diff --git a/youtube/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20f33ea72984dd63b56c4256d077debcc1f92ce8 --- /dev/null +++ b/youtube/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9361be85508cfd54fb9c29c965f50702b28a3fee0634f9f44a79a8256151bd62 +size 4192 diff --git a/youtube/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77d1f58885ff67bc96afc6c18c31cc8207bb3858 --- /dev/null +++ b/youtube/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40ef250f4d61072245e73a240384ac9c08fd0edab1d540734de46c2a2fe84eb +size 33554672 diff --git a/youtube/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c16d52c4ea31b57ce7a45214bff0759e80da11f --- /dev/null +++ b/youtube/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e2b3a6e9b7bd7b67557d331d342d5de0471149ef6bdc4877519b79705def59 +size 67109160 diff --git a/youtube/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8951ce85cdf142d45a8ab12ad836bba69e58cfaa --- /dev/null +++ b/youtube/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:722fec9063635e5f998c9e376775e7d5d8695385889b4533ef562dc4318cfa3c +size 4192 diff --git a/youtube/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b59c64757b1ac85fc222d5c86c516e13fd0566c --- /dev/null +++ b/youtube/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5cb3340baf62ea042381d761c4b3783c51fcf6cdd06e8d2ac492790f78b723 +size 8388848 diff --git a/youtube/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66182dc3ca7dd0024a398653882f399cb3fd62d9 --- /dev/null +++ b/youtube/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8617a311d98cb8ac855542f6636b71242b6344869b0fb3cfa2836980c2a8cae +size 25166176 diff --git a/youtube/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4016ad8560ad91bee8349ef98b2be7a9d05bae2 --- /dev/null +++ b/youtube/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff47b7e9a33afb75f4e29322f0c37cb6122c2681685e4c8e02feab501d903ed1 +size 4192 diff --git a/youtube/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9978cc9f88ed9e0d072e25ba90c244086495a1bb --- /dev/null +++ b/youtube/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff80746a17d5f1833911d0b286734993e192afe9437e73ef0b1d7b59abf714b +size 33554672 diff --git a/youtube/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..963a806adec3d4fe9e8da7f949ba9bfea0a866e8 --- /dev/null +++ b/youtube/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a056cfb186f9cb5fb9319198811a53d886c5a59385d3b86426225fc5d6d9b2b +size 67109160 diff --git a/youtube/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39b2ad58afac34dfe3d88f6a5d25e6e28b485c60 --- /dev/null +++ b/youtube/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03644c5570a640297b6e5d60f8aa37aae7f55589fb5d8de21b078a82b362f58a +size 4192 diff --git a/youtube/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..429cd5632644dc0f46a764b2839683aaaf9648f7 --- /dev/null +++ b/youtube/model/decoder/6/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397177da323171c273b21573c616587a808a6a6ab6c84c13ee39c61947417859 +size 8388848 diff --git a/youtube/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75ec66ecc57b0ad75d3ab8318c70565d53962685 --- /dev/null +++ b/youtube/model/decoder/6/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7403f54b4eb59a02cd5efa6190f80fda0271da45dad5f7f7ce8ed6eb421d8c +size 25166176 diff --git a/youtube/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ec0f3f29aaa02f6bc4a0c0d90d4047a7ce59a75 --- /dev/null +++ b/youtube/model/decoder/6/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73289e245e98eaa5ea68cca9a2a39b62e896f52d2b5779ef6166494b98c5ad40 +size 4192 diff --git a/youtube/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4642af147dea77255a4f8b21dc6163ebd156f001 --- /dev/null +++ b/youtube/model/decoder/6/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e329cdd2558b683a18535293bcf29182c87622375aff3c2cbc148d7dca30a8f +size 33554672 diff --git a/youtube/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf11f80343c38e5b13596eca500ed75374ec2910 --- /dev/null +++ b/youtube/model/decoder/6/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ee2a47c5af412068ecac6bae3f6fbfaf86dc80c0908cf0602bd02384bf334a +size 67109160 diff --git a/youtube/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c21d392b6d2de7a1444589836dbfbcd22038439a --- /dev/null +++ b/youtube/model/decoder/6/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85e038ce40e4df66ae9a36a9bcfa00dfab86598998306e345fd2ef24ae8aae4 +size 4192 diff --git a/youtube/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13d0607affe315aaab9e8b4ce6ed576f216409ad --- /dev/null +++ b/youtube/model/decoder/7/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb402670747ba0bec545c6e147f8625bb27febaff7a575d05431a3b3ff13d779 +size 8388848 diff --git a/youtube/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ed7c620c81b77d6fbd57ba0a9d4f60f425b14c8 --- /dev/null +++ b/youtube/model/decoder/7/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc8bbdc42ea0f6f25a1c731bc9d6976176227bb3a436cc1ecb4c17cde200a64 +size 25166176 diff --git a/youtube/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a007fe258b539e6267a75e004af97afa7b049683 --- /dev/null +++ b/youtube/model/decoder/7/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cac2543594ee7eb39e57f4842b23e6f62e9960513f58d974cb0bc832aa87368 +size 4192 diff --git a/youtube/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70a2bde24804f60eda9a51c107d4997ee773870b --- /dev/null +++ b/youtube/model/decoder/7/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9f281905cd23aa7ea60ee62b1df9e8e37eea0332e2cc8da4ef4ecf1ec7252e +size 33554672 diff --git a/youtube/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca63d98fc9c599dbed3e1c74e63615b4f488fe6b --- /dev/null +++ b/youtube/model/decoder/7/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8d5664e747e0e187f0e9fe24c45ca69b259b47c671c8b8cfb109a3ba883fb4 +size 67109160 diff --git a/youtube/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7bed11bb2dea209a15c09965b9dfb308630a7bd --- /dev/null +++ b/youtube/model/decoder/7/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f9d1edef2b39c3d1650676344436bec64d7ad821bd9b166016cbc17f04e8a1 +size 4192 diff --git a/youtube/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e8e94ae9e8755e9197bc81951ad39204aaa6392 --- /dev/null +++ b/youtube/model/decoder/8/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a320410c8d29d697fa4e556c7336b89e9992d35023fa6aca0e90db244d35ef +size 8388848 diff --git a/youtube/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..362b3724a43c3874e7770e33cc39cbcf8c0ba51d --- /dev/null +++ b/youtube/model/decoder/8/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfadee38c68283113813f26808681016a4e2f634764e54b7b1c28bf5fe6b7c3 +size 25166176 diff --git a/youtube/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..013d99bf87b960d32d2c0d6af866c36239f96998 --- /dev/null +++ b/youtube/model/decoder/8/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f7eb2284267561ba4c52ba7abe4d04a62648be2c378263469a917e52dd45f7 +size 4192 diff --git a/youtube/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3499488f040e980919de31fccceb8f58505b3ce --- /dev/null +++ b/youtube/model/decoder/8/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a69408f3df0d8cba7d0bafe4dcfe8298c41a438827286585b80eec1d29f4475 +size 33554672 diff --git a/youtube/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82488d0b6672068a8008c21c707e42578aad77aa --- /dev/null +++ b/youtube/model/decoder/8/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01fdf9a94dc9b192d45f5c8669ab15e0f08c87956daf97d024f7ba3be200eee +size 67109160 diff --git a/youtube/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..427dea0011ff2b05a106b04b6ca03afae7a5de44 --- /dev/null +++ b/youtube/model/decoder/8/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0087263f4f7fe97ed5cf7348d114750727d561ad1fbdd7ad52462082a1d68136 +size 4192 diff --git a/youtube/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..114947afa3e10e7d4fb034ebcb971e7e6e5f2962 --- /dev/null +++ b/youtube/model/decoder/9/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66abbfc6f5ae6da3b17e36d52ddff229bfd6a3bcdad5c9277a7f9dbdc099afca +size 8388848 diff --git a/youtube/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c661d5b4c61162dfdf0b5d68f55801d6cd0f668f --- /dev/null +++ b/youtube/model/decoder/9/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8dfa2cdb474700f90b54d1e1cbd1b4e97689881016c7b1467a459d207a75c1 +size 25166176 diff --git a/youtube/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors b/youtube/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70f7ec433c2849e76c9b3c1323137c90b22a9de4 --- /dev/null +++ b/youtube/model/decoder/9/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956c0141e652a984e468b1a23fe755f5a458fb3031bbdecc03ed65298e93cc7f +size 4192 diff --git a/youtube/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59a0b1618d5be85eb3aa0ed153033270d2b823ba --- /dev/null +++ b/youtube/model/decoder/9/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68ff5ea86a93f4e160101f6ea9c266465f1b9f60e4520938af1c72fa76e93c5 +size 33554672 diff --git a/youtube/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..673a22da73b3dad9de4bf54b86d2aa0444e8046f --- /dev/null +++ b/youtube/model/decoder/9/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd944e6dc512ffe32d6361699ab80f0a5661251a76ae8471ee46715c7c9e391 +size 67109160 diff --git a/youtube/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors b/youtube/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c15979a7f814cab29f98226300a9e969df7a602e --- /dev/null +++ b/youtube/model/decoder/9/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8037f131c7cc48ef1c8e9e3a013cf7092843b1466046537ba4e9ad149cb423 +size 4192 diff --git a/youtube/model/final_layer_norm/pp_block/model_weight.safetensors b/youtube/model/final_layer_norm/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66703237bc79bc4916fb56dd17c353346dbf1afe --- /dev/null +++ b/youtube/model/final_layer_norm/pp_block/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5e2321fb89b147eec331cc71d47ca4fda6ccd2338007e07f2e21065024bc3c +size 4192 diff --git a/youtube/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/youtube/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8d1101dff71bf1e28b170749c05069007ba3ba1 --- /dev/null +++ b/youtube/model/token_position_embeddings/pp_block/token_embedding/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63da663a1b319d51868fbbb4d3aa5cdb69cd1f889a78a3909a522f09cf71963e +size 205914352 diff --git a/youtube/model_config.json b/youtube/model_config.json new file mode 100644 index 0000000000000000000000000000000000000000..08d82fd8f96f9096be7a049e2b6a2bc54a846de5 --- /dev/null +++ b/youtube/model_config.json @@ -0,0 +1 @@ +{"bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "is_llama_config": true, "max_position_embeddings": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "pad_token_id": null, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "tie_word_embeddings": true, "use_cache": true, "vocab_size": 50272} \ No newline at end of file