Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

README.md +18 -30
model-00001-of-00009.safetensors +1 -1
model-00002-of-00009.safetensors +1 -1
model-00003-of-00009.safetensors +1 -1
model-00004-of-00009.safetensors +1 -1
model-00005-of-00009.safetensors +1 -1
model-00006-of-00009.safetensors +1 -1
model-00007-of-00009.safetensors +1 -1
model-00008-of-00009.safetensors +1 -1
model-00009-of-00009.safetensors +1 -1
model.safetensors.index.json +1 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -7,7 +7,7 @@ tags:
 datasets:
 - HuggingFaceH4/Multilingual-Thinking
 model-index:
-- name: outputs/gpt-oss-out-fft/
   results: []
 ---
@@ -24,47 +24,34 @@ use_kernels: true
 model_quantization_config: Mxfp4Config
 model_quantization_config_kwargs:
   dequantize: true
-  block_size: 32        # default, matches the OCP spec
-  strict: false         # fallback to fp16 on any odd layers
 plugins:
   - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 experimental_skip_move_to_device: true  # prevent OOM by NOT putting model to GPU before sharding
 datasets:
-  # - path: winglian/pirate-ultrachat-10k
-  #   type: chat_template
-  #   split: train
   - path: HuggingFaceH4/Multilingual-Thinking
     type: chat_template
     field_thinking: thinking
     template_thinking_key: thinking
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./outputs/gpt-oss-out-fft/
-sequence_len: 8192
 sample_packing: true
 pad_to_sequence_len: true
-# adapter: lora
-# lora_r: 8
-# lora_alpha: 16
-# lora_dropout: 0.0
-# lora_target_linear: true
 wandb_project: gpt-oss-20b
-wandb_name: multilingual-reasoning
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 1
-#optimizer: adamw_torch_8bit
-optimizer: adamw_torch_fused  # 8bit optimizers do not work with FSDP2 offload
 lr_scheduler: constant_with_warmup
 learning_rate: 2e-5
@@ -75,7 +62,7 @@ flash_attention: true
 attn_implementation: kernels-community/vllm-flash-attn3
 gradient_checkpointing: true
-activation_offloading: true
 logging_steps: 1
 saves_per_epoch: 1
@@ -87,20 +74,21 @@ eot_tokens:
   - "<|end|>"
   - "<|return|>"
-fsdp_version: 2
-fsdp_config:
-  offload_params: true
-  state_dict_type: SHARDED_STATE_DICT
-  auto_wrap_policy: TRANSFORMER_BASED_WRAP
-  transformer_layer_cls_to_wrap: GptOssDecoderLayer
-  reshard_after_forward: true
-  # cpu_ram_efficient_loading: false
 ```
 </details><br>
-# outputs/gpt-oss-out-fft/
 This model is a fine-tuned version of [openai/gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b) on the HuggingFaceH4/Multilingual-Thinking dataset.

 datasets:
 - HuggingFaceH4/Multilingual-Thinking
 model-index:
+- name: workspace/data/outputs/gpt-oss-out/
   results: []
 ---
 model_quantization_config: Mxfp4Config
 model_quantization_config_kwargs:
   dequantize: true
 plugins:
   - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 experimental_skip_move_to_device: true  # prevent OOM by NOT putting model to GPU before sharding
 datasets:
   - path: HuggingFaceH4/Multilingual-Thinking
     type: chat_template
     field_thinking: thinking
     template_thinking_key: thinking
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
+output_dir: /workspace/data/outputs/gpt-oss-out/
+sequence_len: 8196
 sample_packing: true
 pad_to_sequence_len: true
 wandb_project: gpt-oss-20b
+wandb_name: multilingual-reasoning-fft
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 1
+optimizer: adamw_torch_fused
 lr_scheduler: constant_with_warmup
 learning_rate: 2e-5
 attn_implementation: kernels-community/vllm-flash-attn3
 gradient_checkpointing: true
+#activation_offloading: true
 logging_steps: 1
 saves_per_epoch: 1
   - "<|end|>"
   - "<|return|>"
+deepspeed: /pi-workspace/zero3.json
+# fsdp_version: 2
+# fsdp_config:
+#   offload_params: false
+#   state_dict_type: SHARDED_STATE_DICT
+#   auto_wrap_policy: TRANSFORMER_BASED_WRAP
+#   transformer_layer_cls_to_wrap: GptOssDecoderLayer
+#   reshard_after_forward: true
+# #  cpu_ram_efficient_loading: true
 ```
 </details><br>
+# workspace/data/outputs/gpt-oss-out/
 This model is a fine-tuned version of [openai/gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b) on the HuggingFaceH4/Multilingual-Thinking dataset.

model-00001-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e5b710a03085747c068f0b9fc83d8e43f2f9a3ec3d6bc903594fbdf2899d4cd
 size 4504304664

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e66b52040a9a0777a9c94ad800d7dff662a58c96129efbfd77b54ef553560bd
 size 4504304664

model-00002-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b773934f3578f0a413c72fe60d9496996ccad4adb577d725d3ed0d6581980948
 size 4939127656

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cbe12bb05ec8772ff87808f8916a52f9c330075813be3a978e6b968ba2aa52b
 size 4939127656

model-00003-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:523ef402ee9d1873d922df48d1db37a27e4e77c7750806abfaf11e3c048b4e70
 size 4939127656

 version https://git-lfs.github.com/spec/v1
+oid sha256:77057a058259fdad6458a2cc253c248a9a308c43390b2e1eaeff108144d1502e
 size 4939127656

model-00004-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd562d033a8c9eed46972cebff9957d3b2fc2fb210cad3820d766e0713b61104
 size 4939127680

 version https://git-lfs.github.com/spec/v1
+oid sha256:72cc1b6291eec305ae8b01a4f7ab53eb15489496d4fe36b7613d822cf307ccf2
 size 4939127680

model-00005-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd2a2cad8877cba299fc6998489427dcf863550a4f7a0e583f120f4711ad9628
 size 4939127704

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c0775267ff51f292d952a8a06d7d7a4cb709ea9fa48433f5dde5dc1ead0d84f
 size 4939127704

model-00006-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6222ca7f362227804c67f68f9405ba05f11afb6c270a59300f95ce9adf40e74f
 size 4939127704

 version https://git-lfs.github.com/spec/v1
+oid sha256:89e821b90576a93e94a9457d21012a184e3d3336cd8a499b5040dcc9fa8791c8
 size 4939127704

model-00007-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab19707e4163f4ec06d11f82fdd8aaec555b06044f91508411fec3603db3f0d6
 size 4939127704

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f2b0083866777b6ef7405acca40a46242005bbebb423f09bd3223880cc94ee8
 size 4939127704

model-00008-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ac63ac294c4ab76a754786e597ae5a6177b8199cea60b6116a07a43007cfae8
 size 4939127704

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b97d39f740e42cf0bbe9e15a6ad59dae1b93396ca2d85899c4bb9d0b6882192
 size 4939127704

model-00009-of-00009.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d35d808eb3800c3ed4c057f09be24e86bfefc07820ee458486b2048baa0d3d0
 size 2751362856

 version https://git-lfs.github.com/spec/v1
+oid sha256:549eb052e40666b8b016c7a6db7031e752785cec977beed6a315fea12489caef
 size 2751362856

model.safetensors.index.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "metadata": {
     "total_size": 41829514368
   },
   "weight_map": {

 {
   "metadata": {
+    "total_parameters": 335424,
     "total_size": 41829514368
   },
   "weight_map": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fa4b843cb6bcb99be8589958426e121040c1346b3a28c5c5062def1beab2d33
-size 8723

 version https://git-lfs.github.com/spec/v1
+oid sha256:29e1c313582c8dd3b40fa45dcf6d6482aeabf058adc5837643ba6a5b2ecdb37c
+size 9489