Training in progress, step 30

Files changed (6)

README.md CHANGED

@@ -1,6 +1,5 @@
 ---
 base_model: meta-llama/Llama-3.2-1B-Instruct
-datasets: shoubing35/ones_digit_sft_dataset
 library_name: transformers
 model_name: llama-1B-sft
 tags:
@@ -12,7 +11,7 @@ licence: license
 # Model Card for llama-1B-sft
-This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the [shoubing35/ones_digit_sft_dataset](https://huggingface.co/datasets/shoubing35/ones_digit_sft_dataset) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: meta-llama/Llama-3.2-1B-Instruct
 library_name: transformers
 model_name: llama-1B-sft
 tags:
 # Model Card for llama-1B-sft
+This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

adapter_config.json CHANGED

@@ -14,7 +14,7 @@
   "loftq_config": {},
   "lora_alpha": 32,
   "lora_bias": false,
-  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
@@ -25,7 +25,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "v_proj"
   ],
   "task_type": "CAUSAL_LM",

   "loftq_config": {},
   "lora_alpha": 32,
   "lora_bias": false,
+  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "up_proj",
     "q_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
     "v_proj"
   ],
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d604753d64db2d0f74e32e96ca042320bbabcb8c62844a536db1d6990dbc611a
-size 1057497896

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d56b190a3917505f5e82b811ceafa1ead1c177e29d5c3d7d38224d31ea572c2
+size 1095792688

runs/Apr13_22-23-48_239d878514d9/events.out.tfevents.1744583038.239d878514d9.15324.0 ADDED

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e774765fa96acba51f8d3b74eb29220191afdeca5ab36f845b49ecc9dd0b8a3
+size 9772

runs/Apr13_22-25-22_239d878514d9/events.out.tfevents.1744583132.239d878514d9.15791.0 ADDED

+version https://git-lfs.github.com/spec/v1
+oid sha256:09563963d1748518e08b9bcd9a75340897731e877f360260849be8a59d009902
+size 9772

training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fea5f0fc6a51845a0fd1d6d5f2ab0761db1fa6b21b11a7ff0567988de1562e36
 size 5688

 version https://git-lfs.github.com/spec/v1
+oid sha256:06ca4866cf342be84ada3a4bb2c3fcbc06949be107896fed2d818e410b2f7820
 size 5688