shoubing35
/

gpt2-qat

@@ -1,18 +1,17 @@
 ---
 base_model: openai-community/gpt2
-datasets: rajpurkar/squad
 library_name: transformers
 model_name: gpt2-qat
 tags:
 - generated_from_trainer
-- sft
 - trl
 licence: license
 ---
 # Model Card for gpt2-qat
-This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2) on the [rajpurkar/squad](https://huggingface.co/datasets/rajpurkar/squad) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -28,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/shoubing-apple/huggingface/runs/3bg819bc)
 This model was trained with SFT.
@@ -39,7 +38,7 @@ This model was trained with SFT.
 - Transformers: 4.52.4
 - PyTorch: 2.6.0+cu124
 - Datasets: 3.6.0
-- Tokenizers: 0.21.1
 ## Citations

 ---
 base_model: openai-community/gpt2
 library_name: transformers
 model_name: gpt2-qat
 tags:
 - generated_from_trainer
 - trl
+- sft
 licence: license
 ---
 # Model Card for gpt2-qat
+This model is a fine-tuned version of [openai-community/gpt2](https://huggingface.co/openai-community/gpt2).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/shoubing-apple/huggingface/runs/afjzjglt)
 This model was trained with SFT.
 - Transformers: 4.52.4
 - PyTorch: 2.6.0+cu124
 - Datasets: 3.6.0
+- Tokenizers: 0.21.2
 ## Citations

adapter_config.json CHANGED Viewed

@@ -15,7 +15,7 @@
   "loftq_config": {},
   "lora_alpha": 32,
   "lora_bias": false,
-  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
@@ -26,9 +26,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "c_fc",
-    "c_attn",
-    "c_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "loftq_config": {},
   "lora_alpha": 32,
   "lora_bias": false,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "c_proj",
     "c_fc",
+    "c_attn"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91538ea8849e8c41ef014ee2023e93bcf3b83e5bc72fc44e49721073c2ece758
 size 9449344

 version https://git-lfs.github.com/spec/v1
+oid sha256:876eb5b688ece899825f4020d4c01c332367893bc951b01e32e98b80606861c9
 size 9449344

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5ca29b0a7fd4bc87f72ad65d9c892946b134c6918b192864fab23f0362017db
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:87c1755905df6180d4ab8a0eb718e64129434adb9d08a083a83430d2c722ba15
 size 5752