noeloco committed (verified)
Commit 1b39776 · Parent(s): 7822d67

Training in progress, step 70
adapter_config.json CHANGED
@@ -6,27 +6,29 @@
   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
+  "lora_alpha": 8,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
+    "q_proj",
+    "k_proj",
     "down_proj",
     "gate_proj",
     "o_proj",
-    "q_proj",
-    "k_proj",
     "v_proj"
   ],
   "task_type": "CAUSAL_LM",
+  "use_dora": false,
   "use_rslora": false
 }
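For reference, a minimal PEFT LoraConfig sketch that would serialize to roughly the new adapter_config.json above. Only the fields visible in the diff are assumed; use_dora and layer_replication are written by recent peft releases (0.10+).

```python
# Sketch only: mirrors the LoRA settings visible in the new adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                      # rank raised from 8 to 16 in this commit
    lora_alpha=8,              # alpha lowered from 16 to 8
    lora_dropout=0.05,
    target_modules=[
        "up_proj", "q_proj", "k_proj",
        "down_proj", "gate_proj", "o_proj", "v_proj",
    ],
    use_rslora=False,
    use_dora=False,            # new field emitted by recent peft versions
    task_type="CAUSAL_LM",
)
print(lora_config)
```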
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9314786846103c79129521d1e6078554172c9bbffc72566885f6b2aff8775f2c
-size 80013120
+oid sha256:9cf94ba2d163468bcd1961b5ffd420d151880dd5fda3a4fb93d1152df4a21fa3
+size 80014016
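The safetensors file is stored as a Git LFS pointer, so only the hash and size change here. To confirm which adapter tensors and ranks actually landed in the checkpoint, the shapes can be inspected from a locally downloaded copy (the local path below is an assumption, not part of the commit):

```python
# Sketch: list adapter tensor names and shapes from a local download of
# adapter_model.safetensors (path is assumed, not taken from this commit).
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, tuple(f.get_tensor(name).shape))
```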
config.json CHANGED
@@ -18,15 +18,18 @@
   "num_key_value_heads": 32,
   "pretraining_tp": 1,
   "quantization_config": {
-    "bnb_4bit_compute_dtype": "float32",
-    "bnb_4bit_quant_type": "fp4",
-    "bnb_4bit_use_double_quant": false,
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "bfloat16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
     "llm_int8_enable_fp32_cpu_offload": false,
     "llm_int8_has_fp16_weight": false,
     "llm_int8_skip_modules": null,
     "llm_int8_threshold": 6.0,
-    "load_in_4bit": false,
-    "load_in_8bit": true,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
     "quant_method": "bitsandbytes"
   },
   "rms_norm_eps": 1e-05,
@@ -34,7 +37,7 @@
   "rope_theta": 1000000,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0",
+  "transformers_version": "4.40.0.dev0",
   "use_cache": false,
   "vocab_size": 32016
 }
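The new quantization_config corresponds to loading the base model in 4-bit NF4 with double quantization and bfloat16 compute, instead of the previous 8-bit setup. A hedged sketch of the equivalent load call follows; the base model id is a placeholder inferred from the CodeLlama tokenizer and 32016 vocab, not stated in this commit, and bnb_4bit_quant_storage requires a recent transformers/bitsandbytes (consistent with transformers_version 4.40.0.dev0).

```python
# Sketch: a BitsAndBytesConfig matching the new quantization_config in config.json.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",   # placeholder base model, not confirmed by this commit
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
```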
tokenizer_config.json CHANGED
@@ -80,7 +80,6 @@
   "suffix_first": false,
   "suffix_token": "▁<SUF>",
   "tokenizer_class": "CodeLlamaTokenizer",
-  "trust_remote_code": false,
   "unk_token": "<unk>",
   "use_default_system_prompt": false,
   "use_fast": true
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cf34011f3480c36bc63792fde0cc4fd8f04ef7d4627f8f79baf19897b632d17
-size 4795
+oid sha256:c1a5e882ae94a52ef640dccf5bce9f5f80fa18103f4e60fb6949adaf0f8b1ac4
+size 5816
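training_args.bin is also an LFS pointer; the file itself is typically the pickled TrainingArguments object saved by the Hugging Face Trainer. It can be inspected from a local download (the path and the inspected fields below are assumptions, not part of the commit):

```python
# Sketch: inspect the TrainingArguments stored in a local copy of training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```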