George-API committed on
Commit 493e679 · verified · 1 Parent(s): 31e9844

Upload transformers_config.json with huggingface_hub

Files changed (1)
  1. transformers_config.json +15 -17
transformers_config.json CHANGED
@@ -9,8 +9,8 @@
   },
   "training_config": {
     "num_train_epochs": 3,
-    "per_device_train_batch_size": 4,
-    "gradient_accumulation_steps": 4,
+    "per_device_train_batch_size": 3,
+    "gradient_accumulation_steps": 2,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
     "warmup_ratio": 0.03,
@@ -31,25 +31,25 @@
     "group_by_length": true
   },
   "hardware_config": {
-    "fp16": true,
-    "bf16": false,
+    "fp16": false,
+    "bf16": true,
     "gradient_checkpointing": true,
     "device_map": "auto",
-    "attn_implementation": "eager",
-    "use_flash_attention": false,
+    "attn_implementation": "flash_attention_2",
+    "use_flash_attention": true,
     "memory_optimization": {
       "expandable_segments": true,
-      "max_memory_fraction": 0.95
+      "max_memory_fraction": 0.9
     }
   },
   "quantization_config": {
     "load_in_4bit": true,
-    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_compute_dtype": "bfloat16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": true
   },
   "lora_config": {
-    "r": 16,
+    "r": 8,
     "lora_alpha": 32,
     "lora_dropout": 0.05,
     "bias": "none",
@@ -87,15 +87,13 @@
       "allgather_partitions": true,
       "allgather_no_copy": true
     },
-    "gradient_accumulation_steps": 4,
+    "gradient_accumulation_steps": 2,
     "gradient_clipping": 0.3,
     "fp16": {
-      "enabled": true,
-      "loss_scale": 0,
-      "loss_scale_window": 1000,
-      "initial_scale_power": 16,
-      "hysteresis": 2,
-      "min_loss_scale": 1
+      "enabled": false
+    },
+    "bf16": {
+      "enabled": true
     },
     "optimizer": {
       "type": "AdamW",
@@ -118,7 +116,7 @@
     "train_batch_size": "auto",
     "train_micro_batch_size_per_gpu": "auto",
     "wall_clock_breakdown": false,
-    "communication_data_type": "fp16",
+    "communication_data_type": "bfloat16",
     "comms_logger": {
       "enabled": false
     },
 
 
 
 
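Finally, communication_data_type keeps gradient all-reduce in the same dtype as compute. The DeepSpeed-side values this commit changes, gathered in one place for reference (this mirrors the JSON above; it is not a complete DeepSpeed config):

# Changed DeepSpeed keys only, as a Python dict fragment.
ds_changes = {
    "gradient_accumulation_steps": 2,       # was 4, matching training_config
    "fp16": {"enabled": False},             # loss-scaling knobs removed with it
    "bf16": {"enabled": True},
    "communication_data_type": "bfloat16",  # all-reduce in bf16, matching compute
}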