updated logging/save/eval steps
llama-factory/config/qwen2_72b_lora_sft_4bit-p1.yaml CHANGED

```diff
@@ -21,15 +21,15 @@ preprocessing_num_workers: 16
 
 ### output
 output_dir: saves/qwen2-72b/lora/sft_4bit_p1_full
-logging_steps:
-save_steps:
+logging_steps: 100
+save_steps: 2109
 plot_loss: true
 overwrite_output_dir: true
 # resume_from_checkpoint: true
 
 ### train
-per_device_train_batch_size:
-gradient_accumulation_steps:
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
@@ -41,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps:
+eval_steps: 2109
 
 report_to: wandb
 run_name: qwen2_72b_4bit_p1_full # optional
```
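The commit fills in keys that were previously left empty; in YAML a bare `save_steps:` loads as null, so the old config could not supply usable integers to the trainer. The shared value 2109 for `save_steps` and `eval_steps` plausibly equals the number of optimizer steps in one epoch. A minimal sketch of that arithmetic, where `num_gpus` and `num_samples` are assumptions that do not appear in the diff (`num_samples` is chosen only so the division lands on 2109):

```python
# Hedged sketch: how save_steps / eval_steps could relate to the setup.
import math

per_device_train_batch_size = 1  # from the diff
gradient_accumulation_steps = 8  # from the diff
num_gpus = 8                     # assumption, not from the diff
num_samples = 134_976            # assumption, picked to illustrate 2109

# Effective global batch = per-device batch * accumulation * world size.
effective_batch = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
steps_per_epoch = math.ceil(num_samples / effective_batch)
print(steps_per_epoch)  # 2109 under these assumed numbers
```

With a per-device batch of 1 and 8 accumulation steps, the effective batch scales linearly with the number of GPUs, so the true steps-per-epoch depends on the launch topology.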
llama-factory/config/qwen2_72b_lora_sft_4bit-p2.yaml CHANGED

```diff
@@ -21,14 +21,14 @@ preprocessing_num_workers: 16
 
 ### output
 output_dir: saves/qwen2-72b/lora/sft_4bit_p2_full
-logging_steps:
-save_steps:
+logging_steps: 100
+save_steps: 2109
 plot_loss: true
 overwrite_output_dir: true
 # resume_from_checkpoint: true
 
 ### train
-per_device_train_batch_size:
+per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
 num_train_epochs: 3.0
@@ -41,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps:
+eval_steps: 2109
 
 report_to: wandb
 run_name: qwen2_72b_4bit_p2_full # optional
```
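A quick way to confirm the edit took effect is to load each config and check that none of the step-related keys is still null, since an empty value would likely fail the trainer's argument parsing for integer fields. A minimal sketch using PyYAML; the key list is my own choice, the paths come from the diff:

```python
# Minimal sketch: verify the step-related keys are no longer empty.
import yaml

for path in (
    "llama-factory/config/qwen2_72b_lora_sft_4bit-p1.yaml",
    "llama-factory/config/qwen2_72b_lora_sft_4bit-p2.yaml",
):
    with open(path) as f:
        cfg = yaml.safe_load(f)
    for key in ("logging_steps", "save_steps", "eval_steps",
                "per_device_train_batch_size", "gradient_accumulation_steps"):
        # An empty `key:` in YAML loads as None, which integer-typed
        # training arguments would not accept.
        assert cfg.get(key) is not None, f"{key} is unset in {path}"
print("all step settings filled")
```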