ready eval qwen2-72b
llama-factory/config/mac_template_qwen2_72b.yaml
DELETED

@@ -1,43 +0,0 @@
-### model
-model_name_or_path: ORG_NAME/MODEL_NAME
-quantization_bit: 4
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-
-### dataset
-dataset: alpaca_mac
-template: CHAT_TEMPLATE
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/MODEL_NAME
-logging_steps: 50
-save_steps: 560
-plot_loss: true
-# overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 1
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: MODEL_NAME_lora_sft
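The deleted file was a LLaMA-Factory SFT template in which ORG_NAME/MODEL_NAME, CHAT_TEMPLATE, and MODEL_NAME are placeholders to be filled in before training. A minimal sketch of that substitution in Python (the helper name, output path, and the "qwen" template value are illustrative assumptions, not taken from this repo):

# Hypothetical helper, not part of this repo: render the YAML template
# by plain string substitution before handing it to LLaMA-Factory.
from pathlib import Path

def render_config(template_path, output_path, org_name, model_name, chat_template):
    text = Path(template_path).read_text()
    # Substitute the three placeholders used by the template.
    text = text.replace("ORG_NAME", org_name)
    text = text.replace("CHAT_TEMPLATE", chat_template)
    text = text.replace("MODEL_NAME", model_name)
    Path(output_path).write_text(text)

render_config(
    "llama-factory/config/mac_template_qwen2_72b.yaml",
    "config/qwen2_72b_instruct.yaml",  # assumed output location
    org_name="Qwen",
    model_name="Qwen2-72B-Instruct",
    chat_template="qwen",  # assumed LLaMA-Factory template name for Qwen2
)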
llm_toolkit/eval_epochs.py
CHANGED

@@ -32,6 +32,7 @@ def evaluate_model_all_epochs(
     end_epoch=-1,
     batch_size=1,
     max_new_tokens=300,
+    checkpoints_per_epoch=1,
     device="cuda",
 ):
     if adapter_path_base is None:
@@ -47,7 +48,9 @@ def evaluate_model_all_epochs(
     ]
 
     subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
-    num_train_epochs = len(subdirs)
+    num_train_epochs = len(subdirs) // checkpoints_per_epoch
+    if checkpoints_per_epoch > 1:
+        subdirs = subdirs[checkpoints_per_epoch - 1 :: checkpoints_per_epoch]
     print(f"found {num_train_epochs} checkpoints: {subdirs}")
 
     if end_epoch < 0 or end_epoch > num_train_epochs:
@@ -89,6 +92,7 @@ def evaluate_model_all_epochs(
 if __name__ == "__main__":
     model_name = os.getenv("MODEL_NAME")
     adapter_path_base = os.getenv("ADAPTER_PATH_BASE")
+    checkpoints_per_epoch = int(os.getenv("CHECKPOINTS_PER_EPOCH", 1))
     start_epoch = int(os.getenv("START_EPOCH", 1))
     end_epoch = os.getenv("END_EPOCH", -1)
     load_in_4bit = os.getenv("LOAD_IN_4BIT", "true").lower() == "true"
@@ -118,7 +122,8 @@ if __name__ == "__main__":
     print(f"(0) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
     print(f"{start_gpu_memory} GB of memory reserved.")
 
-    model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)
+    # model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)
+    model, tokenizer = None, None
 
     datasets = load_translation_dataset(data_path, tokenizer, num_shots=0)
     print_row_details(datasets["test"].to_pandas())
@@ -139,6 +144,7 @@ if __name__ == "__main__":
         adapter_path_base,
         datasets["test"],
         results_path,
+        checkpoints_per_epoch=checkpoints_per_epoch,
         start_epoch=start_epoch,
         end_epoch=end_epoch,
         device=device,
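The substantive change: evaluate_model_all_epochs gains a checkpoints_per_epoch parameter (fed from the CHECKPOINTS_PER_EPOCH environment variable), so that when the trainer saved several checkpoints per epoch, only the last checkpoint of each epoch is evaluated. The top-level load_model call is also replaced with model, tokenizer = None, None, presumably because each checkpoint's model is loaded inside the evaluation loop instead. A standalone sketch of the thinning logic, with illustrative checkpoint names matching the deleted config's save_steps: 560:

# Standalone illustration of the new checkpoint-thinning logic.
# Assume 24 saves at save_steps=560 over 6 epochs -> 4 checkpoints per epoch.
subdirs = [f"checkpoint-{step}" for step in range(560, 560 * 25, 560)]
checkpoints_per_epoch = 4

subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
num_train_epochs = len(subdirs) // checkpoints_per_epoch  # 24 // 4 == 6
if checkpoints_per_epoch > 1:
    # Keep indices 3, 7, 11, ...: the last save of each epoch.
    subdirs = subdirs[checkpoints_per_epoch - 1 :: checkpoints_per_epoch]

print(num_train_epochs)  # 6
print(subdirs)
# ['checkpoint-2240', 'checkpoint-4480', 'checkpoint-6720',
#  'checkpoint-8960', 'checkpoint-11200', 'checkpoint-13440']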
scripts/eval-4gpu.sh
CHANGED

@@ -13,13 +13,19 @@ grep MemTotal /proc/meminfo
 
 #pip install torch torchvision torchaudio
 
-pip install -r requirements.txt
+# pip install -r requirements.txt
 
 export BATCH_SIZE=1
 export LOAD_IN_4BIT=true
 
-
+# ./scripts/eval-model.sh Qwen/Qwen2-72B-Instruct
 
 # ./scripts/eval-model.sh shenzhi-wang/Llama3.1-70B-Chinese-Chat
 
-
+export CHECKPOINTS_PER_EPOCH=4
+./scripts/eval-epochs.sh Qwen Qwen2-72B-Instruct
+
+# export CHECKPOINTS_PER_EPOCH=1
+# ./scripts/eval-epochs.sh shenzhi-wang Llama3.1-70B-Chinese-Chat
+
+# ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-210