tune-mgtv-qwen2_72b.sh
llama-factory/config/qwen2_72b_lora_sft_4bit-p1.yaml
ADDED
@@ -0,0 +1,46 @@
+### model
+model_name_or_path: Qwen/Qwen2-72B-Instruct
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+quantization_bit: 4 # use 4-bit QLoRA
+loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+# use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+
+### dataset
+dataset: alpaca_mac
+template: chatml
+cutoff_len: 4096
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/qwen2-72b/lora/sft_4bit_p1_full
+logging_steps: 10
+save_steps: 88
+plot_loss: true
+overwrite_output_dir: true
+# resume_from_checkpoint: true
+
+### train
+per_device_train_batch_size: 32
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 4.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 88
+
+report_to: wandb
+run_name: qwen2_72b_4bit_p1_full # optional
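Note on the train block: per_device_train_batch_size 32 with gradient_accumulation_steps 8 gives an effective batch of 256 samples per device per optimizer step. The launch wrapper scripts/tune-lf.sh is not part of this commit; presumably it just hands the YAML to LLaMA-Factory's training CLI, roughly like this (a minimal sketch, assuming LLaMA-Factory is installed per the commented-out install step in the script below):

# hedged sketch, not the repo's actual launch command
llamafactory-cli train config/qwen2_72b_lora_sft_4bit-p1.yaml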
llama-factory/config/qwen2_72b_lora_sft_4bit-p2.yaml
ADDED
@@ -0,0 +1,46 @@
+### model
+model_name_or_path: Qwen/Qwen2-72B-Instruct
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+quantization_bit: 4 # use 4-bit QLoRA
+loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+# use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+
+### dataset
+dataset: alpaca_mac
+template: chatml
+cutoff_len: 4096
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/qwen2-72b/lora/sft_4bit_p2_full
+logging_steps: 10
+save_steps: 88
+plot_loss: true
+overwrite_output_dir: true
+# resume_from_checkpoint: true
+
+### train
+per_device_train_batch_size: 32
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 4.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 88
+
+report_to: wandb
+run_name: qwen2_72b_4bit_p2_full # optional
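This p2 config is identical to the p1 config above except for the output directory and the W&B run name. Derived from the file contents as added here, a plain diff of the two would show only:

diff config/qwen2_72b_lora_sft_4bit-p1.yaml config/qwen2_72b_lora_sft_4bit-p2.yaml
22c22
< output_dir: saves/qwen2-72b/lora/sft_4bit_p1_full
---
> output_dir: saves/qwen2-72b/lora/sft_4bit_p2_full
46c46
< run_name: qwen2_72b_4bit_p1_full # optional
---
> run_name: qwen2_72b_4bit_p2_full # optional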
scripts/tune-mgtv-qwen2_72b.sh
ADDED
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+BASEDIR=`pwd`
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+#pip install -r requirements.txt
+#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
+
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+
+export MODEL_PREFIX=qwen2_72b_lora_sft_4bit
+
+export CONFIG_FILE=config/$MODEL_PREFIX-p1.yaml
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1.csv
+echo "Tuning with $CONFIG_FILE"
+$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
+
+export CONFIG_FILE=config/$MODEL_PREFIX-p2.yaml
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2.csv
+echo "Tuning with $CONFIG_FILE"
+$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
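The script first dumps hardware and OS diagnostics (nvidia-smi, lscpu, memory), then runs the two tuning passes back to back, exporting LOGICAL_REASONING_DATA_PATH and LOGICAL_REASONING_RESULTS_PATH for whatever downstream evaluation consumes them. Since scripts/tune-lf.sh is invoked twice but not included in this commit, here is a hypothetical sketch of what such a wrapper typically looks like, assuming it simply forwards the config to LLaMA-Factory's CLI:

#!/bin/sh
# Hypothetical sketch of scripts/tune-lf.sh -- not part of this commit.
# Assumed to forward the YAML config (passed as $1, relative to the repo
# root the caller has already cd'd into) to LLaMA-Factory's training CLI.
llamafactory-cli train "$1"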
scripts/tune-mgtv.sh
CHANGED
@@ -1 +1 @@
-tune-mgtv-
+tune-mgtv-qwen2_72b.sh
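The only change to scripts/tune-mgtv.sh is its single line, which previously held the truncated placeholder "tune-mgtv-" and now names the new tune-mgtv-qwen2_72b.sh script; presumably this file acts as a pointer that selects which model-specific tuning script the pipeline runs.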