Update README.md
Browse files
README.md
CHANGED
@@ -9,4 +9,20 @@
|
|
9 |
|
10 |
# 终端输入
|
11 |
CUDA_VISIBLE_DEVICES=0 python src/infer.py \
|
12 |
-
--checkpoint_dir path_to_checkpoint # repo files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# 终端输入
|
11 |
CUDA_VISIBLE_DEVICES=0 python src/infer.py \
|
12 |
+
--checkpoint_dir path_to_checkpoint # path_to_checkpoint: directory containing this repo's files
|
13 |
+
|
14 |
+
# PPO 训练:先创建文件夹 path_to_rm_checkpoint,将此 repo 的文件存入其中,再运行下列命令(在 3090 上预计耗时约 50 小时)
|
15 |
+
CUDA_VISIBLE_DEVICES=0 python src/train_ppo.py \
|
16 |
+
--do_train \
|
17 |
+
--dataset alpaca_gpt4_en \
|
18 |
+
--finetuning_type lora \
|
19 |
+
--reward_model path_to_rm_checkpoint \
|
20 |
+
--output_dir path_to_ppo_checkpoint \
|
21 |
+
--per_device_train_batch_size 4 \
|
22 |
+
--gradient_accumulation_steps 4 \
|
23 |
+
--lr_scheduler_type cosine \
|
24 |
+
--logging_steps 10 \
|
25 |
+
--save_steps 1000 \
|
26 |
+
--learning_rate 5e-5 \
|
27 |
+
--num_train_epochs 1.0 \
|
28 |
+
--fp16
|