Laurie
/

ChatGLM-Efficient-Tuning-RM

Model card | Files and versions | Community

Laurie committed on May 5, 2023

Commit

4dad3de

•

1 Parent(s): ce80594

Update README.md

Files changed (1) hide show

README.md +17 -1

README.md CHANGED Viewed

@@ -9,4 +9,20 @@
     # 终端输入
     CUDA_VISIBLE_DEVICES=0 python src/infer.py \
-    --checkpoint_dir path_to_checkpoint # repo files

     # 终端输入
     CUDA_VISIBLE_DEVICES=0 python src/infer.py \
+    --checkpoint_dir path_to_checkpoint # repo files
+    # PPO训练，创建文件夹path_to_rm_checkpoint，将此repo的文件存入其中，运行下列命令，3090预估50小时
+    CUDA_VISIBLE_DEVICES=0 python src/train_ppo.py \
+    --do_train \
+    --dataset alpaca_gpt4_en \
+    --finetuning_type lora \
+    --reward_model path_to_rm_checkpoint \
+    --output_dir path_to_ppo_checkpoint \
+    --per_device_train_batch_size 4 \
+    --gradient_accumulation_steps 4 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 1000 \
+    --learning_rate 5e-5 \
+    --num_train_epochs 1.0 \
+    --fp16