attn-signs committed on
Commit
efa36ac
·
verified ·
1 Parent(s): bea32b1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -0
README.md CHANGED
@@ -57,6 +57,7 @@ problem_field = "question"
57
  solution_field = "answer"
58
  dataloader_num_workers = 2
59
  test_size = 0.1
 
60
 
61
  [run]
62
  run_name = "rl-zariman-7"
@@ -82,6 +83,10 @@ use_peft = true
82
  # vllm_gpu_memory_utilization = 0.25
83
  num_generations = 4
84
  max_completion_length = 1024
 
 
 
 
85
 
86
  [lora]
87
  lora_target_modules = [
 
57
  solution_field = "answer"
58
  dataloader_num_workers = 2
59
  test_size = 0.1
60
+ extract_hash = true
61
 
62
  [run]
63
  run_name = "rl-zariman-7"
 
83
  # vllm_gpu_memory_utilization = 0.25
84
  num_generations = 4
85
  max_completion_length = 1024
86
+ num_iterations = 4 # see the TRL v0.16.0 release notes: https://github.com/huggingface/trl/releases/tag/v0.16.0
87
+ scale_rewards = false
88
+ beta = 0.0 # with beta = 0.0 the reference model is not loaded and the KL divergence is not minimized, which saves a significant amount of memory
89
+ epsilon_high = 0.28 # increasing the upper-bound epsilon leads to higher entropy during generation, promoting better exploration
90
 
91
  [lora]
92
  lora_target_modules = [