Update README.md
Browse files
README.md
CHANGED
@@ -57,6 +57,7 @@ problem_field = "question"
|
|
57 |
solution_field = "answer"
|
58 |
dataloader_num_workers = 2
|
59 |
test_size = 0.1
|
|
|
60 |
|
61 |
[run]
|
62 |
run_name = "rl-zariman-7"
|
@@ -82,6 +83,10 @@ use_peft = true
|
|
82 |
# vllm_gpu_memory_utilization = 0.25
|
83 |
num_generations = 4
|
84 |
max_completion_length = 1024
|
|
|
|
|
|
|
|
|
85 |
|
86 |
[lora]
|
87 |
lora_target_modules = [
|
|
|
57 |
solution_field = "answer"
|
58 |
dataloader_num_workers = 2
|
59 |
test_size = 0.1
|
60 |
+
extract_hash = true
|
61 |
|
62 |
[run]
|
63 |
run_name = "rl-zariman-7"
|
|
|
83 |
# vllm_gpu_memory_utilization = 0.25
|
84 |
num_generations = 4
|
85 |
max_completion_length = 1024
|
86 |
+
num_iterations = 4 # see the TRL v0.16.0 release notes: https://github.com/huggingface/trl/releases/tag/v0.16.0
|
87 |
+
scale_rewards = false
|
88 |
+
beta = 0.0 # With beta = 0.0 the KL-divergence term is not minimized, so the reference model is not loaded, which saves memory.
|
89 |
+
epsilon_high = 0.28 # Raising the upper clipping bound (epsilon_high) leads to higher entropy during generation, promoting better exploration.
|
90 |
|
91 |
[lora]
|
92 |
lora_target_modules = [
|