Update README.md
--==[MyLLM](https://github.com/Raumberg/myllm)==--

### Model configuration (MyLLM Framework)
```toml
[model]
model_name_or_path = "attn-signs/GPTR-8-base"

[datasets]
dataset = "d0rj/gsm8k-ru"
problem_field = "question"
solution_field = "answer"
dataloader_num_workers = 2
test_size = 0.1
extract_hash = true

[run]
run_name = "rl-gptr-8"
report_to = "wandb"
logging_first_step = true
logging_steps = 1
save_strategy = "steps"
save_steps = 500
save_total_limit = 5
output_dir = "models/attn-signs-gptr-8-grpo"
project_name = "rl-gptr"

[training]
num_train_epochs = 1
per_device_train_batch_size = 2
learning_rate = 0.00001
bf16 = true
seed = 42
use_peft = true

[grpo]
use_vllm = true
num_generations = 2
max_completion_length = 2048
num_iterations = 1      # see https://github.com/huggingface/trl/releases/tag/v0.16.0
scale_rewards = false   # should be the default
beta = 0.04             # KL coefficient for the reference model
epsilon_high = 0.28     # increasing the upper clipping bound raises generation entropy, promoting better exploration
preload_rm = false

[lora]
lora_target_modules = [
    "k_proj",
    "v_proj",
    "q_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
lora_r = 32
lora_alpha = 64

[fusion]
use_liger = false
attn_implementation = "flash_attention_2"

[tokenizer]
eos_token = "</s>"
pad_token = "<unk>"
chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<s>' + message['role'] + '\n' + message['content'] + '</s>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<s>assistant\n' }}{% endif %}"
force_chat_template = true
added_special_tokens = [
    "<think>",
    "</think>"
]
system_prompt = """
[MODE: Reflection]
"""
```
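For context, here is a minimal sketch of how a TOML file like this could be flattened into TRL's `GRPOConfig` (assuming trl >= 0.16, which the config references, and Python 3.11+ for `tomllib`). MyLLM's own loader is not shown in this README, so the section-to-argument mapping, the config path, and the list of dropped keys below are assumptions; only the `GRPOConfig` fields themselves come from TRL.

```python
# Illustrative only: MyLLM's real config loader is not shown in this README.
# Assumed: [run]/[training]/[grpo] keys map 1:1 onto TRL training arguments,
# except for MyLLM-specific keys, which are dropped before construction.
import tomllib

from trl import GRPOConfig

with open("configs/rl-gptr-8.toml", "rb") as f:  # hypothetical path
    cfg = tomllib.load(f)

trl_kwargs = {**cfg["run"], **cfg["training"], **cfg["grpo"]}
for myllm_key in ("project_name", "use_peft", "preload_rm"):
    trl_kwargs.pop(myllm_key, None)  # MyLLM-specific, not GRPOConfig fields

training_args = GRPOConfig(**trl_kwargs)
print(training_args.num_generations, training_args.epsilon_high)
```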

### Rewards:
- Equation structure reward
- Correctness reward
- Multilingual coherence reward
- Strict Chinese penalty
- Format reward (see the sketch below)
- Russian purity reward
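The reward implementations themselves are not listed in this README. As a rough illustration of the shape such functions take in a TRL GRPO setup, a format reward that checks for a closed `<think>` block (matching the special tokens added above) might look like this; the regex and scoring are illustrative assumptions, not MyLLM's actual code.

```python
import re

# Illustrative assumption: MyLLM's actual reward code is not shown here.
# TRL-style reward function: takes completions, returns one float per sample.
THINK_BLOCK = re.compile(r"^<think>.+?</think>", re.DOTALL)

def format_reward(completions: list[str], **kwargs) -> list[float]:
    """1.0 if the completion opens with a closed <think> block, else 0.0."""
    return [1.0 if THINK_BLOCK.match(c) else 0.0 for c in completions]

# Example: only the first completion earns the reward.
print(format_reward(["<think>reasoning</think> Answer: 42", "Answer: 42"]))
```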

### Using the model / How to run?