attn-signs committed on
Commit
97b3ae5
·
verified ·
1 Parent(s): 261cda6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +78 -1
README.md CHANGED
@@ -56,7 +56,84 @@ Utilized HF.Accelerator
56
  --==[MyLLM](https://github.com/Raumberg/myllm)==--
57
 
58
  ### Model configuration (MyLLM Framework)
59
- TO BE DISCLOSED
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  ### Using the model / Как запустить?
62
 
 
56
  --==[MyLLM](https://github.com/Raumberg/myllm)==--
57
 
58
  ### Model configuration (MyLLM Framework)
59
+ ```toml
60
+ [model]
61
+ model_name_or_path = "attn-signs/GPTR-8-base"
62
+
63
+ [datasets]
64
+ dataset = "d0rj/gsm8k-ru"
65
+ problem_field = "question"
66
+ solution_field = "answer"
67
+ dataloader_num_workers = 2
68
+ test_size = 0.1
69
+ extract_hash = true
70
+
71
+ [run]
72
+ run_name = "rl-gptr-8"
73
+ report_to = "wandb"
74
+ logging_first_step = true
75
+ logging_steps = 1
76
+ save_strategy = "steps"
77
+ save_steps = 500
78
+ save_total_limit = 5
79
+ output_dir = "models/attn-signs-gptr-8-grpo"
80
+ project_name = "rl-gptr"
81
+
82
+ [training]
83
+ num_train_epochs = 1
84
+ per_device_train_batch_size = 2
85
+ learning_rate = 0.00001
86
+ bf16 = true
87
+ seed = 42
88
+ use_peft = true
89
+
90
+ [grpo]
91
+ use_vllm = true
92
+ num_generations = 2
93
+ max_completion_length = 2048
94
+ num_iterations = 1 # https://github.com/huggingface/trl/releases/tag/v0.16.0
95
+ scale_rewards = false # should be default var
96
+ beta = 0.04 # reference model beta in vllm
97
+ epsilon_high = 0.28 # Increasing upper bound epsilon leads to higher entropy during generation, promoting better exploration
98
+ preload_rm = false
99
+
100
+ [lora]
101
+ lora_target_modules = [
102
+ "k_proj",
103
+ "v_proj",
104
+ "q_proj",
105
+ "o_proj",
106
+ "gate_proj",
107
+ "up_proj",
108
+ "down_proj",
109
+ ]
110
+ lora_r = 32
111
+ lora_alpha = 64
112
+
113
+ [fusion]
114
+ use_liger = false
115
+ attn_implementation = "flash_attention_2"
116
+
117
+ [tokenizer]
118
+ eos_token = "</s>"
119
+ pad_token = "<unk>"
120
+ chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<s>' + message['role'] + '\n' + message['content'] + '</s>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<s>assistant\n' }}{% endif %}"
121
+ force_chat_template = true
122
+ added_special_tokens = [
123
+ "<think>",
124
+ "</think>"
125
+ ]
126
+ system_prompt = """
127
+ [MODE: Reflection]
128
+ """
129
+ ```
130
+ ### Rewards:
131
+ - Equation structure reward
132
+ - Correctness reward
133
+ - Multilingual coherence reward
134
+ - Strict Chinese penalty
135
+ - Format reward
136
+ - Russian purity reward
137
 
138
  ### Using the model / Как запустить?
139