Update README.md
--==[MyLLM](https://github.com/Raumberg/myllm)==--

### Model configuration (MyLLM Framework)
```toml
[model]
model_name_or_path = "attn-signs/GPTR-8-base"

[datasets]
dataset = "d0rj/gsm8k-ru"
problem_field = "question"
solution_field = "answer"
dataloader_num_workers = 2
test_size = 0.1
extract_hash = true

[run]
run_name = "rl-gptr-8"
report_to = "wandb"
logging_first_step = true
logging_steps = 1
save_strategy = "steps"
save_steps = 500
save_total_limit = 5
output_dir = "models/attn-signs-gptr-8-grpo"
project_name = "rl-gptr"

[training]
num_train_epochs = 1
per_device_train_batch_size = 2
learning_rate = 0.00001
bf16 = true
seed = 42
use_peft = true

[grpo]
use_vllm = true
num_generations = 2
max_completion_length = 2048
num_iterations = 1      # see https://github.com/huggingface/trl/releases/tag/v0.16.0
scale_rewards = false   # should be the default
beta = 0.04             # KL coefficient for the reference model
epsilon_high = 0.28     # increasing the upper clipping bound raises generation entropy, promoting better exploration
preload_rm = false

[lora]
lora_target_modules = [
    "k_proj",
    "v_proj",
    "q_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
lora_r = 32
lora_alpha = 64

[fusion]
use_liger = false
attn_implementation = "flash_attention_2"

[tokenizer]
eos_token = "</s>"
pad_token = "<unk>"
chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<s>' + message['role'] + '\n' + message['content'] + '</s>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<s>assistant\n' }}{% endif %}"
force_chat_template = true
added_special_tokens = [
    "<think>",
    "</think>"
]
system_prompt = """
[MODE: Reflection]
"""
```
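For context, here is a minimal sketch of how a TOML file like this could be flattened into TRL's `GRPOConfig` (assuming trl >= 0.16, which the config references, and Python 3.11+ for `tomllib`). MyLLM's own loader is not shown in this README, so the section-to-argument mapping, the config path, and the list of dropped keys below are assumptions; only the `GRPOConfig` fields themselves come from TRL.

```python
# Illustrative only: MyLLM's real config loader is not shown in this README.
# Assumed: [run]/[training]/[grpo] keys map 1:1 onto TRL training arguments,
# except for MyLLM-specific keys, which are dropped before construction.
import tomllib

from trl import GRPOConfig

with open("configs/rl-gptr-8.toml", "rb") as f:  # hypothetical path
    cfg = tomllib.load(f)

trl_kwargs = {**cfg["run"], **cfg["training"], **cfg["grpo"]}
for myllm_key in ("project_name", "use_peft", "preload_rm"):
    trl_kwargs.pop(myllm_key, None)  # MyLLM-specific, not GRPOConfig fields

training_args = GRPOConfig(**trl_kwargs)
print(training_args.num_generations, training_args.epsilon_high)
```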

### Rewards:
- Equation structure reward
- Correctness reward
- Multilingual coherence reward
- Strict Chinese penalty
- Format reward (see the sketch below)
- Russian purity reward
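The reward implementations themselves are not listed in this README. As a rough illustration of the shape such functions take in a TRL GRPO setup, a format reward that checks for a closed `<think>` block (matching the special tokens added above) might look like this; the regex and scoring are illustrative assumptions, not MyLLM's actual code.

```python
import re

# Illustrative assumption: MyLLM's actual reward code is not shown here.
# TRL-style reward function: takes completions, returns one float per sample.
THINK_BLOCK = re.compile(r"^<think>.+?</think>", re.DOTALL)

def format_reward(completions: list[str], **kwargs) -> list[float]:
    """1.0 if the completion opens with a closed <think> block, else 0.0."""
    return [1.0 if THINK_BLOCK.match(c) else 0.0 for c in completions]

# Example: only the first completion earns the reward.
print(format_reward(["<think>reasoning</think> Answer: 42", "Answer: 42"]))
```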

### Using the model / How to run?