RabotniKuma committed on
Commit
cbb3d80
·
verified ·
1 Parent(s): cc6b91b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -3
README.md CHANGED
@@ -43,20 +43,39 @@ Technical details can be found in [Kaggle Discussion](https://www.kaggle.com/com
43
  ## vLLM
44
  ```python
45
  from vllm import LLM, SamplingParams
 
46
 
47
 
 
48
  vllm_engine = LLM(
49
- model='RabotniKuma/Fast-Math-R1-14B',
50
  max_model_len=8192,
51
  gpu_memory_utilization=0.9,
52
  trust_remote_code=True,
53
  )
 
 
 
54
  sampling_params = SamplingParams(
55
  temperature=1.0,
56
  top_p=0.90,
57
  min_p=0.05,
58
  max_tokens=8192,
59
- stop='</think>', # Important: early stop at </think> to save output tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  )
61
- vllm_engine.generate('1+1=', sampling_params=sampling_params)
62
  ```
 
43
  ## vLLM
44
  ```python
45
  from vllm import LLM, SamplingParams
46
+ from transformers import AutoTokenizer
47
 
48
 
49
+ model_path = 'RabotniKuma/Fast-Math-R1-14B'
50
  vllm_engine = LLM(
51
+ model=model_path,
52
  max_model_len=8192,
53
  gpu_memory_utilization=0.9,
54
  trust_remote_code=True,
55
  )
56
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
57
+
58
+
59
  sampling_params = SamplingParams(
60
  temperature=1.0,
61
  top_p=0.90,
62
  min_p=0.05,
63
  max_tokens=8192,
64
+ stop='</think>', # Important!: early stop at </think> to save output tokens
65
+ )
66
+ messages = [
67
+ {
68
+ 'role': 'user',
69
+ 'content': (
70
+ 'Solve the problem, and put the answer in \\boxed{{}}. '
71
+ 'Sarah is twice as old as her youngest brother. If the difference between their ages is 15 years. How old is her youngest brother?'
72
+ )
73
+ }
74
+ ]
75
+ messages = tokenizer.apply_chat_template(
76
+ conversation=messages,
77
+ tokenize=False,
78
+ add_generation_prompt=True
79
  )
80
+ response = vllm_engine.generate(messages, sampling_params=sampling_params)
81
  ```