Update README.md
README.md CHANGED
@@ -108,6 +108,31 @@ response = client.chat.completions.create(
     # reasoning_effort="low",  # set one of the 3 values ("low"/"medium"/"high") to enable reasoning
 )
 print(response.choices[0].message.content)
+
+response1 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"}
+    ],
+    max_completion_tokens=4096,
+    reasoning_effort="medium"  # optional reasoning mode
+)
+print("First response:", response1.choices[0].message.content)
+
+# Second turn (using the previous response as context)
+response2 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"},
+        {"role": "assistant", "content": response1.choices[0].message.content},  # previous response
+        {"role": "user", "content": "Can you give an analogy for superposition?"}
+    ],
+    reasoning_effort="high",
+    max_completion_tokens=8192
+)
+print("Follow-up response:", response2.choices[0].message.content)
 ```
 
 # VLLM Deployment
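The second call works by replaying the full conversation, including the assistant's first reply, so the model sees its own answer as context. A small helper can keep that history automatically. The sketch below is illustrative only, not part of the commit: `base_url`, `api_key`, and `model_name` are placeholder values for an OpenAI-compatible endpoint, and the `chat` helper is a hypothetical convenience wrapper; only the `client.chat.completions.create` call and its `reasoning_effort` / `max_completion_tokens` parameters come from the README.

```python
from openai import OpenAI

# Placeholder connection details (assumptions, not from the README):
# point base_url at your OpenAI-compatible server and set the deployed model's name.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model_name = "your-model-name"

# Shared conversation history, seeded with the same system prompt as the example.
messages = [{"role": "system", "content": "You're a helpful AI assistant"}]

def chat(user_text: str, effort: str = "medium", max_tokens: int = 4096) -> str:
    """Send one user turn, append the assistant's reply to the history, and return it."""
    messages.append({"role": "user", "content": user_text})
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_tokens,
        reasoning_effort=effort,  # "low", "medium", or "high", as in the README
    )
    reply = response.choices[0].message.content
    messages.append({"role": "assistant", "content": reply})
    return reply

print("First response:", chat("Explain quantum computing in simple terms"))
print("Follow-up response:",
      chat("Can you give an analogy for superposition?", effort="high", max_tokens=8192))
```

Appending each reply to `messages` before the next request reproduces the manual context-passing shown in the diff without rebuilding the message list by hand on every turn.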