Update README.md #2
by kurianbenoy - opened

README.md CHANGED
@@ -82,6 +82,59 @@ print("reasoning content:", reasoning_content)
 print("content:", content)
 ```
 
+# How to use with Sarvam APIs
+
+```python
+from openai import OpenAI
+
+base_url = "https://api.sarvam.ai/v1"
+model_name = "sarvam-m"
+api_key = "Your-API-Key"  # get it from https://dashboard.sarvam.ai/
+
+
+client = OpenAI(
+    base_url=base_url,
+    api_key=api_key,
+).with_options(max_retries=1)
+
+response = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "say hi"},
+        {"role": "user", "content": "say hi"},
+    ],
+    stream=False,
+    max_completion_tokens=2048,
+    # reasoning_effort="low",  # uncomment and set to "low", "medium", or "high" to enable reasoning
+)
+print(response.choices[0].message.content)
+
+response1 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"},
+    ],
+    max_completion_tokens=4096,
+    reasoning_effort="medium",  # optional reasoning mode
+)
+print("First response:", response1.choices[0].message.content)
+
+# Second turn (using the previous response as context)
+response2 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"},
+        {"role": "assistant", "content": response1.choices[0].message.content},  # previous response
+        {"role": "user", "content": "Can you give an analogy for superposition?"},
+    ],
+    reasoning_effort="high",
+    max_completion_tokens=8192,
+)
+print("Follow-up response:", response2.choices[0].message.content)
+```
+
 # VLLM Deployment
 
 For easy deployment, we can use `vllm>=0.8.5` and create an OpenAI-compatible API endpoint with `vllm serve sarvamai/sarvam-m`
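
Once `vllm serve sarvamai/sarvam-m` is running, the same OpenAI client pattern from the diff can be pointed at the local server. A minimal sketch, assuming vLLM's default port 8000 and no `--api-key` configured (in which case the key string is an arbitrary placeholder):

```python
from openai import OpenAI

# Local vLLM server started with: vllm serve sarvamai/sarvam-m
# Port 8000 is vLLM's default; the key is a placeholder because the
# server only enforces one when launched with --api-key.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")

response = client.chat.completions.create(
    model="sarvamai/sarvam-m",  # served model name defaults to the HF repo id
    messages=[{"role": "user", "content": "say hi"}],
    max_completion_tokens=256,
)
print(response.choices[0].message.content)
```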
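
The examples in the diff pass `stream=False`; receiving the reply incrementally only changes the `stream` flag and how the response is consumed. This is standard OpenAI SDK behavior rather than anything Sarvam-specific; a sketch against the same local endpoint:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")

# With stream=True the SDK yields chunks as the server produces tokens.
stream = client.chat.completions.create(
    model="sarvamai/sarvam-m",
    messages=[{"role": "user", "content": "Give an analogy for superposition."}],
    max_completion_tokens=512,
    stream=True,
)
for chunk in stream:
    # Some chunks (e.g. a final usage chunk) can carry no choices.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```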