Update README.md #2
by kurianbenoy - opened

README.md CHANGED
@@ -82,6 +82,59 @@ print("reasoning content:", reasoning_content)
 print("content:", content)
 ```
 
+# How to use with Sarvam APIs
+
+```python
+from openai import OpenAI
+
+base_url = "https://api.sarvam.ai/v1"
+model_name = "sarvam-m"
+api_key = "Your-API-Key"  # get it from https://dashboard.sarvam.ai/
+
+
+client = OpenAI(
+    base_url=base_url,
+    api_key=api_key,
+).with_options(max_retries=1)
+
+response = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "say hi"},
+        {"role": "user", "content": "say hi"},
+    ],
+    stream=False,
+    max_completion_tokens=2048,
+    # reasoning_effort="low",  # uncomment and set to "low", "medium", or "high" to enable reasoning
+)
+print(response.choices[0].message.content)
+
+response1 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"},
+    ],
+    max_completion_tokens=4096,
+    reasoning_effort="medium",  # optional reasoning mode
+)
+print("First response:", response1.choices[0].message.content)
+
+# Second turn (using the previous response as context)
+response2 = client.chat.completions.create(
+    model=model_name,
+    messages=[
+        {"role": "system", "content": "You're a helpful AI assistant"},
+        {"role": "user", "content": "Explain quantum computing in simple terms"},
+        {"role": "assistant", "content": response1.choices[0].message.content},  # previous response
+        {"role": "user", "content": "Can you give an analogy for superposition?"},
+    ],
+    reasoning_effort="high",
+    max_completion_tokens=8192,
+)
+print("Follow-up response:", response2.choices[0].message.content)
+```
+
 # VLLM Deployment
 
 For easy deployment, we can use `vllm>=0.8.5` and create an OpenAI-compatible API endpoint with `vllm serve sarvamai/sarvam-m`
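
Once `vllm serve sarvamai/sarvam-m` is running, the same OpenAI client pattern from the diff can be pointed at the local server. A minimal sketch, assuming vLLM's default port 8000 and no `--api-key` configured (in which case the key string is an arbitrary placeholder):

```python
from openai import OpenAI

# Local vLLM server started with: vllm serve sarvamai/sarvam-m
# Port 8000 is vLLM's default; the key is a placeholder because the
# server only enforces one when launched with --api-key.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")

response = client.chat.completions.create(
    model="sarvamai/sarvam-m",  # served model name defaults to the HF repo id
    messages=[{"role": "user", "content": "say hi"}],
    max_completion_tokens=256,
)
print(response.choices[0].message.content)
```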
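
The examples in the diff pass `stream=False`; receiving the reply incrementally only changes the `stream` flag and how the response is consumed. This is standard OpenAI SDK behavior rather than anything Sarvam-specific; a sketch against the same local endpoint:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")

# With stream=True the SDK yields chunks as the server produces tokens.
stream = client.chat.completions.create(
    model="sarvamai/sarvam-m",
    messages=[{"role": "user", "content": "Give an analogy for superposition."}],
    max_completion_tokens=512,
    stream=True,
)
for chunk in stream:
    # Some chunks (e.g. a final usage chunk) can carry no choices.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```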