Files changed (1)
  1. README.md +53 -0
README.md CHANGED
@@ -82,6 +82,59 @@ print("reasoning content:", reasoning_content)
  print("content:", content)
  ```
 
+ # How to use with Sarvam APIs
+
+ ```python
+ from openai import OpenAI
+
+ base_url = "https://api.sarvam.ai/v1"
+ model_name = "sarvam-m"
+ api_key = "Your-API-Key"  # get it from https://dashboard.sarvam.ai/
+
+
+ client = OpenAI(
+     base_url=base_url,
+     api_key=api_key,
+ ).with_options(max_retries=1)
+
+ response = client.chat.completions.create(
+     model=model_name,
+     messages=[
+         {"role": "system", "content": "say hi"},
+         {"role": "user", "content": "say hi"},
+     ],
+     stream=False,
+     max_completion_tokens=2048,
+     # reasoning_effort="low",  # set to "low", "medium", or "high" to enable reasoning
+ )
+ print(response.choices[0].message.content)
+
+ response1 = client.chat.completions.create(
+     model=model_name,
+     messages=[
+         {"role": "system", "content": "You're a helpful AI assistant"},
+         {"role": "user", "content": "Explain quantum computing in simple terms"},
+     ],
+     max_completion_tokens=4096,
+     reasoning_effort="medium",  # optional reasoning mode
+ )
+ print("First response:", response1.choices[0].message.content)
+
+ # Second turn (using the previous response as context)
+ response2 = client.chat.completions.create(
+     model=model_name,
+     messages=[
+         {"role": "system", "content": "You're a helpful AI assistant"},
+         {"role": "user", "content": "Explain quantum computing in simple terms"},
+         {"role": "assistant", "content": response1.choices[0].message.content},  # previous response
+         {"role": "user", "content": "Can you give an analogy for superposition?"},
+     ],
+     reasoning_effort="high",
+     max_completion_tokens=8192,
+ )
+ print("Follow-up response:", response2.choices[0].message.content)
+ ```
+
  # VLLM Deployment
 
  For easy deployment, we can use `vllm>=0.8.5` and create an OpenAI-compatible API endpoint with `vllm serve sarvamai/sarvam-m`
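
Once `vllm serve sarvamai/sarvam-m` is running, the endpoint can be queried with the same OpenAI client as in the section above. A minimal sketch, assuming vLLM's default port 8000; the `api_key` value is a placeholder, since the local server does not check it unless started with `--api-key`:

```python
# Start the server first (shell): vllm serve sarvamai/sarvam-m
from openai import OpenAI

# Assumption: vLLM's OpenAI-compatible server on its default port 8000.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="sarvamai/sarvam-m",  # the served model name is the model path
    messages=[{"role": "user", "content": "say hi"}],
    max_tokens=256,
)
print(response.choices[0].message.content)
```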