Spaces:
Sleeping
Sleeping
File size: 659 Bytes
33080cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
from huggingface_hub import InferenceClient
import os
class ServerlessInference:
    """Thin client for the Hugging Face serverless Inference API.

    Wraps an ``InferenceClient`` configured from the environment and exposes a
    single-turn chat helper.  Requires the ``HF_SERVELESS_API`` environment
    variable to hold a valid HF API token.
    """

    def __init__(self) -> None:
        # Default chat model used for all requests.
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): env var name "HF_SERVELESS_API" looks like a typo for
        # "HF_SERVERLESS_API" — kept as-is because deployment config reads this
        # exact key; renaming it would silently break authentication.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))

    def test(self, query: str) -> str:
        """Send *query* as a single user message and return the model's reply text.

        Args:
            query: The user prompt to send to the chat model.

        Returns:
            The assistant's reply content from the first completion choice.
        """
        # Chat-completion payload: a list of role/content message dicts
        # (was mis-annotated as ``str`` in the original).
        messages: list[dict[str, str]] = [
            {
                "role": "user",
                "content": query,
            }
        ]
        # Replies are capped at 500 tokens to bound latency/cost.
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500,
        )
        return completion.choices[0].message.content