from huggingface_hub import InferenceClient
import os


class ServerlessInference:
    def __init__(self):
        # Chat model served through the Hugging Face serverless Inference API
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # The access token is read from the HF_SERVELESS_API environment variable
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))

    def test(self, query: str) -> str:
        '''Responds to a query using the LLM.'''
        # Build an OpenAI-style chat message list from the user query
        messages: list = [
            {
                "role": "user",
                "content": query
            }
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500
        )
        return completion.choices[0].message.content
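A minimal usage sketch, assuming the HF_SERVELESS_API environment variable holds a valid Hugging Face access token and the example query is only illustrative:

# Hypothetical usage: instantiate the wrapper and send one query to the model.
llm = ServerlessInference()
print(llm.test("What is serverless inference?"))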