from huggingface_hub import InferenceClient
import os


class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API chat endpoint."""

    def __init__(self) -> None:
        # Model served by the HF serverless inference tier.
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): "HF_SERVELESS_API" looks like a typo for "HF_SERVERLESS_API",
        # but the env-var name is runtime behavior — renaming it would break existing
        # deployments, so it is kept as-is. Confirm with the deployment config.
        # os.getenv returns None when unset; InferenceClient then runs unauthenticated.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))

    def test(self, query: str) -> str:
        """Send *query* as a single-turn chat to the LLM and return its reply.

        Args:
            query: The user prompt to send.

        Returns:
            The assistant's reply text from the first completion choice.
        """
        # Fixed annotation: this is a list of role/content dicts, not a str.
        messages: list[dict[str, str]] = [
            {"role": "user", "content": query},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500,
        )
        return completion.choices[0].message.content