rag / z_generate.py
Deepak Sahu
first look
33080cc
raw
history blame
659 Bytes
from huggingface_hub import InferenceClient
import os
class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API for chat completion.

    Holds a fixed chat model id and an `InferenceClient` authenticated via the
    `HF_SERVELESS_API` environment variable.
    NOTE(review): the env-var name looks like a typo for "SERVERLESS" — left
    unchanged because deployments may already set it under this name; confirm
    before renaming.
    """

    def __init__(self) -> None:
        # Default chat model served by the HF serverless endpoint.
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # API key read from the environment; None if unset (client will fail on use).
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))

    def test(self, query: str, max_tokens: int = 500) -> str:
        """Send *query* as a single user message and return the model's reply text.

        Args:
            query: The user prompt to send to the model.
            max_tokens: Upper bound on generated tokens. Defaults to 500,
                matching the previously hard-coded limit (backward compatible).

        Returns:
            The assistant message content of the first completion choice.
        """
        # A chat request is a list of role/content message dicts
        # (was mis-annotated as `str` in the original).
        messages: list[dict[str, str]] = [
            {"role": "user", "content": query},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
        )
        return completion.choices[0].message.content