rag / z_generate.py
Deepak Sahu
first look
33080cc
raw
history blame
659 Bytes
from huggingface_hub import InferenceClient
import os
class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API for chat completion.

    Holds a fixed chat model id and an `InferenceClient` authenticated via the
    `HF_SERVELESS_API` environment variable.
    NOTE(review): the env-var name looks like a typo for "SERVERLESS" — left
    unchanged because deployments may already set it under this name; confirm
    before renaming.
    """

    def __init__(self) -> None:
        # Default chat model served by the HF serverless endpoint.
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # API key read from the environment; None if unset (client will fail on use).
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))

    def test(self, query: str, max_tokens: int = 500) -> str:
        """Send *query* as a single user message and return the model's reply text.

        Args:
            query: The user prompt to send to the model.
            max_tokens: Upper bound on generated tokens. Defaults to 500,
                matching the previously hard-coded limit (backward compatible).

        Returns:
            The assistant message content of the first completion choice.
        """
        # A chat request is a list of role/content message dicts
        # (was mis-annotated as `str` in the original).
        messages: list[dict[str, str]] = [
            {"role": "user", "content": query},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
        )
        return completion.choices[0].message.content