# NOTE: "Spaces: Sleeping" below was banner text captured from the Hugging Face
# Spaces web page this file was scraped from — it is not part of the module.
# (original text: "Spaces: / Sleeping / Sleeping")
# Standard library
import os

# Third-party
from huggingface_hub import InferenceClient
class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API.

    Holds an ``InferenceClient`` pointed at a fixed chat model and optional
    vector stores used for retrieval (text and images). The vector stores are
    expected to expose a ``similarity_search(query=..., k=...)`` method
    returning objects with a ``page_content`` attribute — presumably a
    LangChain-style store; confirm against the caller.
    """

    def __init__(self, vector_store_text=None, vector_store_images=None):
        """Create the inference client and remember the vector stores.

        Args:
            vector_store_text: vector store searched by :meth:`perform_rag`.
            vector_store_images: image vector store (stored but unused here).
        """
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): "HF_SERVELESS_API" looks like a typo for
        # "HF_SERVERLESS_API", but it must match the secret name configured in
        # the deployment environment — confirm before renaming.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))
        self.vs_text = vector_store_text
        self.vs_images = vector_store_images

    def test(self, query: str) -> str:
        """Send ``query`` to the chat model and return the reply text.

        Args:
            query: user message forwarded verbatim to the model.

        Returns:
            The assistant message content of the first completion choice.
        """
        # The original annotated this as ``str``; it is actually a list of
        # role/content message dicts per the chat-completions API.
        messages: list[dict[str, str]] = [
            {"role": "user", "content": query},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500,
        )
        return completion.choices[0].message.content

    def perform_rag(self, query: str, k: int = 5) -> str:
        """Return the text of the document most similar to ``query``.

        Args:
            query: search string passed to the text vector store.
            k: number of candidates to retrieve (default 5, matching the
                previously hard-coded value; only the top hit is returned).

        Returns:
            ``page_content`` of the best-matching document, or an empty
            string when the store returns no results.
        """
        relevant_docs = self.vs_text.similarity_search(query=query, k=k)
        # Guard the empty-result case instead of raising a bare IndexError.
        if not relevant_docs:
            return ""
        return relevant_docs[0].page_content