rag / z_generate.py
Deepak Sahu
adding vector store
2fe32bb
raw
history blame
997 Bytes
from huggingface_hub import InferenceClient
import os
class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API for RAG.

    Holds an `InferenceClient` bound to a fixed chat model plus optional
    vector stores used for retrieval.
    """

    def __init__(self, vector_store_text=None, vector_store_images=None):
        """Create the client and remember the retrieval stores.

        Args:
            vector_store_text: object exposing `similarity_search(query, k)`
                returning docs with a `.page_content` attribute (e.g. a
                LangChain vector store) — TODO confirm against caller.
            vector_store_images: image vector store; stored but unused here.
        """
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): env var is spelled "SERVELESS" (likely a typo for
        # "SERVERLESS") — kept as-is because deployments set this exact name.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))
        self.vs_text = vector_store_text
        self.vs_images = vector_store_images

    def test(self, query: str) -> str:
        """Send `query` to the chat model and return the reply text.

        Raises whatever the underlying HTTP client raises on API failure.
        """
        # Fixed annotation: this is a list of chat-message dicts, not a str.
        messages: list[dict[str, str]] = [
            {
                "role": "user",
                "content": query,
            }
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500,
        )
        return completion.choices[0].message.content

    def perform_rag(self, query: str) -> str:
        """Retrieve text context for `query` from the text vector store.

        Fetches the top-5 matches but returns only the best match's content
        (current behavior preserved). Raises IndexError if nothing matches.
        """
        relevant_docs = self.vs_text.similarity_search(query=query, k=5)
        return relevant_docs[0].page_content