import os

from huggingface_hub import InferenceClient


class ServerlessInference:
    """Thin wrapper around the Hugging Face serverless Inference API.

    Holds a chat model id, an `InferenceClient`, and optional vector stores
    for text and image retrieval (duck-typed; only `similarity_search` is
    used on the text store here).
    """

    def __init__(self, vector_store_text=None, vector_store_images=None):
        # Chat model used for all completions issued by this instance.
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): env var name looks like a typo for "HF_SERVERLESS_API";
        # kept as-is because existing deployments may set this exact name —
        # confirm before renaming. `os.getenv` yields None when unset.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))
        self.vs_text = vector_store_text
        self.vs_images = vector_store_images

    def test(self, query: str) -> str:
        """Send *query* as a single user message and return the model's reply."""
        # Fix: was annotated `messages: str`; it is a list of chat-message dicts.
        messages: list[dict[str, str]] = [
            {"role": "user", "content": query},
        ]
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=500,
        )
        return completion.choices[0].message.content

    def perform_rag(self, query: str) -> str:
        """Return the page content of the text document most similar to *query*.

        Runs a k=5 similarity search against the text vector store and returns
        the top hit's `page_content`. Returns "" when nothing is retrieved
        (previously this raised IndexError on an empty result set).
        """
        relevant_docs = self.vs_text.similarity_search(query=query, k=5)
        if not relevant_docs:
            return ""
        return relevant_docs[0].page_content