# NOTE(review): the three lines below were Hugging Face Spaces page chrome
# ("Spaces: / Sleeping / Sleeping") captured by a copy/paste of the Space's
# file viewer — kept here as a comment so the module parses.
import os

from huggingface_hub import InferenceClient
class ServerlessInference:
    """RAG front-end over the Hugging Face serverless Inference API.

    Holds one chat model plus two optional vector stores (text documents and
    image descriptions) and answers queries by retrieving context from them
    before calling the LLM.
    """

    def __init__(self, vector_store_text=None, vector_store_images=None):
        self.model: str = "HuggingFaceH4/zephyr-7b-beta"
        # NOTE(review): "HF_SERVELESS_API" looks misspelled (SERVELESS vs
        # SERVERLESS) but the deployment environment defines the variable
        # under exactly this name, so it must stay as-is.
        self.client = InferenceClient(api_key=os.getenv("HF_SERVELESS_API"))
        # Vector stores are optional; methods that need them will fail with
        # AttributeError if called while the store is still None.
        self.vs_text = vector_store_text
        self.vs_images = vector_store_images

    def _chat(self, messages: list, max_tokens: int = 500) -> str:
        """Send *messages* to the chat model and return the first choice's text.

        Centralizes the completion call that was previously duplicated in
        every public method.
        """
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=max_tokens,
        )
        return completion.choices[0].message.content

    @staticmethod
    def _build_context(header: str, texts: list) -> str:
        """Join retrieved document texts into one numbered context string.

        Produces ``header`` followed by ``Document {i}:::\\n{text}`` for each
        retrieved text, matching the format the prompts below expect.
        """
        return header + "".join(
            f"Document {i}:::\n" + text for i, text in enumerate(texts)
        )

    def test(self, query: str) -> str:
        """Respond to *query* with the bare LLM (no retrieval)."""
        return self._chat([{"role": "user", "content": query}])

    def perform_rag(self, query: str) -> str:
        """Answer *query* using text RAG, then append relevant image indices.

        Two LLM round-trips: (1) answer the question from the top-5 text
        documents; (2) ask the model which of the top-5 image descriptions
        are relevant, few-shot prompted to reply with a Python-style list of
        indices. Returns the text answer concatenated with that list string.
        """
        # --- Text retrieval -------------------------------------------------
        retrieved_docs = self.vs_text.similarity_search(query=query, k=5)
        # We only need the text of the documents.
        doc_texts = [doc.page_content for doc in retrieved_docs]
        context = self._build_context("\nExtracted documents:\n", doc_texts)

        # --- Augmented generation ------------------------------------------
        answer_messages: list = [
            {
                "role": "system",
                "content": """Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
If the answer cannot be deduced from the context, do not give an answer. Instead say `Theres lack of information in document source.`""",
            },
            {
                "role": "user",
                "content": """Context:
{context}
---
Now here is the question you need to answer.
Question: {question}""".format(context=context, question=query),
            },
        ]
        response_text = self._chat(answer_messages)

        # --- Image retrieval ------------------------------------------------
        retrieved_images = self.vs_images.similarity_search(query=query, k=5)
        image_texts = [doc.page_content for doc in retrieved_images]
        context = self._build_context("\nExtracted Images:\n", image_texts)

        # Few-shot prompt: one worked example teaches the model to answer
        # with a bare list of image indices (or [] when nothing matches).
        image_messages: list = [
            {
                "role": "system",
                "content": """Using the information contained in the context about the images stored in the database,
give a list of identifiers of the image that best represent the kind of information seeked by the question.
Respond only to the question asked. Provide only number(s) of the source images relevant to the question.
If the image is relevant to the question then output format should be a list [1, 3, 0]
otherwise just reply empty list that is []""",
            },
            {
                "role": "user",
                "content": """Context:
Extracted Images:
Document 0:::
Rahuls playing football
Document 1:::
Rahul recieving award in Archery.
---
Now here is the question you need to answer.
Question: Who is Rahul""",
            },
            {
                "role": "assistant",
                "content": "[1, ]",
            },
            {
                "role": "user",
                "content": """Context:
{context}
---
Now here is the question you need to answer.
Question: {question}""".format(context=context, question=query),
            },
        ]
        images_list = self._chat(image_messages)
        # str() guards against a None message content from the API; the
        # model's reply is expected to already be a list-like string.
        return response_text + str(images_list)