# Page-capture residue (not code): "Spaces: Runtime error" status banner.
| from langchain_community.document_loaders import ( | |
| PyPDFLoader, | |
| TextLoader, | |
| DirectoryLoader, | |
| CSVLoader, | |
| UnstructuredExcelLoader, | |
| Docx2txtLoader, | |
| ) | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter | |
| import tiktoken | |
| import chroma | |
| import gradio as gr | |
| import os, tempfile, glob, random | |
| from pathlib import Path | |
| from IPython.display import Markdown | |
| from PIL import Image | |
| from getpass import getpass | |
| import numpy as np | |
| from itertools import combinations | |
| # LLM: openai and google_genai | |
| import openai | |
| from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| # LLM: HuggingFace | |
| from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings | |
| from langchain_community.llms import HuggingFaceHub | |
| # langchain prompts, memory, chains... | |
| from langchain.prompts import PromptTemplate, ChatPromptTemplate | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain_community.chat_message_histories import StreamlitChatMessageHistory | |
| from operator import itemgetter | |
| from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough | |
| from langchain.schema import Document, format_document | |
| from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string | |
| from langchain.retrievers.document_compressors import DocumentCompressorPipeline | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder | |
| from langchain.retrievers.document_compressors import EmbeddingsFilter | |
| from langchain.retrievers import ContextualCompressionRetriever | |
| from langchain.retrievers import ContextualCompressionRetriever | |
| from langchain.retrievers.document_compressors import CohereRerank | |
| from langchain_community.llms import Cohere | |
| from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory | |
| from langchain.schema import Document | |
def langchain_document_loader(TMP_DIR):
    """Load every supported document found under ``TMP_DIR``.

    The directory is searched recursively, one file type at a time, in the
    order txt, pdf, csv, docx; the results are concatenated in that order.

    Parameters:
        TMP_DIR: directory to scan, given as a ``pathlib.Path`` or a path string.

    Returns:
        A list holding everything the individual loaders returned.
    """
    root = Path(TMP_DIR).as_posix()  # Path(...) lets callers pass a plain string too

    # (glob pattern, loader class, extra keyword arguments for that loader)
    loader_specs = (
        ("**/*.txt", TextLoader, None),
        ("**/*.pdf", PyPDFLoader, None),
        ("**/*.csv", CSVLoader, {"encoding": "utf8"}),
        ("**/*.docx", Docx2txtLoader, None),
    )

    documents = []
    for pattern, loader_cls, loader_kwargs in loader_specs:
        options = {"glob": pattern, "loader_cls": loader_cls, "show_progress": True}
        if loader_kwargs is not None:
            # Forward loader_kwargs only for the loaders that had them originally.
            options["loader_kwargs"] = loader_kwargs
        documents.extend(DirectoryLoader(root, **options).load())
    return documents
# Text splitting: cut the loaded documents into overlapping chunks.
# NOTE(review): `documents` is not assigned at module level anywhere in the
# visible file (it is only a local of langchain_document_loader) - confirm
# where it is expected to come from.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1600,
    chunk_overlap=200,
    separators=["\n\n", "\n", " ", ""],
)
chunks = text_splitter.split_documents(documents=documents)
def tiktoken_tokens(documents, model="gpt-3.5-turbo"):
    """Return the token count of each document's ``page_content``.

    Parameters:
        documents: iterable of objects exposing a ``page_content`` string.
        model (str): model name handed to ``tiktoken.encoding_for_model`` to
            pick the encoding. Default: "gpt-3.5-turbo".

    Returns:
        A list of ints, one per document, in input order.
    """
    encoding = tiktoken.encoding_for_model(model)
    # Iterate the documents directly so any iterable works, not only sequences.
    return [len(encoding.encode(doc.page_content)) for doc in documents]
# Report the average and the quartiles of the per-chunk token counts.
chunks_length = tiktoken_tokens(chunks, model="gpt-3.5-turbo")
print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
for _label, _quantile in (("25%", 0.25), ("50%", 0.5), ("75%", 0.75)):
    print(
        f"Number of tokens - {_label} percentile : "
        f"{int(np.quantile(chunks_length, _quantile))}"
    )
def select_embeddings_model(LLM_service="HuggingFace"):
    """Return the embeddings client for the requested service.

    Only "HuggingFace" is active: it builds a HuggingFaceInferenceAPIEmbeddings
    for the model "thenlper/gte-large", authenticated with the module-level
    ``HF_key``. The "OpenAI" and "Google" variants are disabled in this file
    and kept below as comments.

    Parameters:
        LLM_service (str): name of the embeddings provider. Default: "HuggingFace".

    Raises:
        ValueError: if ``LLM_service`` is not an active provider (previously
            this surfaced as an unbound local variable error).
    """
    # Disabled providers, kept for reference:
    # if LLM_service == "OpenAI":
    #     embeddings = OpenAIEmbeddings(
    #         model='text-embedding-ada-002',
    #         api_key=openai_api_key)
    # if LLM_service == "Google":
    #     embeddings = GoogleGenerativeAIEmbeddings(
    #         model="models/embedding-001",
    #         google_api_key=google_api_key
    #     )
    if LLM_service != "HuggingFace":
        raise ValueError(
            f"Unsupported LLM_service {LLM_service!r}; only 'HuggingFace' is enabled."
        )
    return HuggingFaceInferenceAPIEmbeddings(
        api_key=HF_key,
        model_name="thenlper/gte-large",
    )
# Only the HuggingFace embeddings are built; the OpenAI and Google ones stay disabled.
# embeddings_OpenAI = select_embeddings_model(LLM_service="OpenAI")
# embeddings_google = select_embeddings_model(LLM_service="Google")
# NOTE(review): select_embeddings_model reads the global `HF_key`, which this
# file assigns further down - confirm the intended statement order.
embeddings_HuggingFace = select_embeddings_model(
    LLM_service="HuggingFace",
)
def create_vectorstore(embeddings, documents, vectorstore_name):
    """Create a Chroma vector database from ``documents``.

    Parameters:
        embeddings: embeddings object passed to Chroma as ``embedding``.
        documents: the documents (chunks) to index.
        vectorstore_name (str): sub-directory name appended to the
            module-level ``LOCAL_VECTOR_STORE_DIR`` to form the persist directory.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    # The file's top-level `import chroma` does not bind the `Chroma` class
    # used here, so import it where it is needed.
    from langchain_community.vectorstores import Chroma

    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/" + vectorstore_name
    return Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
# The file's top-level `import chroma` does not bind the `Chroma` class used
# below, so import it explicitly.
from langchain_community.vectorstores import Chroma

create_vectorstores = True  # set to False to skip (re)building the vector stores

if create_vectorstores:
    # Disabled: OpenAI and Google vector stores.
    # vector_store_OpenAI,_ = create_vectorstore(
    #     embeddings=embeddings_OpenAI,
    #     documents = chunks,
    #     vectorstore_name="Vit_All_OpenAI_Embeddings",
    # )
    # print("vector_store_OpenAI:",vector_store_OpenAI._collection.count(),"chunks.")
    # vector_store_google,new_vectorstore_name = create_vectorstore(
    #     embeddings=embeddings_google,
    #     documents = chunks,
    #     vectorstore_name="Vit_All_Google_Embeddings"
    # )
    # print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
    vector_store_HF = create_vectorstore(
        embeddings=embeddings_HuggingFace,
        documents=chunks,
        vectorstore_name="Vit_All_HF_Embeddings",
    )
    print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
    print("")

# Open the persisted vector stores.
# NOTE(review): the original indentation was lost; this section is assumed to
# run unconditionally, after the optional build step above - confirm.
# Disabled: OpenAI and Google vector stores.
# vector_store_OpenAI = Chroma(
#     persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
#     embedding_function=embeddings_OpenAI)
# print("vector_store_OpenAI:",vector_store_OpenAI._collection.count(),"chunks.")
# vector_store_google = Chroma(
#     persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_Google_Embeddings",
#     embedding_function=embeddings_google)
# print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
vector_store_HF = Chroma(
    persist_directory=LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_HF_Embeddings",
    embedding_function=embeddings_HuggingFace,
)
print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=4, score_threshold=None
):
    """Create a vectorstore-backed retriever.

    Parameters:
        vectorstore: object whose ``as_retriever`` method builds the retriever.
        search_type: type of search the retriever should perform. Can be
            "similarity" (default), "mmr", or "similarity_score_threshold".
        k: number of documents to return (default: 4).
        score_threshold: minimum relevance threshold for
            similarity_score_threshold (default: None).

    Returns:
        The retriever returned by ``vectorstore.as_retriever``.
    """
    # Options left as None are not forwarded to the retriever at all.
    candidates = {"k": k, "score_threshold": score_threshold}
    search_kwargs = {
        option: setting for option, setting in candidates.items() if setting is not None
    }
    return vectorstore.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs,
    )
# Similarity-search retriever over the HuggingFace vector store (top 10 hits).
# The OpenAI and Google retrievers stay disabled.
# base_retriever_OpenAI = Vectorstore_backed_retriever(vector_store_OpenAI,"similarity",k=10)
# base_retriever_google = Vectorstore_backed_retriever(vector_store_google,"similarity",k=10)
base_retriever_HF = Vectorstore_backed_retriever(
    vector_store_HF,
    search_type="similarity",
    k=10,
)
def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Wrap ``base_retriever`` in a ContextualCompressionRetriever.

    The compressor is a DocumentCompressorPipeline with four stages, applied
    in this order: split into smaller chunks, drop redundant documents, keep
    the documents most relevant to the query, then reorder them.

    Parameters:
        embeddings: OpenAIEmbeddings, GoogleGenerativeAIEmbeddings or
            HuggingFaceInferenceAPIEmbeddings.
        base_retriever: a vectorstore-backed retriever.
        chunk_size (int): chunk size for the CharacterTextSplitter stage
            (default: 500).
        k (int): number of top relevant chunks kept by the EmbeddingsFilter
            (default: 16).
        similarity_threshold: minimum relevance threshold used by the
            EmbeddingsFilter (default: None).

    Returns:
        The ContextualCompressionRetriever.
    """
    stages = [
        # 1. Split documents into smaller chunks on sentence boundaries (". ").
        CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". "),
        # 2. Remove redundant documents.
        EmbeddingsRedundantFilter(embeddings=embeddings),
        # 3. Filter on relevance to the query (top k and similarity threshold).
        EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold),
        # 4. Reorder: less relevant documents go to the middle of the list,
        #    more relevant ones to the beginning or end.
        #    Reference: https://python.langchain.com/docs/modules/data_connection/retrievers/long_context_reorder
        LongContextReorder(),
    ]
    # 5. Assemble the pipeline and wrap the base retriever with it.
    return ContextualCompressionRetriever(
        base_compressor=DocumentCompressorPipeline(transformers=stages),
        base_retriever=base_retriever,
    )
def CohereRerank_retriever(
    base_retriever,
    cohere_api_key, cohere_model="rerank-multilingual-v2.0", top_n=8
):
    """Wrap ``base_retriever`` in a ContextualCompressionRetriever backed by CohereRerank.

    Parameters:
        base_retriever: a vectorstore-backed retriever.
        cohere_api_key: the Cohere API key.
        cohere_model: the Cohere model; either 'rerank-english-v2.0' or
            'rerank-multilingual-v2.0' (the default).
        top_n: top n results returned by Cohere rerank (default: 8).

    Returns:
        The ContextualCompressionRetriever using the Cohere reranker.
    """
    reranker = CohereRerank(cohere_api_key=cohere_api_key, model=cohere_model, top_n=top_n)
    return ContextualCompressionRetriever(
        base_compressor=reranker,
        base_retriever=base_retriever,
    )
def instantiate_LLM(LLM_provider, api_key, temperature=0.5, top_p=0.95, model_name=None):
    """Instantiate an LLM in LangChain.

    Parameters:
        LLM_provider (str): the LLM provider; in ["OpenAI", "Google", "HuggingFace"].
        api_key (str): google_api_key, openai_api_key or huggingfacehub_api_token,
            matching the provider.
        temperature (float): range 0.0 - 1.0; default = 0.5.
        top_p (float): range 0.0 - 1.0; default = 0.95.
        model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125",
            "gpt-4-turbo-preview", "gemini-pro",
            "mistralai/Mistral-7B-Instruct-v0.2"]. Passed through for "OpenAI";
            for "Google" it falls back to "gemini-pro" when omitted;
            "HuggingFace" always uses "mistralai/Mistral-7B-Instruct-v0.2".

    Returns:
        The instantiated LLM object for the provider.

    Raises:
        ValueError: if ``LLM_provider`` is not one of the supported providers.
    """
    if LLM_provider == "OpenAI":
        return ChatOpenAI(
            api_key=api_key,
            model=model_name,  # in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview"]
            temperature=temperature,
            model_kwargs={"top_p": top_p},
        )
    if LLM_provider == "Google":
        return ChatGoogleGenerativeAI(
            google_api_key=api_key,
            # Was the bare expression `gemini-pro` (a subtraction of two
            # undefined names); the model id must be a string.
            model=model_name or "gemini-pro",
            temperature=temperature,
            top_p=top_p,
            convert_system_message_to_human=True,
        )
    if LLM_provider == "HuggingFace":
        return HuggingFaceHub(
            repo_id="mistralai/Mistral-7B-Instruct-v0.2",
            huggingfacehub_api_token=api_key,
            model_kwargs={
                "temperature": temperature,
                "top_p": top_p,
                "do_sample": True,
                "max_new_tokens": 1024,
            },
        )
    raise ValueError(
        f"Unsupported LLM_provider {LLM_provider!r}; "
        "expected 'OpenAI', 'Google' or 'HuggingFace'."
    )
def get_environment_variable(key):
    """Return the value of environment variable ``key``, prompting if it is missing.

    Parameters:
        key (str): name of the environment variable.

    Returns:
        The variable's value when it is set (an empty string counts as set);
        otherwise whatever the user types at the ``getpass`` prompt.
    """
    value = os.environ.get(key)  # single lookup; None only when the key is absent
    if value is not None:
        print(f"\n[INFO]: {key} retrieved successfully.")
        return value
    print(f"\n[ERROR]: {key} is not found in your environment variables.")
    return getpass(f"Insert your {key}")
# API credentials, read straight from the process environment.
# A missing variable raises KeyError at this point.
openai_api_key = os.environ["openai_key"]
google_api_key = os.environ["gemini_key"]
HF_key = os.environ["HF_token"]
cohere_api_key = os.environ["cohere_api"]
def create_memory(model_name='gpt-3.5-turbo', memory_max_token=None):
    """Create the conversation memory for ``model_name``.

    "gpt-3.5-turbo" gets a ConversationSummaryBufferMemory whose summarising
    LLM is a ChatOpenAI "gpt-3.5-turbo" built with the module-level
    ``openai_api_key``; every other model gets a ConversationBufferMemory.

    Parameters:
        model_name (str): name of the chat model. Default: 'gpt-3.5-turbo'.
        memory_max_token: token limit of the summary buffer; 1024 when None.
            Only used for "gpt-3.5-turbo".

    Returns:
        The memory object.
    """
    shared_options = {
        "return_messages": True,
        "memory_key": "chat_history",
        "output_key": "answer",
        "input_key": "question",
    }
    if model_name != "gpt-3.5-turbo":
        return ConversationBufferMemory(**shared_options)

    # max_tokens for 'gpt-3.5-turbo' = 4096
    token_limit = 1024 if memory_max_token is None else memory_max_token
    summariser = ChatOpenAI(
        model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, temperature=0.1
    )
    return ConversationSummaryBufferMemory(
        max_token_limit=token_limit,
        llm=summariser,
        **shared_options,
    )
# `memory` is used at module level (here and by the chains below) but was
# never created, which raised NameError.
# NOTE(review): built with the same "gemini-pro" argument the
# ConversationalRetrievalChain in this file passes to create_memory - confirm
# this is the intended memory type.
memory = create_memory(model_name="gemini-pro")
memory.save_context(inputs={"question": "..."}, outputs={"answer": "...."})

# Prompt asking the LLM to rewrite a follow-up question as a standalone one.
standalone_question_template = """Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in its original language.\n\n
Chat History:\n{chat_history}\n
Follow Up Input: {question}\n
Standalone question:"""

standalone_question_prompt = PromptTemplate(
    input_variables=["chat_history", "question"],
    template=standalone_question_template,
)
def answer_template(language="english"):
    """Build the prompt template used by the LLM to answer the standalone question.

    The returned text keeps three unfilled placeholders - ``{chat_history}``,
    ``{context}`` and ``{question}`` - and has ``language`` already substituted.

    Parameters:
        language (str): language the answer must be written in. Default: "english".

    Returns:
        The template string.
    """
    # Doubled braces survive the f-string as literal {placeholders}; only
    # {language} is interpolated here.
    template = f"""Answer the question at the end, using only the following context (delimited by <context></context>).
Your answer must be in the language at the end.
<context>
{{chat_history}}
{{context}}
</context>
Question: {{question}}
Language: {language}.
"""
    return template
# NOTE(review): no `retriever` is assigned anywhere in the visible file; the
# only retriever that is actually built is `base_retriever_HF`, so it is
# assumed here - confirm (a compression or Cohere rerank retriever may have
# been intended).
retriever = base_retriever_HF

# The chain needs the answer prompt now, but the file only defines
# `answer_prompt` further down, so build it here as well.
answer_prompt = ChatPromptTemplate.from_template(answer_template())

# Conversational retrieval chain: one Gemini LLM condenses the follow-up
# question, another answers it from the retrieved documents.
# The Google provider is given `google_api_key` (the original passed the
# Hugging Face token here, unlike the other Google instantiations in this file).
chain = ConversationalRetrievalChain.from_llm(
    condense_question_prompt=standalone_question_prompt,
    combine_docs_chain_kwargs={"prompt": answer_prompt},
    condense_question_llm=instantiate_LLM(
        LLM_provider="Google",
        api_key=google_api_key,
        temperature=0.1,
        model_name="gemini-pro",
    ),
    memory=create_memory("gemini-pro"),
    retriever=retriever,
    llm=instantiate_LLM(
        LLM_provider="Google",
        api_key=google_api_key,
        temperature=0.5,
        model_name="gemini-pro",
    ),
    chain_type="stuff",
    verbose=False,
    return_source_documents=True,
)
# `StrOutputParser` is used below but is not imported at the top of the file.
from langchain_core.output_parsers import StrOutputParser

# 1. Load the memory: a RunnableLambda calls memory.load_memory_variables and
#    itemgetter picks its "chat_history" entry; RunnablePassthrough.assign adds
#    that value to the input under the "chat_history" key.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
)

# 2. Pass the follow-up question along with the chat history to the LLM and
#    parse its answer (the standalone question) to a string.
condense_question_prompt = PromptTemplate(
    input_variables=["chat_history", "question"],
    template=standalone_question_template,
)
condense_question_llm = instantiate_LLM(
    LLM_provider="Google",
    api_key=google_api_key,
    temperature=0.1,
    model_name="gemini-pro",
)
standalone_question_chain = {
    "standalone_question": {
        "question": lambda x: x["question"],
        # The prompt expects text, so the message list is flattened to a string.
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | condense_question_prompt
    | condense_question_llm
    | StrOutputParser(),
}

# 3. Combine loaded_memory and standalone_question_chain.
chain_question = loaded_memory | standalone_question_chain
# Demo: seed the memory with one exchange, then let chain_question rewrite a
# follow-up question into a standalone question.
memory.clear()
memory.save_context(
    {"question": "What does DTC stand for?"},
    {"answer": "Diffuse to Choose."},
)
print("Chat history:\n", memory.load_memory_variables({}))

follow_up_question = "plaese give more details about it, including its use cases and implementation."
print("\nFollow-up question:\n", follow_up_question)

# Invoke chain_question and keep only the rewritten question.
response = chain_question.invoke({"question": follow_up_question})["standalone_question"]
print("\nStandalone_question:\n", response)
def _combine_documents(docs, document_prompt, document_separator="\n\n"):
    """Format each document with ``document_prompt`` and join the results.

    Parameters:
        docs: iterable of documents to format.
        document_prompt: prompt passed to ``format_document`` for every document.
        document_separator (str): text placed between formatted documents.
            Default: a blank line.

    Returns:
        A single string with all formatted documents.
    """
    return document_separator.join(
        format_document(document, document_prompt) for document in docs
    )
# 1. Retrieve the documents relevant to the standalone question, and carry the
#    standalone question forward under the "question" key.
# NOTE(review): `retriever` is not assigned anywhere in the visible file - confirm.
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda state: state["standalone_question"],
}

# 2. Collect the variables ['chat_history', 'context', 'question'] that
#    `answer_prompt` expects.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
answer_prompt = ChatPromptTemplate.from_template(answer_template())
answer_prompt_variables = {
    "context": lambda state: _combine_documents(
        docs=state["docs"], document_prompt=DEFAULT_DOCUMENT_PROMPT
    ),
    "question": itemgetter("question"),
    "chat_history": itemgetter("chat_history"),  # added to the input by `loaded_memory`
}

llm = instantiate_LLM(
    LLM_provider="Google",
    api_key=google_api_key,
    temperature=0.5,
    model_name="gemini-pro",
)

# 3. Load the memory, fill `answer_prompt` with context, question and
#    chat_history, and pass the prompt to the LLM.
#    The result holds the answer, the docs and the standalone question.
chain_answer = {
    "answer": loaded_memory | answer_prompt_variables | answer_prompt | llm,
    # Rebuild each document from its page_content and metadata only.
    "docs": lambda state: [
        Document(page_content=doc.page_content, metadata=doc.metadata)
        for doc in state["docs"]
    ],
    # "question" already holds the standalone question at this stage.
    "standalone_question": lambda state: state["question"],
}
# Full pipeline: standalone question -> document retrieval -> answer.
conversational_retriever_chain = chain_question | retrieved_documents | chain_answer

follow_up_question = "plaese give more details about it, including its use cases and implementation."
response = conversational_retriever_chain.invoke({"question": follow_up_question})

# Bare expression: the Markdown object is built but not assigned or printed.
Markdown(response['answer'].content)

# Record the exchange so later questions can refer back to it.
memory.save_context(
    {"question": follow_up_question},
    {"answer": response['answer'].content},
)
# Stylesheet for the Gradio page; the selectors match the elem_id values below.
css = """
#col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
#chatbox {min-height: 400px;}
#header {text-align: center;}
#prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px; min-height: 150px;}
#total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}
#label {font-size: 0.8em; padding: 0.5em; margin: 0;}
.message { font-size: 1.2em; }
"""

# Header text shown at the top of the page (kept out of the layout code so
# the multi-line literal does not pick up layout indentation).
header_markdown = """## Ask questions of *needs assessment* experts,
## get responses from a *needs assessment experts* version of ChatGPT.
Ask questions of all of them, or pick your expert below.
This is a free resource but it does cost us money to run. Unfortunately someone has been abusing this approach.
In response, we have had to temporarily turn it off until we can put improve the monitoring. Sorry for the inconvenience."""

# NOTE(review): get_empty_state, prompt_templates, submit_message,
# clear_conversation, on_prompt_template_change_description and
# download_prompt_templates are not defined in the visible file - confirm
# where they come from.
# NOTE(review): `.style(...)` and `queue(concurrency_count=...)` are not
# available in every Gradio release - confirm against the pinned version.
with gr.Blocks(css=css) as demo:
    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):
        gr.Markdown(header_markdown, elem_id="header")

        with gr.Row():
            # Left column: the conversation and its controls.
            with gr.Column():
                chatbot = gr.Chatbot(elem_id="chatbox")
                input_message = gr.Textbox(
                    show_label=False,
                    placeholder="Enter your needs assessment question",
                    visible=True,
                ).style(container=False)
                btn_submit = gr.Button("Submit")
                # total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
                btn_clear_conversation = gr.Button("Start New Conversation")

            # Right column: expert selection and generation settings.
            with gr.Column():
                prompt_template = gr.Dropdown(
                    label="Choose an Expert:", choices=list(prompt_templates.keys())
                )
                prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
                with gr.Accordion("Advanced parameters", open=False):
                    temperature = gr.Slider(
                        minimum=0, maximum=2.0, value=0.7, step=0.1,
                        label="Flexibility", info="Higher = More AI, Lower = More Expert",
                    )
                    max_tokens = gr.Slider(
                        minimum=100, maximum=400, value=200, step=1,
                        label="Length of Response.",
                    )
                    context_length = gr.Slider(
                        minimum=1, maximum=5, value=2, step=1,
                        label="Context Length",
                        info="Number of previous questions you have asked.",
                    )

    # Clicking Submit and pressing Enter in the textbox share the same wiring.
    submit_inputs = [input_message, prompt_template, temperature, max_tokens, context_length, state]
    submit_outputs = [input_message, chatbot, state]
    btn_submit.click(submit_message, submit_inputs, submit_outputs)
    input_message.submit(submit_message, submit_inputs, submit_outputs)

    btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot, state])
    prompt_template.change(
        on_prompt_template_change_description,
        inputs=[prompt_template],
        outputs=[prompt_template_preview],
    )
    # Was `queur=False`, a misspelt keyword argument.
    demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queue=False)

demo.queue(concurrency_count=10)
demo.launch(height='800px')