Spaces:

ryanrwatkins
/

needs

Runtime error

App Files Files Community

ryanrwatkins committed on Mar 31, 2024

Commit

92c2a99

verified ·

1 Parent(s): e100484

Create app2.py

Browse files

Files changed (1) hide show

app2.py +601 -0

app2.py ADDED Viewed

	@@ -0,0 +1,601 @@

+from langchain_community.document_loaders import (
+    PyPDFLoader,
+    TextLoader,
+    DirectoryLoader,
+    CSVLoader,
+    UnstructuredExcelLoader,
+    Docx2txtLoader,
+)
+from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+import tiktoken
+import chroma
+import gradio as gr
+import os
+import numpy as np
+# LLM: openai and google_genai
+import openai
+from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+# LLM: HuggingFace
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+from langchain_community.llms import HuggingFaceHub
+# langchain prompts, memory, chains...
+from langchain.prompts import PromptTemplate, ChatPromptTemplate
+from langchain.chains import ConversationalRetrievalChain
+from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+from operator import itemgetter
+from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
+from langchain.schema import Document, format_document
+from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
+from langchain.retrievers.document_compressors import DocumentCompressorPipeline
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder
+from langchain.retrievers.document_compressors import EmbeddingsFilter
+from langchain.retrievers import ContextualCompressionRetriever
+from langchain.retrievers import ContextualCompressionRetriever
+from langchain.retrievers.document_compressors import CohereRerank
+from langchain_community.llms import Cohere
+from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory
+from langchain.schema import Document
+def langchain_document_loader(TMP_DIR):
+    """
+    Load documents from the temporary directory (TMP_DIR).
+    Files can be in txt, pdf, CSV or docx format.
+    """
+    documents = []
+    txt_loader = DirectoryLoader(
+        TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
+    )
+    documents.extend(txt_loader.load())
+    pdf_loader = DirectoryLoader(
+        TMP_DIR.as_posix(), glob="**/*.pdf", loader_cls=PyPDFLoader, show_progress=True
+    )
+    documents.extend(pdf_loader.load())
+    csv_loader = DirectoryLoader(
+        TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
+        loader_kwargs={"encoding":"utf8"}
+    )
+    documents.extend(csv_loader.load())
+    doc_loader = DirectoryLoader(
+        TMP_DIR.as_posix(),
+        glob="**/*.docx",
+        loader_cls=Docx2txtLoader,
+        show_progress=True,
+    )
+    documents.extend(doc_loader.load())
+    return documents
+text_splitter = RecursiveCharacterTextSplitter(
+    separators = ["\n\n", "\n", " ", ""],
+    chunk_size = 1600,
+    chunk_overlap= 200
+)
+# Text splitting. NOTE(review): `documents` is not assigned at module level in the visible source -- presumably the result of langchain_document_loader(...); confirm.
+chunks = text_splitter.split_documents(documents=documents)
+def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
+    """Use tiktoken (tokeniser for OpenAI models) to return a list of token lengths per document."""
+    encoding = tiktoken.encoding_for_model(model) # returns the encoding used by the model.
+    tokens_length = [len(encoding.encode(documents[i].page_content)) for i in range(len(documents))]
+    return tokens_length
+chunks_length = tiktoken_tokens(chunks,model="gpt-3.5-turbo")  # one token count per chunk
+print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")  # summary statistics, truncated to int
+print(f"Number of tokens - 25% percentile : {int(np.quantile(chunks_length,0.25))}")
+print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
+print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")
+def select_embeddings_model(LLM_service="HuggingFace"):
+    """Connect to the embeddings API endpoint by specifying
+    the name of the embedding model.
+    if LLM_service == "OpenAI":
+        embeddings = OpenAIEmbeddings(
+            model='text-embedding-ada-002',
+            api_key=openai_api_key)
+    if LLM_service == "Google":
+        embeddings = GoogleGenerativeAIEmbeddings(
+            model="models/embedding-001",
+            google_api_key=google_api_key
+        )"""
+    if LLM_service == "HuggingFace":
+        embeddings = HuggingFaceInferenceAPIEmbeddings(
+            api_key=HF_key,
+            model_name="thenlper/gte-large"
+        )
+    return embeddings
+#embeddings_OpenAI = select_embeddings_model(LLM_service="OpenAI")
+#embeddings_google = select_embeddings_model(LLM_service="Google")
+embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")  # NOTE(review): reads HF_key, which is only assigned further down the file -- confirm ordering
+def create_vectorstore(embeddings,documents,vectorstore_name):
+    """Create a Chroma vector database."""
+    persist_directory = (LOCAL_VECTOR_STORE_DIR.as_posix() + "/" + vectorstore_name)
+    vector_store = Chroma.from_documents(
+        documents=documents,
+        embedding=embeddings,
+        persist_directory=persist_directory
+    )
+    return vector_store
+%%time
+create_vectorstores = True # change to True to create vectorstores
+if create_vectorstores:
+    """
+    vector_store_OpenAI,_ = create_vectorstore(
+        embeddings=embeddings_OpenAI,
+        documents = chunks,
+        vectorstore_name="Vit_All_OpenAI_Embeddings",
+    )
+    print("vector_store_OpenAI:",vector_store_OpenAI._collection.count(),"chunks.")
+    vector_store_google,new_vectorstore_name = create_vectorstore(
+        embeddings=embeddings_google,
+        documents = chunks,
+        vectorstore_name="Vit_All_Google_Embeddings"
+    )
+    print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
+    """
+    vector_store_HF = create_vectorstore(
+        embeddings=embeddings_HuggingFace,
+        documents = chunks,
+        vectorstore_name="Vit_All_HF_Embeddings"
+    )
+    print("vector_store_HF:",vector_store_HF._collection.count(),"chunks.")
+    print("")
+"""
+vector_store_OpenAI = Chroma(
+    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
+    embedding_function=embeddings_OpenAI)
+print("vector_store_OpenAI:",vector_store_OpenAI._collection.count(),"chunks.")
+vector_store_google = Chroma(
+    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_Google_Embeddings",
+    embedding_function=embeddings_google)
+print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
+"""  # end of disabled OpenAI / Google reload code (a bare string, never executed)
+vector_store_HF = Chroma(  # NOTE(review): `Chroma` and LOCAL_VECTOR_STORE_DIR are not bound anywhere in the visible source -- confirm
+    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_HF_Embeddings",
+    embedding_function=embeddings_HuggingFace)
+print("vector_store_HF:",vector_store_HF._collection.count(),"chunks.")  # rebinds vector_store_HF to the store opened from the persist directory
+def Vectorstore_backed_retriever(
+vectorstore,search_type="similarity",k=4,score_threshold=None
+):
+    """create a vectorsore-backed retriever
+    Parameters:
+        search_type: Defines the type of search that the Retriever should perform.
+            Can be "similarity" (default), "mmr", or "similarity_score_threshold"
+        k: number of documents to return (Default: 4)
+        score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
+    """
+    search_kwargs={}
+    if k is not None:
+        search_kwargs['k'] = k
+    if score_threshold is not None:
+        search_kwargs['score_threshold'] = score_threshold
+    retriever = vectorstore.as_retriever(
+        search_type=search_type,
+        search_kwargs=search_kwargs
+    )
+    return retriever
+# similarity search
+#base_retriever_OpenAI = Vectorstore_backed_retriever(vector_store_OpenAI,"similarity",k=10)
+#base_retriever_google = Vectorstore_backed_retriever(vector_store_google,"similarity",k=10)
+base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF,"similarity",k=10)  # similarity retriever over the HuggingFace store, k=10
+def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
+    """Build a ContextualCompressionRetriever.
+    We wrap the the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever.
+    The compressor here is a Document Compressor Pipeline, which splits documents
+    into smaller chunks, removes redundant documents, filters out the most relevant documents,
+    and reorder the documents so that the most relevant are at the top and bottom of the list.
+    Parameters:
+        embeddings: OpenAIEmbeddings, GoogleGenerativeAIEmbeddings or HuggingFaceInferenceAPIEmbeddings.
+        base_retriever: a vectorstore-backed retriever.
+        chunk_size (int): Documents will be splitted into smaller chunks using a CharacterTextSplitter with a default chunk_size of 500.
+        k (int): top k relevant chunks to the query are filtered using the EmbeddingsFilter. default =16.
+        similarity_threshold : minimum relevance threshold used by the EmbeddingsFilter. default =None.
+    """
+    # 1. splitting documents into smaller chunks
+    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")
+    # 2. removing redundant documents
+    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
+    # 3. filtering based on relevance to the query
+    relevant_filter = EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold) # similarity_threshold and top K
+    # 4. Reorder the documents
+    # Less relevant document will be at the middle of the list and more relevant elements at the beginning or end of the list.
+    # Reference: https://python.langchain.com/docs/modules/data_connection/retrievers/long_context_reorder
+    reordering = LongContextReorder()
+    # 5. Create compressor pipeline and retriever
+    pipeline_compressor = DocumentCompressorPipeline(
+        transformers=[splitter, redundant_filter, relevant_filter, reordering]
+    )
+    compression_retriever = ContextualCompressionRetriever(
+        base_compressor=pipeline_compressor,
+        base_retriever=base_retriever
+    )
+    return compression_retriever
+def CohereRerank_retriever(
+    base_retriever,
+    cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8
+):
+    """Build a ContextualCompressionRetriever using Cohere Rerank endpoint to reorder the results based on relevance.
+    Parameters:
+       base_retriever: a Vectorstore-backed retriever
+       cohere_api_key: the Cohere API key
+       cohere_model: The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default.
+       top_n: top n results returned by Cohere rerank, default = 8.
+    """
+    compressor = CohereRerank(
+        cohere_api_key=cohere_api_key,
+        model=cohere_model,
+        top_n=top_n
+    )
+    retriever_Cohere = ContextualCompressionRetriever(
+        base_compressor=compressor,
+        base_retriever=base_retriever
+    )
+    return retriever_Cohere
+def instantiate_LLM(LLM_provider,api_key,temperature=0.5,top_p=0.95,model_name=None):
+    """Instantiate LLM in Langchain.
+    Parameters:
+        LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
+        model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
+            "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
+        api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
+        temperature (float): Range: 0.0 - 1.0; default = 0.5
+        top_p (float): : Range: 0.0 - 1.0; default = 1.
+    """
+    if LLM_provider == "OpenAI":
+        llm = ChatOpenAI(
+            api_key=api_key,
+            model=model_name, # in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview"]
+            temperature=temperature,
+            model_kwargs={
+                "top_p": top_p
+            }
+        )
+    if LLM_provider == "Google":
+        llm = ChatGoogleGenerativeAI(
+            google_api_key=api_key,
+            model=gemini-pro, # "gemini-pro"
+            temperature=temperature,
+            top_p=top_p,
+            convert_system_message_to_human=True
+        )
+    if LLM_provider == "HuggingFace":
+        llm = HuggingFaceHub(
+            repo_id=mistralai/Mistral-7B-Instruct-v0.2, # "mistralai/Mistral-7B-Instruct-v0.2"
+            huggingfacehub_api_token=api_key,
+            model_kwargs={
+                "temperature":temperature,
+                "top_p": top_p,
+                "do_sample": True,
+                "max_new_tokens":1024
+            },
+        )
+    return llm
+def get_environment_variable(key):
+    if key in os.environ:
+        value = os.environ.get(key)
+        print(f"\n[INFO]: {key} retrieved successfully.")
+    else :
+        print(f"\n[ERROR]: {key} is not found in your environment variables.")
+        value = getpass(f"Insert your {key}")
+    return value
+openai_api_key = os.environ['openai_key']  # subscript access: raises KeyError when the variable is unset
+google_api_key = os.environ['gemini_key']
+HF_key = os.environ['HF_token']  # NOTE(review): select_embeddings_model(...) is called earlier in the file and reads HF_key -- confirm ordering
+cohere_api_key = os.environ['cohere_api']
+def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
+    """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
+    Creates a ConversationBufferMemory for the other models."""
+    if model_name=="gpt-3.5-turbo":
+        if memory_max_token is None:
+            memory_max_token = 1024 # max_tokens for 'gpt-3.5-turbo' = 4096
+        memory = ConversationSummaryBufferMemory(
+            max_token_limit=memory_max_token,
+            llm=ChatOpenAI(model_name="gpt-3.5-turbo",openai_api_key=openai_api_key,temperature=0.1),
+            return_messages=True,
+            memory_key='chat_history',
+            output_key="answer",
+            input_key="question"
+        )
+    else:
+        memory = ConversationBufferMemory(
+            return_messages=True,
+            memory_key='chat_history',
+            output_key="answer",
+            input_key="question",
+        )
+    return memory
+memory.save_context(inputs={"question":"..."},outputs={"answer":"...."}
+standalone_question_template = """Given the following conversation and a follow up question,
+rephrase the follow up question to be a standalone question, in its original language.\n\n
+Chat History:\n{chat_history}\n
+Follow Up Input: {question}\n
+Standalone question:"""
+standalone_question_prompt = PromptTemplate(
+    input_variables=['chat_history', 'question'],
+    template=standalone_question_template
+)
+def answer_template(language="english"):
+    """Pass the standalone question along with the chat history and context
+    to the `LLM` wihch will answer"""
+    template = f"""Answer the question at the end, using only the following context (delimited by <context></context>).
+Your answer must be in the language at the end.
+<context>
+{{chat_history}}
+{{context}}
+</context>
+Question: {{question}}
+Language: {language}.
+"""
+    return template
+chain = ConversationalRetrievalChain.from_llm(
+    condense_question_prompt=standalone_question_prompt,
+    combine_docs_chain_kwargs={'prompt': answer_prompt},
+    condense_question_llm=instantiate_LLM(
+        LLM_provider="Google",api_key=HF_key,temperature=0.1,
+        model_name="gemini-pro"),
+    memory=create_memory("gemini-pro"),
+    retriever = retriever,
+    llm=instantiate_LLM(
+        LLM_provider="Google",api_key=HF_key,temperature=0.5,
+        model_name="gemini-pro"),
+    chain_type= "stuff",
+    verbose= False,
+    return_source_documents=True
+)
+# 1. load memory using RunnableLambda. Retrieves the chat_history attribute using itemgetter.
+# `RunnablePassthrough.assign` adds the chat_history to the assign function
+loaded_memory = RunnablePassthrough.assign(
+    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
+)
+# 2. Pass the follow-up question along with the chat history to the LLM, and parse the answer (standalone_question).
+condense_question_prompt = PromptTemplate(
+    input_variables=['chat_history', 'question'],
+    template=standalone_question_template
+)
+condense_question_llm = instantiate_LLM(
+    LLM_provider="Google",api_key=google_api_key,temperature=0.1,
+    model_name="gemini-pro"
+)
+standalone_question_chain = {
+    "standalone_question": {
+        "question": lambda x: x["question"],
+        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
+    }
+    | condense_question_prompt
+    | condense_question_llm
+    | StrOutputParser(),
+}
+# 3. Combine load_memory and standalone_question_chain
+chain_question = loaded_memory | standalone_question_chain
+memory.clear()
+memory.save_context(
+    {"question": "What does DTC stand for?"},
+    {"answer": "Diffuse to Choose."}
+)
+print("Chat history:\n",memory.load_memory_variables({}))
+follow_up_question = "plaese give more details about it, including its use cases and implementation."
+print("\nFollow-up question:\n",follow_up_question)
+# invoke chain_question
+response = chain_question.invoke({"question":follow_up_question})["standalone_question"]
+print("\nStandalone_question:\n",response)
+def _combine_documents(docs, document_prompt, document_separator="\n\n"):
+    doc_strings = [format_document(doc, document_prompt) for doc in docs]
+    return document_separator.join(doc_strings)
+# 1. Retrieve relevant documents
+retrieved_documents = {
+    "docs": itemgetter("standalone_question") | retriever,
+    "question": lambda x: x["standalone_question"],
+}
+# 2. Get variables ['chat_history', 'context', 'question'] that will be passed to `answer_prompt`
+DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
+answer_prompt = ChatPromptTemplate.from_template(answer_template()) # 3 variables are expected ['chat_history', 'context', 'question']
+answer_prompt_variables = {
+    "context": lambda x: _combine_documents(docs=x["docs"],document_prompt=DEFAULT_DOCUMENT_PROMPT),
+    "question": itemgetter("question"),
+    "chat_history": itemgetter("chat_history") # get chat_history from `loaded_memory` variable
+}
+llm = instantiate_LLM(
+    LLM_provider="Google",api_key=google_api_key,temperature=0.5,
+    model_name="gemini-pro"
+)
+# 3. Load memory, format `answer_prompt` with variables (context, question and chat_history) and pass the `answer_prompt to LLM.
+# return answer, docs and standalone_question
+chain_answer = {
+    "answer": loaded_memory | answer_prompt_variables | answer_prompt | llm,
+    "docs": lambda x: [
+        Document(page_content=doc.page_content,metadata=doc.metadata) # return only page_content and metadata
+        for doc in x["docs"]
+    ],
+    "standalone_question": lambda x:x["question"] # return standalone_question
+}
+conversational_retriever_chain = chain_question | retrieved_documents | chain_answer
+follow_up_question = "plaese give more details about it, including its use cases and implementation."
+response = conversational_retriever_chain.invoke({"question":follow_up_question})
+Markdown(response['answer'].content)
+memory.save_context(
+ {"question": follow_up_question},
+ {"answer": response['answer'].content}
+)
+css = """
+      #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
+      #chatbox {min-height: 400px;}
+      #header {text-align: center;}
+      #prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px; min-height: 150px;}
+      #total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}
+      #label {font-size: 0.8em; padding: 0.5em; margin: 0;}
+      .message { font-size: 1.2em; }
+      """  # stylesheet passed to gr.Blocks(css=css); the #ids match the elem_id values set on the components
+with gr.Blocks(css=css) as demo:
+    state = gr.State(get_empty_state())
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("""## Ask questions of *needs assessment* experts,
+                    ## get responses from a *needs assessment experts* version of ChatGPT.
+                    Ask questions of all of them, or pick your expert below.
+                    This is a free resource but it does cost us money to run. Unfortunately someone has been abusing this approach.
+                    In response, we have had to temporarily turn it off until we can put improve the monitoring. Sorry for the inconvenience.""" ,
+                    elem_id="header")
+        with gr.Row():
+            with gr.Column():
+                chatbot = gr.Chatbot(elem_id="chatbox")
+                input_message = gr.Textbox(show_label=False, placeholder="Enter your needs assessment question", visible=True).style(container=False)
+                btn_submit = gr.Button("Submit")
+                #total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
+                btn_clear_conversation = gr.Button("Start New Conversation")
+            with gr.Column():
+                prompt_template = gr.Dropdown(label="Choose an Expert:", choices=list(prompt_templates.keys()))
+                prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
+                with gr.Accordion("Advanced parameters", open=False):
+                    temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, label="Flexibility", info="Higher = More AI, Lower = More Expert")
+                    max_tokens = gr.Slider(minimum=100, maximum=400, value=200, step=1, label="Length of Response.")
+                    context_length = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Context Length", info="Number of previous questions you have asked.")
+    btn_submit.click(submit_message, [ input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot,  state])
+    input_message.submit(submit_message, [ input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot,  state])
+    btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot,  state])
+    prompt_template.change(on_prompt_template_change_description, inputs=[prompt_template], outputs=[prompt_template_preview])
+    demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queur=False)
+demo.queue(concurrency_count=10)
+demo.launch(height='800px')