ryanrwatkins committed · 1e1ca0a
Parent(s): df1c955
Create app_backup.py

app_backup.py ADDED (+698, -0)
@@ -0,0 +1,698 @@
# https://medium.com/thedeephub/rag-chatbot-powered-by-langchain-openai-google-generative-ai-and-hugging-face-apis-6a9b9d7d59db
# https://github.com/AlaGrine/RAG_chatabot_with_Langchain/blob/main/RAG_notebook.ipynb


from langchain_community.document_loaders import (
    PyPDFLoader,
    TextLoader,
    DirectoryLoader,
    CSVLoader,
    UnstructuredExcelLoader,
    Docx2txtLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
import tiktoken
import gradio as gr
import csv
import os, tempfile, glob, random
from pathlib import Path
#from IPython.display import Markdown
from PIL import Image
from getpass import getpass
import numpy as np
from itertools import combinations
import pypdf
import requests


# LLM: OpenAI and google_genai
import openai
from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI
from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    GoogleGenerativeAIEmbeddings,
    HarmBlockThreshold,
    HarmCategory,
)

# LLM: HuggingFace
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceHub

# langchain prompts, memory, chains...
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from operator import itemgetter
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain.schema import Document, format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string

# OutputParser
from langchain_core.output_parsers import StrOutputParser

# Chroma: vectorstore
from langchain_community.vectorstores import Chroma

# Contextual Compression
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (
    DocumentCompressorPipeline,
    EmbeddingsFilter,
)
from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder

# Cohere reranking (not currently in use)
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.llms import Cohere

# Memory
from langchain.memory import ConversationSummaryBufferMemory, ConversationBufferMemory

# Get API keys
openai_api_key = os.environ['openai_key']
google_api_key = os.environ['gemini_key']
HF_key = os.environ['HF_token']
cohere_api_key = os.environ['cohere_api']

current_dir = os.getcwd()
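# Note: 'openai_key', 'gemini_key', 'HF_token', and 'cohere_api' are the
# environment-variable names this Space expects; set them as repository
# secrets (or export them locally) before launching, since the os.environ
# lookups above raise KeyError when a key is missing.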

# Not currently in use
prompt_templates = {"All Needs Experts": "Respond as if you are a combination of all needs assessment experts."}
actor_description = {"All Needs Experts": "<div style='float: left;margin: 0px 5px 0px 5px;'><img src='https://na.weshareresearch.com/wp-content/uploads/2023/04/experts2.jpg' alt='needs expert image' style='width:70px;align:top;'></div>A combination of all needs assessment experts."}


# Initiates the UI features

def get_empty_state():
    return {"messages": []}


def download_prompt_templates():
    url = "https://huggingface.co/spaces/ryanrwatkins/needs/raw/main/gurus.txt"
    try:
        response = requests.get(url)
        reader = csv.reader(response.text.splitlines())
        next(reader)  # skip the header row
        for row in reader:
            if len(row) >= 3:  # was >= 2, which raised IndexError on row[2] for short rows
                act = row[0].strip('"')
                prompt = row[1].strip('"')
                description = row[2].strip('"')
                prompt_templates[act] = prompt
                actor_description[act] = description

    except requests.exceptions.RequestException as e:
        print(f"An error occurred while downloading prompt templates: {e}")
        return

    choices = list(prompt_templates.keys())
    choices = choices[:1] + sorted(choices[1:])
    return gr.update(value=choices[0], choices=choices)

def on_prompt_template_change(prompt_template):
    if not isinstance(prompt_template, str): return
    return prompt_templates[prompt_template]

def on_prompt_template_change_description(prompt_template):
    if not isinstance(prompt_template, str): return
    return actor_description[prompt_template]

# Set to load only PDFs, but could be changed to a specific directory so that other files don't get embeddings

def langchain_document_loader():
    """
    Load documents from the current working directory. Only the PDF loader is
    active; the txt, CSV, and docx loaders are kept below in disabled
    (triple-quoted) blocks.
    """
    #current_dir = os.getcwd()
    #TMP_DIR = current_dir
    global documents
    documents = []

    """
    txt_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
    )
    documents.extend(txt_loader.load())
    """
    pdf_loader = DirectoryLoader(
        current_dir, glob="*.pdf", loader_cls=PyPDFLoader, show_progress=True
    )
    documents.extend(pdf_loader.load())
    """
    csv_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
        loader_kwargs={"encoding":"utf8"}
    )
    documents.extend(csv_loader.load())

    doc_loader = DirectoryLoader(
        #TMP_DIR.as_posix(),
        current_dir,
        glob="**/*.docx",
        loader_cls=Docx2txtLoader,
        show_progress=True,
    )
    documents.extend(doc_loader.load())
    """
    return documents

langchain_document_loader()
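# The triple-quoted blocks above are how the txt, csv, and docx loaders are
# disabled; to re-enable one, remove the surrounding quotes (and define
# TMP_DIR, which is otherwise unused here).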

# Text splitting of the uploaded documents; the chunks will become vectors

text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=1500,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents=documents)
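# RecursiveCharacterTextSplitter tries the separators in order (paragraphs,
# then lines, then words, then characters), so each chunk breaks at the most
# natural boundary that still fits under chunk_size (measured in characters
# here); the 200-character overlap keeps text that straddles a boundary
# available to both neighboring chunks.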

# Just FYI: this does not impact anything; it only prints token statistics when re-starting the app

def tiktoken_tokens(documents, model="gpt-3.5-turbo"):
    """Use tiktoken (the tokenizer for OpenAI models) to return a list of token lengths per document."""
    encoding = tiktoken.encoding_for_model(model)  # returns the encoding used by the model

    tokens_length = [len(encoding.encode(documents[i].page_content)) for i in range(len(documents))]

    return tokens_length

chunks_length = tiktoken_tokens(chunks, model="gpt-3.5-turbo")

print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
print(f"Number of tokens - 25% percentile : {int(np.quantile(chunks_length,0.25))}")
print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")

# For embeddings I am just using the free HF model, so the others are turned off

def select_embeddings_model(LLM_service="HuggingFace"):
    """Connect to the embeddings API endpoint by specifying
    the name of the embedding model."""
    # The OpenAI branch (previously trapped inside the docstring) and the
    # Google branch are disabled in the string blocks below.
    """
    if LLM_service == "OpenAI":
        embeddings = OpenAIEmbeddings(
            model='text-embedding-ada-002',
            api_key=openai_api_key)
    """

    """
    if LLM_service == "Google":
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=google_api_key,
        )
    """

    if LLM_service == "HuggingFace":
        embeddings = HuggingFaceInferenceAPIEmbeddings(
            api_key=HF_key,
            #model_name="thenlper/gte-large"
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
    print("embedding model selected")
    return embeddings

#embeddings_OpenAI = select_embeddings_model(LLM_service="OpenAI")
#embeddings_google = select_embeddings_model(LLM_service="Google")
embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")
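# Note: sentence-transformers/all-MiniLM-L6-v2 returns 384-dimensional
# vectors, while the commented-out thenlper/gte-large returns 1024-dimensional
# ones; switching models changes the dimensionality, so the Chroma collections
# below would need to be rebuilt rather than reloaded.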

# Creates the database that will hold the embedding vectors
def create_vectorstore(embeddings, documents, vectorstore_name):
    """Create a Chroma vector database."""
    persist_directory = (current_dir + "/" + vectorstore_name)
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory
    )
    print("created Chroma vector database")
    return vector_store


create_vectorstores = True  # set to False to skip re-creating the vectorstores on startup

# Then we tell it to store the embeddings in the vectorstore (sticking with HF for this)
if create_vectorstores:
    """
    vector_store_OpenAI = create_vectorstore(
        embeddings=embeddings_OpenAI,
        documents=chunks,
        vectorstore_name="Vit_All_OpenAI_Embeddings",
    )
    print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
    """

    """
    vector_store_google = create_vectorstore(
        embeddings=embeddings_google,
        documents=chunks,
        vectorstore_name="Vit_All_Google_Embeddings"
    )
    print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
    """

    vector_store_HF = create_vectorstore(
        embeddings=embeddings_HuggingFace,
        documents=chunks,
        vectorstore_name="Vit_All_HF_Embeddings"
    )
    print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
    print("")


# Now we keep the chromadb persistent so that it can be referenced at any time

"""
vector_store_OpenAI = Chroma(
    persist_directory=LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
    embedding_function=embeddings_OpenAI)
print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
"""

"""
vector_store_google = Chroma(
    persist_directory=current_dir + "/Vit_All_Google_Embeddings",
    embedding_function=embeddings_google)
print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
"""

vector_store_HF = Chroma(
    persist_directory=current_dir + "/Vit_All_HF_Embeddings",
    embedding_function=embeddings_HuggingFace)
print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
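# The Chroma(...) call above reloads the collection that
# Chroma.from_documents() persisted to disk, so on a restart you could set
# create_vectorstores = False and skip re-embedding entirely.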

# Now we create the code to retrieve embeddings from the vectorstore (again, sticking with HF)

def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=10, score_threshold=None
):
    """Create a vectorstore-backed retriever.
    Parameters:
        search_type: Defines the type of search that the retriever should perform.
            Can be "similarity" (default), "mmr", or "similarity_score_threshold".
        k: number of documents to return (default: 10)
        score_threshold: minimum relevance threshold for "similarity_score_threshold" (default: None)
    """
    print("vector_backed retriever started")
    search_kwargs = {}
    if k is not None:
        search_kwargs['k'] = k
    if score_threshold is not None:
        search_kwargs['score_threshold'] = score_threshold
    global retriever
    retriever = vectorstore.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs
    )
    print("vector_backed retriever done")
    return retriever

# similarity search
#base_retriever_OpenAI = Vectorstore_backed_retriever(vector_store_OpenAI,"similarity",k=10)
#base_retriever_google = Vectorstore_backed_retriever(vector_store_google,"similarity",k=10)
base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF, "similarity", k=10)
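# A hedged usage sketch: the same helper can enforce a relevance floor, e.g.
#   strict_retriever = Vectorstore_backed_retriever(
#       vector_store_HF, "similarity_score_threshold", k=10, score_threshold=0.5)
# (0.5 is an illustrative threshold, not a value used in this app).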

# This next code takes the retrieved embeddings, gets rid of redundant ones, filters out
# non-useful information, and provides back a shorter set of documents for use

def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Build a ContextualCompressionRetriever.
    We wrap the base_retriever (a vectorstore-backed retriever) in a ContextualCompressionRetriever.
    The compressor here is a DocumentCompressorPipeline, which splits documents
    into smaller chunks, removes redundant documents, keeps only the most relevant documents,
    and reorders the documents so that the most relevant are at the top and bottom of the list.

    Parameters:
        embeddings: OpenAIEmbeddings, GoogleGenerativeAIEmbeddings or HuggingFaceInferenceAPIEmbeddings.
        base_retriever: a vectorstore-backed retriever.
        chunk_size (int): documents are split into smaller chunks using a CharacterTextSplitter with a default chunk_size of 500.
        k (int): the top k chunks most relevant to the query are kept by the EmbeddingsFilter (default=16).
        similarity_threshold: minimum relevance threshold used by the EmbeddingsFilter (default=None).
    """
    print("compression retriever started")
    # 1. Split documents into smaller chunks
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")

    # 2. Remove redundant documents
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

    # 3. Filter based on relevance to the query
    relevant_filter = EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold)  # similarity_threshold and top k

    # 4. Reorder the documents
    # Less relevant documents end up in the middle of the list; more relevant
    # elements are placed at the beginning or end of the list.
    # Reference: https://python.langchain.com/docs/modules/data_connection/retrievers/long_context_reorder
    reordering = LongContextReorder()

    # 5. Create the compressor pipeline and retriever
    pipeline_compressor = DocumentCompressorPipeline(
        transformers=[splitter, redundant_filter, relevant_filter, reordering]
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=pipeline_compressor,
        base_retriever=base_retriever
    )
    print("compression retriever done")
    return compression_retriever

compression_retriever_HF = create_compression_retriever(
    embeddings=embeddings_HuggingFace,
    base_retriever=base_retriever_HF,
    k=16)
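# A hedged usage sketch (get_relevant_documents is the retriever API this
# LangChain version exposes); the returned documents are what the chain below
# stuffs into the prompt as <context>:
#   docs = compression_retriever_HF.get_relevant_documents(
#       "What is a needs assessment?")   # illustrative query
#   print(len(docs), docs[0].page_content[:200])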

# Could use the following to rank the returned embeddings in order of relevance,
# but all are used anyway, so I am skipping it for now (can test later)

'''
def CohereRerank_retriever(
    base_retriever,
    cohere_api_key, cohere_model="rerank-multilingual-v2.0", top_n=8
):
    """Build a ContextualCompressionRetriever using the Cohere Rerank endpoint to reorder the results based on relevance.
    Parameters:
        base_retriever: a vectorstore-backed retriever
        cohere_api_key: the Cohere API key
        cohere_model: the Cohere model, either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0' (default).
        top_n: top n results returned by Cohere rerank (default = 8).
    """
    print("cohere rerank started")
    compressor = CohereRerank(
        cohere_api_key=cohere_api_key,
        model=cohere_model,
        top_n=top_n
    )

    retriever_Cohere = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever
    )
    print("cohere rerank done")
    return retriever_Cohere
'''
+
# Can use any of these LLMs for responses, for now I am Gemini-Pro for the bot (this is for responses now, not embeddings)
|
421 |
+
|
422 |
+
def instantiate_LLM(LLM_provider,api_key,temperature=0.8,top_p=0.95,model_name=None):
|
423 |
+
"""Instantiate LLM in Langchain.
|
424 |
+
Parameters:
|
425 |
+
LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
|
426 |
+
model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
|
427 |
+
"gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
|
428 |
+
api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
|
429 |
+
temperature (float): Range: 0.0 - 1.0; default = 0.5
|
430 |
+
top_p (float): : Range: 0.0 - 1.0; default = 1.
|
431 |
+
"""
|
432 |
+
if LLM_provider == "OpenAI":
|
433 |
+
llm = ChatOpenAI(
|
434 |
+
api_key=api_key,
|
435 |
+
model="gpt-3.5-turbo", # in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview"]
|
436 |
+
temperature=temperature,
|
437 |
+
model_kwargs={
|
438 |
+
"top_p": top_p
|
439 |
+
}
|
440 |
+
)
|
441 |
+
if LLM_provider == "Google":
|
442 |
+
llm = ChatGoogleGenerativeAI(
|
443 |
+
google_api_key=api_key,
|
444 |
+
model="gemini-pro", # "gemini-pro"
|
445 |
+
temperature=temperature,
|
446 |
+
top_p=top_p,
|
447 |
+
convert_system_message_to_human=True,
|
448 |
+
safety_settings={
|
449 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
450 |
+
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
451 |
+
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
452 |
+
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
453 |
+
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE},
|
454 |
+
|
455 |
+
)
|
456 |
+
if LLM_provider == "HuggingFace":
|
457 |
+
llm = HuggingFaceHub(
|
458 |
+
repo_id="mistralai/Mistral-7B-Instruct-v0.2", # "mistralai/Mistral-7B-Instruct-v0.2"
|
459 |
+
huggingfacehub_api_token=api_key,
|
460 |
+
model_kwargs={
|
461 |
+
"temperature":temperature,
|
462 |
+
"top_p": top_p,
|
463 |
+
"do_sample": True,
|
464 |
+
"max_new_tokens":1024
|
465 |
+
},
|
466 |
+
)
|
467 |
+
return llm
|
468 |
+
|
469 |
+
|
470 |
+
|
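# Hedged usage sketch (mirrors the calls made when building the chain below):
#   gemini = instantiate_LLM("Google", api_key=google_api_key,
#                            temperature=0.8, model_name="gemini-pro")
# Note that if LLM_provider is not one of the three expected strings, no
# branch runs and `return llm` raises UnboundLocalError.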

# This creates a history (memory) of prior questions. I am using Gemini for this,
# but I left the code in case I decide to go to GPT later on.

def create_memory(model_name='gemini-pro', memory_max_token=None):
    #def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
    """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
    Creates a ConversationBufferMemory for the other models."""

    if model_name == "gpt-3.5-turbo":
        if memory_max_token is None:
            memory_max_token = 1024  # max_tokens for 'gpt-3.5-turbo' = 4096
        memory = ConversationSummaryBufferMemory(
            max_token_limit=memory_max_token,
            llm=ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, temperature=0.1),
            return_messages=True,
            memory_key='chat_history',
            output_key="answer",
            input_key="question"
        )
    else:
        memory = ConversationBufferMemory(
            return_messages=True,
            memory_key='chat_history',
            output_key="answer",
            input_key="question",
        )
    return memory

# For gpt-3.5-turbo you could set a small memory_max_token (see the commented
# call below) to see how older messages are summarized once max_token_limit is exceeded.
memory = create_memory(model_name='gemini-pro', memory_max_token=None)
#memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)

# Seed the memory with a sample exchange as context for the conversation
memory.save_context(
    inputs={"question": "sample"},
    outputs={"answer": "sample"}
)

# Loads the current memory variables (here, the seeded chat history)
memory.load_memory_variables({})

# Create the prompt templates for the conversation

# The template ends at "Standalone question:" so the LLM completes it;
# appending {question} there would simply echo the original question.
standalone_question_template = """Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in the English language.\n\n
Chat History:\n{chat_history}\n
Follow Up Input: {question}\n
Standalone question:"""

#standalone_question_prompt = PromptTemplate(
#    input_variables=['chat_history', 'question'],
#    template=standalone_question_template
#)


def answer_template(language="english"):
    """Pass the standalone question along with the chat history and context
    to the `LLM` which will answer."""

    template = f"""You are a professor who is an expert in needs assessment.
Answer the question at the end (convert the question to the {language} language if it is not). But do not include the question in the response.
Use only the following context (delimited by <context></context>) in responding to the question.
Be polite and end by asking if you can answer any other questions.
If you can't answer the question, then you should say that it is not within your knowledge base and that you can only answer needs assessment related questions.
Your answer must be in the language at the end.

<context>
{{chat_history}}

{{context}}
</context>

Question: {{question}}
Language: {language}.

"""
    return template

answer_prompt = ChatPromptTemplate.from_template(answer_template())
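# Hedged sketch of how the template renders (format_messages is the
# ChatPromptTemplate API; the values are placeholders, not app data):
#   msgs = answer_prompt.format_messages(
#       chat_history="(prior turns)", context="(retrieved chunks)",
#       question="What is a needs assessment?")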

# This begins the whole process and gives the parameters

chain = ConversationalRetrievalChain.from_llm(
    condense_question_prompt=PromptTemplate(
        input_variables=['chat_history', 'question'],
        template=standalone_question_template),
    combine_docs_chain_kwargs={'prompt': answer_prompt},
    condense_question_llm=instantiate_LLM(
        LLM_provider="Google", api_key=google_api_key, temperature=0.3,
        model_name="gemini-pro"),
    memory=create_memory("gemini-pro"),
    retriever=compression_retriever_HF,
    #retriever=base_retriever_HF,
    llm=instantiate_LLM(
        LLM_provider="Google", api_key=google_api_key, temperature=0.8,
        model_name="gemini-pro"),
    chain_type="stuff",
    verbose=True,
    return_source_documents=True
)
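# Per ConversationalRetrievalChain's design: the condense_question_llm
# (temperature 0.3) first rewrites the follow-up question plus chat history
# into a standalone question; that question is sent to the compression
# retriever; then the main llm (temperature 0.8) answers from the "stuff"ed
# context using answer_prompt. Note the chain builds its own memory via
# create_memory("gemini-pro"); the module-level `memory` seeded above is a
# separate object.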

# The code below is for the interface

def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):

    history = state['messages']

    # This could be used later if I want to let users switch to different experts
    # and use different documents based on the preferred expert
    #global prompt_template_name
    #prompt_template_name = prompt_template
    #print(prompt_template)  # prints who is responding if I move to multiple experts
    #print(prompt_templates[prompt_template])

    # Note: the slider values (temperature, max_tokens, context_length) are
    # received here but not currently applied to the chain.
    completion = chain.invoke({"question": prompt})

    chain.memory.load_memory_variables({})

    get_empty_state()

    state['content'] = completion

    #state.append(completion.copy())

    completion = {"content": completion}

    print("Prompt/question:", prompt)
    answer = completion['content']['answer']
    print("Answer:", answer)
    print("Embeddings utilized:")

    for document in completion['content']['source_documents']:
        page_content = document.page_content  # use dot notation to access an attribute
        print("Embedding_content:", page_content)
        metadata = document.metadata  # use dot notation to access an attribute
        print("Metadata:", metadata)
        similarity_score = document.state['query_similarity_score']
        print("Similarity_score:", similarity_score)
        print("")

    highest_similarity_score = -1  # initialize with a score lower than possible
    selected_document = None  # will hold the document with the highest similarity score

    for document in completion['content']['source_documents']:
        if document.state['query_similarity_score'] > highest_similarity_score:
            highest_similarity_score = document.state['query_similarity_score']
            selected_document = document

    if selected_document is not None:
        # Remove the "/home/user/app/" part from the document name
        modified_source = selected_document.metadata['source'].replace('/home/user/app/', '').replace('.pdf', '')
        source_info = f"\n**Lead source:** {modified_source}, **Page:** {selected_document.metadata['page']} "
    else:
        source_info = "Lead source: not determined"

    #chat_messages = [(prompt_msg['content'], completion['content'])]
    chat_messages = [(prompt, completion['content']['answer'] + source_info)]
    return '', chat_messages, state  # total_tokens_used_msg,


def clear_conversation():
    return gr.update(value=None, visible=True), None, "", get_empty_state()

css = """
#col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
#chatbox {min-height: 400px;}
#header {text-align: center;}
#prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px; min-height: 150px;}
#total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}
#label {font-size: 0.8em; padding: 0.5em; margin: 0;}
.message { font-size: 1.2em; }
"""

with gr.Blocks(css=css) as demo:

    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):

        gr.Markdown("""## Ask questions of our *needs assessment* bot! \n
                    **It is specially trained to only answer needs assessment related questions.**
                    """,
                    elem_id="header")

        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(elem_id="chatbox")
                input_message = gr.Textbox(show_label=False, placeholder="Enter your needs assessment question", visible=True).style(container=False)

                btn_submit = gr.Button("Submit")
                #total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
                btn_clear_conversation = gr.Button("Start New Conversation", visible=False)

        with gr.Column(visible=False):
            prompt_template = gr.Dropdown(label="Choose an Expert:", choices=list(prompt_templates.keys()))
            prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
            with gr.Accordion("Advanced parameters", open=False):
                temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, label="Flexibility", info="Higher = More AI, Lower = More Expert")
                max_tokens = gr.Slider(minimum=100, maximum=400, value=200, step=1, label="Length of Response.")
                context_length = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Context Length", info="Number of previous questions you have asked.")

    btn_submit.click(submit_message, [input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, state])
    input_message.submit(submit_message, [input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, state])
    btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot, state])
    prompt_template.change(on_prompt_template_change_description, inputs=[prompt_template], outputs=[prompt_template_preview])

    demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queue=False)  # was misspelled "queur"


demo.queue(concurrency_count=10)
demo.launch(height='800px')
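# Running note (assumption: a Gradio 3.x environment, since tuple-style
# gr.Chatbot history, Textbox.style(), and queue(concurrency_count=...) are
# used): on a Hugging Face Space this file runs automatically; locally,
# `python app_backup.py` launches the interface.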