Andrew Lai committed on
Commit
61ac6e5
·
1 Parent(s): 5848cc6
Files changed (8) hide show
  1. .gitattributes +2 -0
  2. Dockerfile +13 -0
  3. README.md +4 -4
  4. app.py +109 -0
  5. data.csv +3 -0
  6. key.env +3 -0
  7. pre-requirements.txt +2 -0
  8. requirements.txt +16 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.psd filter=lfs diff=lfs merge=lfs -text
37
+ *.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Run as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy only the requirement files first so the dependency layers stay cached
# when application code changes. Docker's COPY does not expand "~", so the
# destination is spelled with $HOME rather than "~/app".
COPY --chown=user ./pre-requirements.txt $HOME/app/pre-requirements.txt
RUN pip install -r $HOME/app/pre-requirements.txt
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install -r $HOME/app/requirements.txt

# Copy the application last, owned by the runtime user so it can write next
# to its own files.
COPY --chown=user . $HOME/app

# Bind to all interfaces so the server is reachable from outside the container.
# NOTE(review): the port must match the port the hosting platform routes to
# (e.g. `app_port` in the Space's README front matter) -- confirm.
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7862"]
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Proj2
3
- emoji: 🏆
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
1
  ---
2
+ title: Proj2rag
3
+ emoji: 🐨
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from huggingface_hub import list_datasets
3
+ from google.colab import userdata
4
+ from langchain import OpenAI, LLMMathChain, SerpAPIWrapper
5
+ from langchain.agents import initialize_agent, Tool, AgentExecutor
6
+ from langchain_community.chat_models import ChatOpenAI
7
+ import os
8
+ import chainlit as cl
9
+ import openai
10
+ from google.colab import userdata
11
+ from dotenv import load_dotenv
12
+ from langchain_community.document_loaders import TextLoader
13
+ from langchain_community.document_loaders.csv_loader import CSVLoader
14
+ from langchain_community.vectorstores import FAISS
15
+ from langchain.storage import LocalFileStore
16
+ from langchain.prompts import ChatPromptTemplate
17
+ from langchain_openai import ChatOpenAI
18
+ from langchain.schema.runnable import RunnableMap
19
+ from langchain.schema.output_parser import StrOutputParser
20
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
21
+ from langchain.output_parsers import ResponseSchema, StructuredOutputParser
22
+ import pandas as pd
23
+ from langchain_openai import OpenAIEmbeddings
24
+ import openai
25
+ import asyncio
26
+ from dotenv import dotenv_values
27
+
28
# Load secrets from key.env. The key.env committed alongside this file only
# holds empty placeholders, so any empty value falls back to the process
# environment variable of the same name. Non-empty file values still win,
# and the set of keys is exactly the set declared in key.env.
my_secrets = {
    name: value or os.environ.get(name, value)
    for name, value in dotenv_values("key.env").items()
}

# Load data.csv through TextLoader (the file is read as text, not parsed
# row by row).
loader = TextLoader('data.csv')
documents = loader.load()

# Split the loaded text into chunks of at most 1000 characters (measured with
# len) that overlap by 100 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

docs = text_splitter.split_documents(documents)

# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
underlying_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    api_key=my_secrets["OPEN_API_KEY"],
)
db = FAISS.from_documents(docs, underlying_embeddings)

# Retriever shared by every chat session; returns the 10 best-matching chunks.
retriever = db.as_retriever(
    search_kwargs={"k": 10}
)
53
+
54
+
55
@cl.on_chat_start
def start():
    """Build the question-answering chain for a new chat session.

    The chain retrieves context for the question from the module-level
    ``retriever``, fills the prompt template, calls the chat model and parses
    the reply to a plain string. The finished chain is stored in the Chainlit
    user session under the key ``"runnable_chain"``.
    """
    # Prompt that restricts the model to the retrieved context and asks for an
    # ANSWER / CITATIONS layout. The text is kept flush-left so that no source
    # indentation leaks into the prompt sent to the model.
    template = """
You're a helpful AI assistent tasked to answer the user's questions about movies.
You can only make conversations based on the provided context about movies. If a response cannot be formed strictly using the context, politely say you don’t have knowledge about that topic under new line character 'ANSWER:' tag which is prefixed with new line character.

Remember, you must return both an answer under 'ANSWER:' tag which is prefixed with new line character and citations in line separated format of answer and bulleted list of citiations under 'CITATIONS:' tag. A citation consists of a VERBATIM quote that \
justifies the answer and the ID of the quoted article. Return a citation for every quote across all articles \
that justify the answer. Add a new line character after all citations. Use the following format for your final output:

new line character
ANSWER:

CITATIONS:
new line character

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:
"""

    prompt = ChatPromptTemplate.from_messages([("system", template)])

    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        temperature=0,
        api_key=my_secrets["OPEN_API_KEY"],
    )

    # Map the incoming {"question": ...} dict to the two prompt variables.
    # retriever.invoke replaces the deprecated get_relevant_documents call.
    inputs = RunnableMap({
        'context': lambda x: retriever.invoke(x['question']),
        'question': lambda x: x['question'],
    })

    # inputs -> prompt -> chat model -> plain string
    runnable_chain = inputs | prompt | llm | StrOutputParser()

    cl.user_session.set("runnable_chain", runnable_chain)
99
+
100
+
101
@cl.on_message
async def on_message(message: cl.Message):
    """Answer one user message with the chain stored for this session.

    Args:
        message: The incoming Chainlit message; its ``content`` is used as the
            question.
    """
    runnable_chain = cl.user_session.get("runnable_chain")

    # Await the async entry point so the retrieval and model round trip does
    # not block the event loop that this handler runs on.
    result = await runnable_chain.ainvoke({"question": message.content})

    await cl.Message(content=result).send()
data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4adc33bd9fe74303c344be46e5916d65182fb218e248fe80452ab3f025b06c64
3
+ size 2
key.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ HF_TOKEN=
2
+ NGROK_KEY=
3
+ OPEN_API_KEY=
pre-requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pip>=23.2
2
+ gradio_client==0.2.7
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime dependencies for app.py (duplicate entries removed).
langchain
chainlit
langchain-openai
openai
chromadb
tiktoken
pymupdf
datasets
langchain_community
pyngrok
google-search-results
optimum
auto-gptq
# app.py imports dotenv directly, so declare it instead of relying on a
# transitive install.
python-dotenv
# CPU build of FAISS: the image is plain python:3.11 without CUDA, and
# faiss-gpu does not publish wheels for Python 3.11.
faiss-cpu