Spaces:
Sleeping
Sleeping
Luca Foppiano
committed on
Commit
•
9997b7b
1
Parent(s):
fcde626
fix env variables (#9)
Browse files
* avoid writing env variables of api keys
- README.md +3 -3
- document_qa/document_qa_engine.py +1 -0
- requirements.txt +0 -1
- streamlit_app.py +28 -21
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
colorTo: pink
|
6 |
sdk: streamlit
|
@@ -10,7 +10,7 @@ pinned: false
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
-
# DocumentIQA: Scientific Document
|
14 |
|
15 |
**Work in progress** :construction_worker:
|
16 |
|
|
|
1 |
---
|
2 |
+
title: Scientific Document Insights Q/A
|
3 |
+
emoji: π
|
4 |
colorFrom: yellow
|
5 |
colorTo: pink
|
6 |
sdk: streamlit
|
|
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
13 |
+
# DocumentIQA: Scientific Document Insights Q/A
|
14 |
|
15 |
**Work in progress** :construction_worker:
|
16 |
|
document_qa/document_qa_engine.py
CHANGED
@@ -205,6 +205,7 @@ class DocumentQAEngine:
|
|
205 |
if doc_id:
|
206 |
hash = doc_id
|
207 |
else:
|
|
|
208 |
hash = metadata[0]['hash']
|
209 |
|
210 |
if hash not in self.embeddings_dict.keys():
|
|
|
205 |
if doc_id:
|
206 |
hash = doc_id
|
207 |
else:
|
208 |
+
|
209 |
hash = metadata[0]['hash']
|
210 |
|
211 |
if hash not in self.embeddings_dict.keys():
|
requirements.txt
CHANGED
@@ -19,7 +19,6 @@ chromadb==0.4.15
|
|
19 |
tiktoken==0.4.0
|
20 |
openai==0.27.7
|
21 |
langchain==0.0.314
|
22 |
-
promptlayer==0.2.4
|
23 |
typing-inspect==0.9.0
|
24 |
typing_extensions==4.8.0
|
25 |
pydantic==2.4.2
|
|
|
19 |
tiktoken==0.4.0
|
20 |
openai==0.27.7
|
21 |
langchain==0.0.314
|
|
|
22 |
typing-inspect==0.9.0
|
23 |
typing_extensions==4.8.0
|
24 |
pydantic==2.4.2
|
streamlit_app.py
CHANGED
@@ -10,7 +10,7 @@ from langchain.llms.huggingface_hub import HuggingFaceHub
|
|
10 |
dotenv.load_dotenv(override=True)
|
11 |
|
12 |
import streamlit as st
|
13 |
-
from langchain.chat_models import
|
14 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
|
15 |
|
16 |
from document_qa.document_qa_engine import DocumentQAEngine
|
@@ -52,7 +52,7 @@ if 'uploaded' not in st.session_state:
|
|
52 |
st.session_state['uploaded'] = False
|
53 |
|
54 |
st.set_page_config(
|
55 |
-
page_title="Document Insights
|
56 |
page_icon="π",
|
57 |
initial_sidebar_state="expanded",
|
58 |
menu_items={
|
@@ -70,13 +70,21 @@ def new_file():
|
|
70 |
|
71 |
|
72 |
# @st.cache_resource
|
73 |
-
def init_qa(model):
|
74 |
if model == 'chatgpt-3.5-turbo':
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
elif model == 'mistral-7b-instruct-v0.1':
|
81 |
chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1",
|
82 |
model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048})
|
@@ -162,12 +170,11 @@ with st.sidebar:
|
|
162 |
st.markdown(
|
163 |
":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
|
164 |
|
165 |
-
if model == 'mistral-7b-instruct-v0.1'
|
166 |
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
167 |
api_key = st.text_input('Huggingface API Key', type="password")
|
168 |
|
169 |
-
st.markdown(
|
170 |
-
"Get it [here](https://huggingface.co/docs/hub/security-tokens)")
|
171 |
else:
|
172 |
api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
|
173 |
|
@@ -176,33 +183,33 @@ with st.sidebar:
|
|
176 |
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
177 |
with st.spinner("Preparing environment"):
|
178 |
st.session_state['api_keys'][model] = api_key
|
179 |
-
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
180 |
-
|
181 |
st.session_state['rqa'][model] = init_qa(model)
|
182 |
|
183 |
-
elif model == 'chatgpt-3.5-turbo':
|
184 |
if 'OPENAI_API_KEY' not in os.environ:
|
185 |
api_key = st.text_input('OpenAI API Key', type="password")
|
186 |
-
st.markdown(
|
187 |
-
"Get it [here](https://platform.openai.com/account/api-keys)")
|
188 |
else:
|
189 |
api_key = os.environ['OPENAI_API_KEY']
|
190 |
|
191 |
if api_key:
|
192 |
-
# st.session_state['api_key'] = is_api_key_provided = True
|
193 |
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
194 |
with st.spinner("Preparing environment"):
|
195 |
st.session_state['api_keys'][model] = api_key
|
196 |
if 'OPENAI_API_KEY' not in os.environ:
|
197 |
-
|
198 |
-
|
|
|
199 |
# else:
|
200 |
# is_api_key_provided = st.session_state['api_key']
|
201 |
|
202 |
-
st.title("π Scientific Document
|
203 |
st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
|
204 |
|
205 |
-
st.markdown(
|
|
|
206 |
|
207 |
uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
|
208 |
disabled=st.session_state['model'] is not None and st.session_state['model'] not in
|
|
|
10 |
dotenv.load_dotenv(override=True)
|
11 |
|
12 |
import streamlit as st
|
13 |
+
from langchain.chat_models import ChatOpenAI
|
14 |
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
|
15 |
|
16 |
from document_qa.document_qa_engine import DocumentQAEngine
|
|
|
52 |
st.session_state['uploaded'] = False
|
53 |
|
54 |
st.set_page_config(
|
55 |
+
page_title="Scientific Document Insights Q/A",
|
56 |
page_icon="π",
|
57 |
initial_sidebar_state="expanded",
|
58 |
menu_items={
|
|
|
70 |
|
71 |
|
72 |
# @st.cache_resource
|
73 |
+
def init_qa(model, api_key=None):
|
74 |
if model == 'chatgpt-3.5-turbo':
|
75 |
+
if api_key:
|
76 |
+
chat = ChatOpenAI(model_name="gpt-3.5-turbo",
|
77 |
+
temperature=0,
|
78 |
+
openai_api_key=api_key,
|
79 |
+
frequency_penalty=0.1)
|
80 |
+
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
|
81 |
+
else:
|
82 |
+
chat = ChatOpenAI(model_name="gpt-3.5-turbo",
|
83 |
+
temperature=0,
|
84 |
+
frequency_penalty=0.1)
|
85 |
+
embeddings = OpenAIEmbeddings()
|
86 |
+
|
87 |
+
|
88 |
elif model == 'mistral-7b-instruct-v0.1':
|
89 |
chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1",
|
90 |
model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048})
|
|
|
170 |
st.markdown(
|
171 |
":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
|
172 |
|
173 |
+
if model == 'mistral-7b-instruct-v0.1' and model not in st.session_state['api_keys']:
|
174 |
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
175 |
api_key = st.text_input('Huggingface API Key', type="password")
|
176 |
|
177 |
+
st.markdown("Get it [here](https://huggingface.co/docs/hub/security-tokens)")
|
|
|
178 |
else:
|
179 |
api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
|
180 |
|
|
|
183 |
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
184 |
with st.spinner("Preparing environment"):
|
185 |
st.session_state['api_keys'][model] = api_key
|
186 |
+
# if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
187 |
+
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
|
188 |
st.session_state['rqa'][model] = init_qa(model)
|
189 |
|
190 |
+
elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']:
|
191 |
if 'OPENAI_API_KEY' not in os.environ:
|
192 |
api_key = st.text_input('OpenAI API Key', type="password")
|
193 |
+
st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
|
|
|
194 |
else:
|
195 |
api_key = os.environ['OPENAI_API_KEY']
|
196 |
|
197 |
if api_key:
|
|
|
198 |
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
199 |
with st.spinner("Preparing environment"):
|
200 |
st.session_state['api_keys'][model] = api_key
|
201 |
if 'OPENAI_API_KEY' not in os.environ:
|
202 |
+
st.session_state['rqa'][model] = init_qa(model, api_key)
|
203 |
+
else:
|
204 |
+
st.session_state['rqa'][model] = init_qa(model)
|
205 |
# else:
|
206 |
# is_api_key_provided = st.session_state['api_key']
|
207 |
|
208 |
+
st.title("π Scientific Document Insights Q/A")
|
209 |
st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
|
210 |
|
211 |
+
st.markdown(
|
212 |
+
":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
|
213 |
|
214 |
uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
|
215 |
disabled=st.session_state['model'] is not None and st.session_state['model'] not in
|