added privacy statement, minor cosmetics on the key information, NER written without abbreviation
streamlit_app.py (+27 -15)
|
@@ -48,11 +48,13 @@ if "messages" not in st.session_state:
|
|
| 48 |
if 'ner_processing' not in st.session_state:
|
| 49 |
st.session_state['ner_processing'] = False
|
| 50 |
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def new_file():
|
| 53 |
st.session_state['loaded_embeddings'] = None
|
| 54 |
st.session_state['doc_id'] = None
|
| 55 |
-
|
| 56 |
|
| 57 |
# @st.cache_resource
|
| 58 |
def init_qa(model):
|
|
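The `uploaded` flag introduced in this hunk follows Streamlit's usual session-state idiom: seed the key once per session, then flip it in the uploader's `on_change` callback so later reruns can react to it. A minimal sketch of the pattern in isolation (the label and message are illustrative, not from this app):

import streamlit as st

# Seed the flag once; st.session_state persists across the script
# reruns that Streamlit triggers on every interaction.
if 'uploaded' not in st.session_state:
    st.session_state['uploaded'] = False

def new_file():
    # Runs when the uploader's value changes, before the rerun,
    # so widgets drawn below can already see the new state.
    st.session_state['uploaded'] = True

st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file)

if st.session_state['uploaded']:
    st.write("A document was uploaded in this session.")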
@@ -128,11 +130,15 @@ def play_old_messages():
         else:
             st.write(message['content'])
 
+
 # is_api_key_provided = st.session_state['api_key']
 
 with st.sidebar:
+    st.markdown(
+        ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
+
     st.session_state['model'] = model = st.radio(
-        "Model
+        "Model",
         ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"),  # , "llama-2-70b-chat"),
         index=1,
         captions=[
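Two changes land in this hunk: the privacy notice at the top of the sidebar, and a cleaned-up radio label. The radio's `disabled` argument (next hunk) is what freezes the model choice once a document is in; a sketch of that interaction, assuming a recent Streamlit with the `captions` parameter (the first caption is not visible in this diff, so it is left as a placeholder):

import streamlit as st

if 'uploaded' not in st.session_state:
    st.session_state['uploaded'] = False

# Once a document is loaded, the radio is rendered greyed out
# rather than removed, so the chosen model stays visible.
locked = st.session_state.get('doc_id') is not None or st.session_state['uploaded']

model = st.radio(
    "Model",
    ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"),
    index=1,
    captions=[
        "…",  # first caption not shown in this diff
        "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)",
    ],
    help="Select the LLM model and embeddings you want to use.",
    disabled=locked,
)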
@@ -140,15 +146,17 @@ with st.sidebar:
             "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
             # "LLama2-70B-Chat + Sentence BERT (embeddings)",
         ],
-        help="Select the model you want to use.",
-        disabled=st.session_state['doc_id'] is not None)
+        help="Select the LLM model and embeddings you want to use.",
+        disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
 
     if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
-
-
-
-
-
+        if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
+            api_key = st.text_input('Huggingface API Key', type="password")
+
+            st.markdown(
+                "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
+        else:
+            api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
 
         if api_key:
             # st.session_state['api_key'] = is_api_key_provided = True
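The new branch prefers a token that is already deployed with the Space and only renders a password field when none is found. The same environment-first pattern in isolation (the env-var name and widget label come from the diff; the success message is illustrative):

import os
import streamlit as st

if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
    # No token shipped with the deployment: ask the user for one.
    api_key = st.text_input('Huggingface API Key', type="password")
else:
    # Token injected via the environment (e.g. a Space secret),
    # so the input is never rendered and the key never re-typed.
    api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']

if api_key:
    st.success("API key available")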
@@ -159,10 +167,13 @@ with st.sidebar:
             st.session_state['rqa'][model] = init_qa(model)
 
     elif model == 'chatgpt-3.5-turbo':
-
-
-
-
+        if 'OPENAI_API_KEY' not in os.environ:
+            api_key = st.text_input('OpenAI API Key', type="password")
+            st.markdown(
+                "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
+        else:
+            api_key = os.environ['OPENAI_API_KEY']
+
         if api_key:
             # st.session_state['api_key'] = is_api_key_provided = True
             with st.spinner("Preparing environment"):
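This branch repeats the environment-or-input logic of the previous hunk with `OPENAI_API_KEY` in place of the Hugging Face token. If the duplication ever becomes a burden, a small helper could factor it out; `resolve_api_key` below is a hypothetical refactor, not part of the app:

import os
import streamlit as st

def resolve_api_key(env_var: str, label: str) -> str:
    """Hypothetical helper: environment first, password input as fallback."""
    if env_var in os.environ:
        return os.environ[env_var]
    return st.text_input(label, type="password")

# Equivalent to the two branches in this diff:
# api_key = resolve_api_key('HUGGINGFACEHUB_API_TOKEN', 'Huggingface API Key')
# api_key = resolve_api_key('OPENAI_API_KEY', 'OpenAI API Key')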
@@ -177,7 +188,8 @@ st.title("📝 Scientific Document Insight Q&A")
 st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
 
 uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
-                                 disabled=st.session_state['model'] is not None and st.session_state['model'] not in st.session_state['api_keys'],
+                                 disabled=st.session_state['model'] is not None and st.session_state['model'] not in
+                                 st.session_state['api_keys'],
                                  help="The full-text is extracted using Grobid. ")
 
 question = st.chat_input(
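The `disabled` expression here is only reflowed across two lines; its value is unchanged: the uploader is blocked while a model is selected but no key for it has been stored. A plain-Python sketch of the same gate, assuming `api_keys` is a dict keyed by model name:

# Hypothetical state mirroring the app's session.
model = "mistral-7b-instruct-v0.1"
api_keys = {}  # no key stored for this model yet

disabled = model is not None and model not in api_keys
print(disabled)  # True: model chosen, key missing, so uploads are blocked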
@@ -198,7 +210,7 @@ with st.sidebar:
         help="Number of chunks to consider when answering a question",
         disabled=not uploaded_file)
 
-    st.session_state['ner_processing'] = st.checkbox("NER processing on LLM response")
+    st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
     st.markdown(
         '**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
         unsafe_allow_html=True)
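The checkbox only gains a spelled-out label ("Named Entities Recognition"); the stored flag and the post-processing it gates are unchanged. A sketch of that gating, with `extract_entities` as a hypothetical stand-in for the app's NER step:

import streamlit as st

def extract_entities(text: str) -> str:
    # Hypothetical: wrap quantity and material mentions in colored spans.
    return text

response = "The sample melts at 1420 K."
if st.session_state.get('ner_processing', False):
    response = extract_entities(response)
st.markdown(response, unsafe_allow_html=True)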