lfoppiano committed on
Commit
25f0178
1 Parent(s): 924fb11

fix pdf viewer when asking questions

Browse files
Files changed (1) hide show
  1. streamlit_app.py +17 -9
streamlit_app.py CHANGED
@@ -52,6 +52,9 @@ if 'ner_processing' not in st.session_state:
52
  if 'uploaded' not in st.session_state:
53
  st.session_state['uploaded'] = False
54
 
 
 
 
55
  st.set_page_config(
56
  page_title="Scientific Document Insights Q/A",
57
  page_icon="📝",
@@ -265,28 +268,33 @@ with st.sidebar:
265
  @st.cache_resource
266
  def get_pdf_display(binary):
267
  base64_pdf = base64.b64encode(binary).decode('utf-8')
268
- return F'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="950" type="application/pdf"></embed>'
269
 
270
 
271
  if uploaded_file and not st.session_state.loaded_embeddings:
272
  if model not in st.session_state['api_keys']:
273
  st.error("Before uploading a document, you must enter the API key. ")
274
  st.stop()
275
- with st.spinner('Reading file, calling Grobid, and creating memory embeddings...'):
276
- binary = uploaded_file.getvalue()
277
- tmp_file = NamedTemporaryFile()
278
- tmp_file.write(bytearray(binary))
279
 
280
- left_column.markdown(get_pdf_display(binary), unsafe_allow_html=True)
 
 
 
 
 
281
 
282
- st.session_state['doc_id'] = hash = st.session_state['rqa'][model].create_memory_embeddings(tmp_file.name,
283
  chunk_size=chunk_size,
284
  perc_overlap=0.1)
285
- st.session_state['loaded_embeddings'] = True
286
- st.session_state.messages = []
287
 
288
  # timestamp = datetime.utcnow()
289
 
 
 
 
 
290
  with right_column:
291
  if st.session_state.loaded_embeddings and question and len(question) > 0 and st.session_state.doc_id:
292
  for message in st.session_state.messages:
 
52
  if 'uploaded' not in st.session_state:
53
  st.session_state['uploaded'] = False
54
 
55
+ if 'binary' not in st.session_state:
56
+ st.session_state['binary'] = None
57
+
58
  st.set_page_config(
59
  page_title="Scientific Document Insights Q/A",
60
  page_icon="📝",
 
268
  @st.cache_resource
269
  def get_pdf_display(binary):
270
  base64_pdf = base64.b64encode(binary).decode('utf-8')
271
+ return F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="600" height="800" type="application/pdf"></embed>'
272
 
273
 
274
  if uploaded_file and not st.session_state.loaded_embeddings:
275
  if model not in st.session_state['api_keys']:
276
  st.error("Before uploading a document, you must enter the API key. ")
277
  st.stop()
 
 
 
 
278
 
279
+ with right_column:
280
+ with st.spinner('Reading file, calling Grobid, and creating memory embeddings...'):
281
+ binary = uploaded_file.getvalue()
282
+ tmp_file = NamedTemporaryFile()
283
+ tmp_file.write(bytearray(binary))
284
+ st.session_state['binary'] = binary
285
 
286
+ st.session_state['doc_id'] = hash = st.session_state['rqa'][model].create_memory_embeddings(tmp_file.name,
287
  chunk_size=chunk_size,
288
  perc_overlap=0.1)
289
+ st.session_state['loaded_embeddings'] = True
290
+ st.session_state.messages = []
291
 
292
  # timestamp = datetime.utcnow()
293
 
294
+ with left_column:
295
+ if st.session_state['binary']:
296
+ left_column.markdown(get_pdf_display(st.session_state['binary']), unsafe_allow_html=True)
297
+
298
  with right_column:
299
  if st.session_state.loaded_embeddings and question and len(question) > 0 and st.session_state.doc_id:
300
  for message in st.session_state.messages: