albhu committed on
Commit 95fca27 · verified · 1 Parent(s): 90d214f
Files changed (1)
  1. app.py +126 -21
app.py CHANGED
@@ -1,29 +1,134 @@
+import time
 import streamlit as st
 import pandas as pd
-import time
-import search # This imports the modified search module
+import os
+from dotenv import load_dotenv
+import search # Import the search module
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+from docx import Document
+
+load_dotenv()
+
+st.set_page_config(
+    page_title="DocGPT GT",
+    page_icon="speech_balloon",
+    layout="wide",
+)
+
+hide_streamlit_style = """
+<style>
+#MainMenu {visibility: hidden;}
+footer {visibility: hidden;}
+footer:after {
+    content:'2023';
+    visibility: visible;
+    display: block;
+    position: relative;
+    padding: 5px;
+    top: 2px;
+}
+</style>
+"""
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
+def save_as_pdf(conversation):
+    pdf_filename = "conversation.pdf"
+    c = canvas.Canvas(pdf_filename, pagesize=letter)
+
+    c.drawString(100, 750, "Conversation:")
+    y_position = 730
+    for q, a in conversation:
+        c.drawString(120, y_position, f"Q: {q}")
+        c.drawString(120, y_position - 20, f"A: {a}")
+        y_position -= 40
+
+    c.save()
+
+    st.markdown(f"Download [PDF](./{pdf_filename})")
+
+def save_as_docx(conversation):
+    doc = Document()
+    doc.add_heading('Conversation', 0)
+
+    for q, a in conversation:
+        doc.add_paragraph(f'Q: {q}')
+        doc.add_paragraph(f'A: {a}')
+
+    doc_filename = "conversation.docx"
+    doc.save(doc_filename)
+
+    st.markdown(f"Download [DOCX](./{doc_filename})")
+
+def save_as_xlsx(conversation):
+    df = pd.DataFrame(conversation, columns=["Question", "Answer"])
+    xlsx_filename = "conversation.xlsx"
+    df.to_excel(xlsx_filename, index=False)
+
+    st.markdown(f"Download [XLSX](./{xlsx_filename})")
+
+def save_as_txt(conversation):
+    txt_filename = "conversation.txt"
+    with open(txt_filename, "w") as txt_file:
+        for q, a in conversation:
+            txt_file.write(f"Q: {q}\nA: {a}\n\n")
+
+    st.markdown(f"Download [TXT](./{txt_filename})")
 
 def main():
-    st.title("Enhanced Document Query Interface")
-
-    uploaded_files = st.file_uploader("Upload documents", type=['pdf', 'docx'], accept_multiple_files=True)
-    question = st.text_input("What's your question?")
-
-    if st.button("Answer"):
-        if uploaded_files and question:
-            document_texts = []
-            for uploaded_file in uploaded_files:
-                if uploaded_file.type == "application/pdf":
-                    paragraphs = search.read_pdf_pdfminer(uploaded_file)
-                else: # assuming docx
-                    paragraphs = search.read_docx(uploaded_file)
-                document_texts.extend([para.content for para in paragraphs])
-
-            documents_df = pd.DataFrame(document_texts, columns=['content'])
-            answer = search.answer_question(question, documents_df)
+    st.markdown('<h1>Ask anything from Legal Texts</h1><p style="font-size: 12; color: gray;"></p>', unsafe_allow_html=True)
+    st.markdown("<h2>Upload documents</h2>", unsafe_allow_html=True)
+    uploaded_files = st.file_uploader("Upload one or more documents", type=['pdf', 'docx'], accept_multiple_files=True)
+    question = st.text_input("Ask a question based on the documents", key="question_input")
+
+    progress = st.progress(0)
+    for i in range(100):
+        progress.progress(i + 1)
+        time.sleep(0.01)
+
+    if uploaded_files:
+        df = pd.DataFrame(columns=["page_num", "paragraph_num", "content", "tokens"])
+        for uploaded_file in uploaded_files:
+            paragraphs = search.read_pdf_pdfminer(uploaded_file) if uploaded_file.type == "application/pdf" else search.read_docx(uploaded_file)
+            temp_df = pd.DataFrame(
+                [(p.page_num, p.paragraph_num, p.content, search.count_tokens(p.content))
+                 for p in paragraphs],
+                columns=["page_num", "paragraph_num", "content", "tokens"]
+            )
+            df = pd.concat([df, temp_df], ignore_index=True)
+
+        if "interactions" not in st.session_state:
+            st.session_state["interactions"] = []
+
+        answer = ""
+        if question != st.session_state.get("last_question", ""):
+            st.text("Searching...")
+            answer = search.answer_query_with_context(question, df)
+            st.session_state["interactions"].append((question, answer))
             st.write(answer)
-        else:
-            st.write("Please upload at least one document and enter a question.")
+
+        st.markdown("### Interaction History")
+        for q, a in st.session_state["interactions"]:
+            st.write(f"**Q:** {q}\n\n**A:** {a}")
+
+        st.session_state["last_question"] = question
+
+        st.markdown("<h2>Sample paragraphs</h2>", unsafe_allow_html=True)
+        sample_size = min(len(df), 5)
+        st.dataframe(df.sample(n=sample_size))
+
+        if st.button("Save as PDF"):
+            save_as_pdf(st.session_state["interactions"])
+        if st.button("Save as DOCX"):
+            save_as_docx(st.session_state["interactions"])
+        if st.button("Save as XLSX"):
+            save_as_xlsx(st.session_state["interactions"])
+        if st.button("Save as TXT"):
+            save_as_txt(st.session_state["interactions"])
+
+
+    else:
+        st.markdown("<h2>Please upload a document to proceed.</h2>", unsafe_allow_html=True)
 
 if __name__ == "__main__":
     main()
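
For context when reading this diff: the new app.py depends on several helpers from the repo's own search module (read_pdf_pdfminer, read_docx, count_tokens, answer_query_with_context), none of which are shown in this commit. Below is a minimal sketch of the interface those call sites appear to assume, inferred only from this diff; the actual search.py will differ, and answer_query_with_context in particular presumably performs real retrieval plus an LLM call rather than the naive word-overlap ranking used here.

# Hypothetical sketch of the interface app.py expects from search.py,
# inferred from the call sites in this diff. Not the repo's implementation.
from dataclasses import dataclass
from io import BytesIO

import pandas as pd
from docx import Document
from pdfminer.high_level import extract_text


@dataclass
class Paragraph:
    page_num: int        # page the paragraph came from (tracking simplified here)
    paragraph_num: int   # position of the paragraph within the document
    content: str         # raw paragraph text


def read_pdf_pdfminer(uploaded_file) -> list[Paragraph]:
    """Extract non-empty paragraphs from a PDF file-like object via pdfminer.six."""
    text = extract_text(BytesIO(uploaded_file.read()))
    chunks = [c.strip() for c in text.split("\n\n") if c.strip()]
    return [Paragraph(1, i, c) for i, c in enumerate(chunks)]


def read_docx(uploaded_file) -> list[Paragraph]:
    """Extract non-empty paragraphs from a .docx file-like object via python-docx."""
    doc = Document(uploaded_file)
    paras = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
    return [Paragraph(1, i, p) for i, p in enumerate(paras)]


def count_tokens(text: str) -> int:
    """Rough token count; the real module may use a proper tokenizer instead."""
    return len(text.split())


def answer_query_with_context(question: str, df: pd.DataFrame) -> str:
    """Naive stand-in: return the paragraph whose words overlap the question most."""
    q_words = set(question.lower().split())
    scores = df["content"].apply(lambda c: len(q_words & set(str(c).lower().split())))
    best = df.loc[scores.idxmax()]
    return str(best["content"])

The load_dotenv() call added at the top of app.py suggests the real answer_query_with_context reads credentials (e.g. an API key) from a .env file rather than taking them as arguments.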