Spaces:
Runtime error
Runtime error
Commit
·
42cf399
1
Parent(s):
b15e4ae
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,32 +96,32 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
|
|
| 96 |
#vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
|
| 97 |
#vectordb.persist()
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
|
| 102 |
-
|
| 103 |
|
| 104 |
# add all file in the list to the merger object
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
|
| 126 |
|
| 127 |
#with open("foo.pkl", 'wb') as f:
|
|
@@ -161,10 +161,10 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
|
|
| 161 |
|
| 162 |
# completion = completion({"question": query, "chat_history": history[-context_length*2:]})
|
| 163 |
|
| 164 |
-
with open("foo.pkl", 'rb') as f:
|
| 165 |
-
|
| 166 |
|
| 167 |
-
docsearch = FAISS.from_texts(texts,
|
| 168 |
query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
|
| 169 |
docs = docsearch.similarity_search(query)
|
| 170 |
#print(docs[0].page_content)
|
|
|
|
| 96 |
#vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
|
| 97 |
#vectordb.persist()
|
| 98 |
|
| 99 |
+
path = './files'
|
| 100 |
+
pdf_files = glob.glob(os.path.join(path, "*.pdf"))
|
| 101 |
|
| 102 |
+
merger = PdfWriter()
|
| 103 |
|
| 104 |
# add all file in the list to the merger object
|
| 105 |
+
for pdf in pdf_files:
|
| 106 |
+
merger.append(pdf)
|
| 107 |
+
merger.write("merged-pdf.pdf")
|
| 108 |
+
merger.close()
|
| 109 |
|
| 110 |
+
reader = PdfReader("merged-pdf.pdf")
|
| 111 |
+
raw_text = ''
|
| 112 |
+
for i, page in enumerate(reader.pages):
|
| 113 |
+
text = page.extract_text()
|
| 114 |
+
if text:
|
| 115 |
+
raw_text += text
|
| 116 |
+
text_splitter = CharacterTextSplitter(
|
| 117 |
+
separator = "\n",
|
| 118 |
+
chunk_size = 1000,
|
| 119 |
+
chunk_overlap = 200,
|
| 120 |
+
length_function = len,
|
| 121 |
+
)
|
| 122 |
+
texts = text_splitter.split_text(raw_text)
|
| 123 |
+
len(texts)
|
| 124 |
+
embeddings = OpenAIEmbeddings()
|
| 125 |
|
| 126 |
|
| 127 |
#with open("foo.pkl", 'wb') as f:
|
|
|
|
| 161 |
|
| 162 |
# completion = completion({"question": query, "chat_history": history[-context_length*2:]})
|
| 163 |
|
| 164 |
+
#with open("foo.pkl", 'rb') as f:
|
| 165 |
+
# new_docsearch = pickle.load(f)
|
| 166 |
|
| 167 |
+
docsearch = FAISS.from_texts(texts, embeddings)
|
| 168 |
query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
|
| 169 |
docs = docsearch.similarity_search(query)
|
| 170 |
#print(docs[0].page_content)
|