Spaces:
Runtime error
Runtime error
Commit
·
b160a8b
1
Parent(s):
7e972ea
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,15 +7,32 @@ import openai
|
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and write it to ``output_path``.

    Delegates entirely to ``urllib.request.urlretrieve``; nothing is
    returned to the caller.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
| 13 |
|
|
|
|
| 14 |
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Newlines, tabs and repeated spaces are all treated as whitespace, so
    the result is a single-line string. Leading and trailing whitespace is
    collapsed to one space rather than stripped.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Raw string literal: '\s' in a plain literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    # '\s+' already matches '\n', so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
def pdf_to_text(path, start_page=1, end_page=None):
|
| 20 |
doc = fitz.open(path)
|
| 21 |
total_pages = doc.page_count
|
|
@@ -110,9 +127,6 @@ def load_recommender(paths, start_page=1):
|
|
| 110 |
recommender.fit(chunks)
|
| 111 |
return 'Corpus Loaded.'
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
| 117 |
openai.api_key = openAI_key
|
| 118 |
completions = openai.Completion.create(
|
|
@@ -174,7 +188,16 @@ def question_answer(url, files, question, openAI_key):
|
|
| 174 |
if question.strip() == '':
|
| 175 |
return '[ERROR]: Question field is empty'
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
load_recommender(pdf_paths)
|
|
|
|
|
|
|
| 178 |
return generate_answer(question, openAI_key)
|
| 179 |
|
| 180 |
recommender = SemanticSearch()
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
from sklearn.neighbors import NearestNeighbors
|
| 10 |
+
from jina import Document, DocumentArray
|
| 11 |
+
|
| 12 |
+
# Create a new DocumentArray for file storage
|
| 13 |
+
doc_array = DocumentArray()
|
| 14 |
|
| 15 |
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and write it to ``output_path``.

    Delegates entirely to ``urllib.request.urlretrieve``; nothing is
    returned to the caller.
    """
    urllib.request.urlretrieve(url, filename=output_path)
|
| 17 |
|
| 18 |
+
|
| 19 |
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Newlines, tabs and repeated spaces are all treated as whitespace, so
    the result is a single-line string. Leading and trailing whitespace is
    collapsed to one space rather than stripped.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Raw string literal: '\s' in a plain literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    # '\s+' already matches '\n', so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text)
|
| 23 |
|
| 24 |
+
# Store a file in the DocumentArray
|
| 25 |
+
def store_file_in_docarray(file_name, file_content):
    """Wrap ``file_content`` in a ``Document`` and add it to ``doc_array``.

    The document is created with ``file_name`` as its ``id`` and
    ``file_content`` as its ``content``, then appended to the module-level
    ``doc_array``. Nothing is returned.
    """
    doc_array.append(Document(id=file_name, content=file_content))
|
| 28 |
+
|
| 29 |
+
# Retrieve a file from the DocumentArray
|
| 30 |
+
def get_file_from_docarray(file_name):
    """Return the content of the first stored document whose id matches.

    Scans the module-level ``doc_array`` in order and returns the
    ``content`` of the first entry whose ``id`` equals ``file_name``.
    Returns ``None`` when no entry matches.
    """
    matching_contents = (
        entry.content for entry in doc_array if entry.id == file_name
    )
    return next(matching_contents, None)
|
| 35 |
+
|
| 36 |
def pdf_to_text(path, start_page=1, end_page=None):
|
| 37 |
doc = fitz.open(path)
|
| 38 |
total_pages = doc.page_count
|
|
|
|
| 127 |
recommender.fit(chunks)
|
| 128 |
return 'Corpus Loaded.'
|
| 129 |
|
|
|
|
|
|
|
|
|
|
| 130 |
def generate_text(openAI_key, prompt, engine="text-davinci-003"):
|
| 131 |
openai.api_key = openAI_key
|
| 132 |
completions = openai.Completion.create(
|
|
|
|
| 188 |
if question.strip() == '':
|
| 189 |
return '[ERROR]: Question field is empty'
|
| 190 |
|
| 191 |
+
# Store the PDF content in the DocumentArray
|
| 192 |
+
for pdf_path in pdf_paths:
|
| 193 |
+
with open(pdf_path, "rb") as f:
|
| 194 |
+
content = f.read()
|
| 195 |
+
store_file_in_docarray(pdf_path, content)
|
| 196 |
+
|
| 197 |
+
# Load the recommender
|
| 198 |
load_recommender(pdf_paths)
|
| 199 |
+
|
| 200 |
+
# Generate an answer
|
| 201 |
return generate_answer(question, openAI_key)
|
| 202 |
|
| 203 |
recommender = SemanticSearch()
|