Spaces:
Running
Running
luanpoppe
committed on
Commit
·
0870c96
1
Parent(s):
e79797a
fix: manutenção indevida dos Documents dos PDFs enviados anteriormente
Browse files
- compose.yaml +1 -1
- endpoint_teste/views.py +2 -0
- langchain_backend/main.py +7 -2
- langchain_backend/utils.py +24 -4
compose.yaml
CHANGED
@@ -11,7 +11,7 @@ services:
|
|
11 |
- .env
|
12 |
develop:
|
13 |
watch:
|
14 |
-
- action: sync
|
15 |
path: ./
|
16 |
target: /app
|
17 |
ignore:
|
|
|
11 |
- .env
|
12 |
develop:
|
13 |
watch:
|
14 |
+
- action: sync+restart
|
15 |
path: ./
|
16 |
target: /app
|
17 |
ignore:
|
endpoint_teste/views.py
CHANGED
@@ -59,6 +59,8 @@ def getTeste(request):
|
|
59 |
@api_view(["POST"])
|
60 |
def getPDF(request):
|
61 |
if request.method == "POST":
|
|
|
|
|
62 |
serializer = PDFUploadSerializer(data=request.data)
|
63 |
if serializer.is_valid(raise_exception=True):
|
64 |
# Access the uploaded file
|
|
|
59 |
@api_view(["POST"])
|
60 |
def getPDF(request):
|
61 |
if request.method == "POST":
|
62 |
+
print('\n\n\n')
|
63 |
+
print("CHEGOU AQUI")
|
64 |
serializer = PDFUploadSerializer(data=request.data)
|
65 |
if serializer.is_valid(raise_exception=True):
|
66 |
# Access the uploaded file
|
langchain_backend/main.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
import os
|
2 |
-
from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF
|
|
|
3 |
from langchain.chains import create_retrieval_chain
|
4 |
|
5 |
os.environ.get("OPENAI_API_KEY")
|
6 |
|
7 |
def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
|
8 |
print('model: ', model)
|
9 |
-
pages =
|
10 |
if pdf_url:
|
11 |
pages = getPDF(pdf_url)
|
12 |
else:
|
@@ -19,4 +20,8 @@ def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
|
|
19 |
# else:
|
20 |
# rag_chain = create_retrieval_chain(retriever, create_prompt_llm_chain(system_prompt))
|
21 |
results = rag_chain.invoke({"input": user_prompt})
|
|
|
|
|
|
|
|
|
22 |
return results
|
|
|
1 |
import os
|
2 |
+
from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF, vectorstore
|
3 |
+
from langchain_backend import utils
|
4 |
from langchain.chains import create_retrieval_chain
|
5 |
|
6 |
os.environ.get("OPENAI_API_KEY")
|
7 |
|
8 |
def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
|
9 |
print('model: ', model)
|
10 |
+
pages = []
|
11 |
if pdf_url:
|
12 |
pages = getPDF(pdf_url)
|
13 |
else:
|
|
|
20 |
# else:
|
21 |
# rag_chain = create_retrieval_chain(retriever, create_prompt_llm_chain(system_prompt))
|
22 |
results = rag_chain.invoke({"input": user_prompt})
|
23 |
+
print('allIds ARQUIVO MAIN: ', utils.allIds)
|
24 |
+
vectorstore.delete( utils.allIds)
|
25 |
+
utils.allIds = []
|
26 |
+
print('utils.allIds: ', utils.allIds)
|
27 |
return results
|
langchain_backend/utils.py
CHANGED
@@ -8,21 +8,41 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
|
|
8 |
from langchain_core.prompts import ChatPromptTemplate
|
9 |
from langchain_huggingface import HuggingFaceEndpoint
|
10 |
from setup.environment import default_model
|
|
|
11 |
|
12 |
os.environ.get("OPENAI_API_KEY")
|
13 |
os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def getPDF(file_path):
|
|
|
16 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
17 |
loader = PyPDFLoader(file_path, extract_images=False)
|
18 |
pages = loader.load_and_split(text_splitter)
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
return pages
|
20 |
|
21 |
def create_retriever(documents):
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
retriever = vectorstore.as_retriever(
|
28 |
# search_type="similarity",
|
|
|
8 |
from langchain_core.prompts import ChatPromptTemplate
|
9 |
from langchain_huggingface import HuggingFaceEndpoint
|
10 |
from setup.environment import default_model
|
11 |
+
from uuid import uuid4
|
12 |
|
13 |
os.environ.get("OPENAI_API_KEY")
|
14 |
os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
15 |
|
16 |
+
vectorstore = Chroma(
|
17 |
+
collection_name="documents",
|
18 |
+
embedding_function=OpenAIEmbeddings()
|
19 |
+
)
|
20 |
+
allIds = []
|
21 |
+
|
22 |
def getPDF(file_path):
|
23 |
+
documentId = 0
|
24 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
25 |
loader = PyPDFLoader(file_path, extract_images=False)
|
26 |
pages = loader.load_and_split(text_splitter)
|
27 |
+
for page in pages:
|
28 |
+
print('\n\n\n')
|
29 |
+
print('allIds: ', allIds)
|
30 |
+
documentId = str(uuid4())
|
31 |
+
allIds.append(documentId)
|
32 |
+
page.id = documentId
|
33 |
return pages
|
34 |
|
35 |
def create_retriever(documents):
|
36 |
+
print('\n\n')
|
37 |
+
print('documents: ', documents)
|
38 |
+
|
39 |
+
# vectorstore = Chroma.from_documents(
|
40 |
+
# documents,
|
41 |
+
# embedding=OpenAIEmbeddings(),
|
42 |
+
# )
|
43 |
+
# vectorstore.delete_collection()
|
44 |
+
|
45 |
+
vectorstore.add_documents(documents=documents)
|
46 |
|
47 |
retriever = vectorstore.as_retriever(
|
48 |
# search_type="similarity",
|