luanpoppe committed
Commit · 0870c96
1 Parent(s): e79797a

fix: unintended retention of Documents from previously uploaded PDFs
Files changed:
- compose.yaml +1 -1
- endpoint_teste/views.py +2 -0
- langchain_backend/main.py +7 -2
- langchain_backend/utils.py +24 -4
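In short: chunks from each uploaded PDF were staying in the shared Chroma collection between requests, so answers kept drawing on previously sent documents. getPDF now tags every loaded page with a uuid4 id and records the ids in utils.allIds, and get_llm_answer deletes those ids from the vectorstore once the chain has produced its answer. The Compose watch action is also switched from sync to sync+restart, and two debug prints are added to the upload view.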
compose.yaml
CHANGED
@@ -11,7 +11,7 @@ services:
       - .env
     develop:
       watch:
-        - action: sync
+        - action: sync+restart
           path: ./
           target: /app
           ignore:
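With Docker Compose's watch mode, a sync action only copies changed files into the running container; sync+restart additionally restarts the service after syncing, so the backend process actually reloads the updated code.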
endpoint_teste/views.py
CHANGED
@@ -59,6 +59,8 @@ def getTeste(request):
 @api_view(["POST"])
 def getPDF(request):
     if request.method == "POST":
+        print('\n\n\n')
+        print("CHEGOU AQUI")
         serializer = PDFUploadSerializer(data=request.data)
         if serializer.is_valid(raise_exception=True):
             # Access the uploaded file
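The two added print calls are temporary debug output ("CHEGOU AQUI" is Portuguese for "got here") confirming that the upload endpoint is being reached.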
langchain_backend/main.py
CHANGED
@@ -1,12 +1,13 @@
 import os
-from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF
+from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF, vectorstore
+from langchain_backend import utils
 from langchain.chains import create_retrieval_chain

 os.environ.get("OPENAI_API_KEY")

 def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
     print('model: ', model)
-    pages =
+    pages = []
     if pdf_url:
         pages = getPDF(pdf_url)
     else:
@@ -19,4 +20,8 @@ def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
     # else:
     #     rag_chain = create_retrieval_chain(retriever, create_prompt_llm_chain(system_prompt))
     results = rag_chain.invoke({"input": user_prompt})
+    print('allIds ARQUIVO MAIN: ', utils.allIds)
+    vectorstore.delete( utils.allIds)
+    utils.allIds = []
+    print('utils.allIds: ', utils.allIds)
     return results
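The deletion above relies on the module-level allIds list that utils.py populates (see the next file). As a rough sketch of the same cleanup pattern with the ids kept local to one request instead of in a global list — the function name answer_with_temporary_pages and the langchain_chroma / langchain_openai import paths are assumptions, not code from this repo:

from uuid import uuid4

from langchain_chroma import Chroma            # assumed import path
from langchain_openai import OpenAIEmbeddings  # assumed import path
from langchain_core.documents import Document

vectorstore = Chroma(
    collection_name="documents",
    embedding_function=OpenAIEmbeddings(),
)

def answer_with_temporary_pages(pages: list[Document]):
    # Tag each chunk with a fresh uuid so it can be removed afterwards.
    ids = [str(uuid4()) for _ in pages]
    vectorstore.add_documents(documents=pages, ids=ids)
    try:
        retriever = vectorstore.as_retriever()
        # ... build the retrieval chain and invoke it here ...
    finally:
        # Drop only this request's chunks so earlier uploads never linger.
        vectorstore.delete(ids=ids)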
langchain_backend/utils.py
CHANGED
@@ -8,21 +8,41 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_huggingface import HuggingFaceEndpoint
 from setup.environment import default_model
+from uuid import uuid4

 os.environ.get("OPENAI_API_KEY")
 os.environ.get("HUGGINGFACEHUB_API_TOKEN")

+vectorstore = Chroma(
+    collection_name="documents",
+    embedding_function=OpenAIEmbeddings()
+)
+allIds = []
+
 def getPDF(file_path):
+    documentId = 0
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     loader = PyPDFLoader(file_path, extract_images=False)
     pages = loader.load_and_split(text_splitter)
+    for page in pages:
+        print('\n\n\n')
+        print('allIds: ', allIds)
+        documentId = str(uuid4())
+        allIds.append(documentId)
+        page.id = documentId
     return pages

 def create_retriever(documents):
-    vectorstore = Chroma.from_documents(
-        documents,
-        embedding=OpenAIEmbeddings(),
-    )
+    print('\n\n')
+    print('documents: ', documents)
+
+    # vectorstore = Chroma.from_documents(
+    #     documents,
+    #     embedding=OpenAIEmbeddings(),
+    # )
+    # vectorstore.delete_collection()
+
+    vectorstore.add_documents(documents=documents)

     retriever = vectorstore.as_retriever(
         # search_type="similarity",