luanpoppe committed on
Commit
0870c96
·
1 Parent(s): e79797a

fix: manutenção indevida dos Documents dos PDFs enviados anteriormente

Browse files
compose.yaml CHANGED
@@ -11,7 +11,7 @@ services:
11
  - .env
12
  develop:
13
  watch:
14
- - action: sync
15
  path: ./
16
  target: /app
17
  ignore:
 
11
  - .env
12
  develop:
13
  watch:
14
+ - action: sync+restart
15
  path: ./
16
  target: /app
17
  ignore:
endpoint_teste/views.py CHANGED
@@ -59,6 +59,8 @@ def getTeste(request):
59
  @api_view(["POST"])
60
  def getPDF(request):
61
  if request.method == "POST":
 
 
62
  serializer = PDFUploadSerializer(data=request.data)
63
  if serializer.is_valid(raise_exception=True):
64
  # Access the uploaded file
 
59
  @api_view(["POST"])
60
  def getPDF(request):
61
  if request.method == "POST":
62
+ print('\n\n\n')
63
+ print("CHEGOU AQUI")
64
  serializer = PDFUploadSerializer(data=request.data)
65
  if serializer.is_valid(raise_exception=True):
66
  # Access the uploaded file
langchain_backend/main.py CHANGED
@@ -1,12 +1,13 @@
1
  import os
2
- from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF
 
3
  from langchain.chains import create_retrieval_chain
4
 
5
  os.environ.get("OPENAI_API_KEY")
6
 
7
  def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
8
  print('model: ', model)
9
- pages = None
10
  if pdf_url:
11
  pages = getPDF(pdf_url)
12
  else:
@@ -19,4 +20,8 @@ def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
19
  # else:
20
  # rag_chain = create_retrieval_chain(retriever, create_prompt_llm_chain(system_prompt))
21
  results = rag_chain.invoke({"input": user_prompt})
 
 
 
 
22
  return results
 
1
  import os
2
+ from langchain_backend.utils import create_prompt_llm_chain, create_retriever, getPDF, vectorstore
3
+ from langchain_backend import utils
4
  from langchain.chains import create_retrieval_chain
5
 
6
  os.environ.get("OPENAI_API_KEY")
7
 
8
  def get_llm_answer(system_prompt, user_prompt, pdf_url, model):
9
  print('model: ', model)
10
+ pages = []
11
  if pdf_url:
12
  pages = getPDF(pdf_url)
13
  else:
 
20
  # else:
21
  # rag_chain = create_retrieval_chain(retriever, create_prompt_llm_chain(system_prompt))
22
  results = rag_chain.invoke({"input": user_prompt})
23
+ print('allIds ARQUIVO MAIN: ', utils.allIds)
24
+ vectorstore.delete( utils.allIds)
25
+ utils.allIds = []
26
+ print('utils.allIds: ', utils.allIds)
27
  return results
langchain_backend/utils.py CHANGED
@@ -8,21 +8,41 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain_huggingface import HuggingFaceEndpoint
10
  from setup.environment import default_model
 
11
 
12
  os.environ.get("OPENAI_API_KEY")
13
  os.environ.get("HUGGINGFACEHUB_API_TOKEN")
14
 
 
 
 
 
 
 
15
  def getPDF(file_path):
 
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
17
  loader = PyPDFLoader(file_path, extract_images=False)
18
  pages = loader.load_and_split(text_splitter)
 
 
 
 
 
 
19
  return pages
20
 
21
  def create_retriever(documents):
22
- vectorstore = Chroma.from_documents(
23
- documents,
24
- embedding=OpenAIEmbeddings(),
25
- )
 
 
 
 
 
 
26
 
27
  retriever = vectorstore.as_retriever(
28
  # search_type="similarity",
 
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain_huggingface import HuggingFaceEndpoint
10
  from setup.environment import default_model
11
+ from uuid import uuid4
12
 
13
  os.environ.get("OPENAI_API_KEY")
14
  os.environ.get("HUGGINGFACEHUB_API_TOKEN")
15
 
16
+ vectorstore = Chroma(
17
+ collection_name="documents",
18
+ embedding_function=OpenAIEmbeddings()
19
+ )
20
+ allIds = []
21
+
22
  def getPDF(file_path):
23
+ documentId = 0
24
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
25
  loader = PyPDFLoader(file_path, extract_images=False)
26
  pages = loader.load_and_split(text_splitter)
27
+ for page in pages:
28
+ print('\n\n\n')
29
+ print('allIds: ', allIds)
30
+ documentId = str(uuid4())
31
+ allIds.append(documentId)
32
+ page.id = documentId
33
  return pages
34
 
35
  def create_retriever(documents):
36
+ print('\n\n')
37
+ print('documents: ', documents)
38
+
39
+ # vectorstore = Chroma.from_documents(
40
+ # documents,
41
+ # embedding=OpenAIEmbeddings(),
42
+ # )
43
+ # vectorstore.delete_collection()
44
+
45
+ vectorstore.add_documents(documents=documents)
46
 
47
  retriever = vectorstore.as_retriever(
48
  # search_type="similarity",