import tempfile
import os
from typing import List
from langchain_core.documents import Document as LangchainDocument
from llama_index import Document
from _utils.bubble_integrations.obter_arquivo import get_pdf_from_bubble
from llama_parse import LlamaParse, ResultType


def handle_pdf_files_from_serializer(files):
    """Copy each uploaded file into its own temporary ``.pdf`` file on disk.

    Args:
        files: Iterable of uploaded file objects exposing ``seek()`` and
            ``chunks()`` (presumably Django ``UploadedFile`` instances --
            confirm against the caller).

    Returns:
        A list with the path of the temporary file written for each input,
        in input order. The files are created with ``delete=False``, so the
        caller is responsible for deleting them (see
        ``remove_pdf_temp_files``).

    Raises:
        Any exception raised while reading an upload or writing a temporary
        file is re-raised after the temporary files created so far have been
        removed.
    """
    listaPDFs = []
    try:
        for file in files:
            file.seek(0)  # Rewind in case the upload was already read.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".pdf"
            ) as temp_file:
                # Record the path before writing so that a partially written
                # file is also cleaned up if a later step fails.
                listaPDFs.append(temp_file.name)
                for chunk in file.chunks():
                    temp_file.write(chunk)
    except Exception:
        # Nothing is returned on failure, so the caller could never delete
        # these files; remove them here instead of leaking them.
        remove_pdf_temp_files(listaPDFs)
        raise

    print("listaPDFs: ", listaPDFs)
    return listaPDFs


def remove_pdf_temp_files(listaPDFs):
    """Delete every temporary file whose path is in ``listaPDFs``.

    Paths that no longer exist are skipped so that one missing file does not
    prevent the remaining files from being removed.

    Args:
        listaPDFs: Iterable of file-system paths to delete.
    """
    for file in listaPDFs:
        try:
            os.remove(file)
        except FileNotFoundError:
            # Already gone; keep going so the rest are still cleaned up.
            continue


async def return_document_list_with_llama_parser(
    file: str,
) -> List[LangchainDocument]:
    """Parse ``file`` with LlamaParse and return one document per page.

    The API key is read from the ``LLAMA_CLOUD_API_KEY`` environment
    variable. Each returned document carries the page's ``"md"`` value as
    its content (an empty string when that value is missing) and the page's
    ``"page"`` value in its metadata under the key ``"page"``.

    Args:
        file: Path of the file handed to ``LlamaParse.aget_json``.

    Returns:
        A list of ``LangchainDocument`` objects built from the ``"pages"``
        entry of the first parsed result. The list is empty when the parser
        returns no result or the result has no pages.

    Raises:
        ValueError: If ``LLAMA_CLOUD_API_KEY`` is unset or empty.
    """
    llama_parser_api = os.getenv("LLAMA_CLOUD_API_KEY")
    if not llama_parser_api:
        raise ValueError("Não foi possível obter a API_KEY do llama parser")

    parser = LlamaParse(
        api_key=llama_parser_api,
        result_type=ResultType.JSON,  # Options: 'text', 'markdown', 'json', 'structured'
        language="pt",
        verbose=True,
    )

    parsed_document = await parser.aget_json(file)
    if not parsed_document:
        # No result to read pages from; indexing [0] would raise IndexError.
        return []

    # A result without a "pages" entry is treated as having no pages.
    pages = parsed_document[0].get("pages") or []  # type: ignore

    documents: List[LangchainDocument] = [
        LangchainDocument(
            # page_content must be a string, so fall back to "" when a page
            # has no markdown.
            page_content=page.get("md") or "",  # type: ignore
            metadata={"page": page.get("page")},  # type: ignore
        )
        for page in pages
    ]
    return documents