Spaces:
Sleeping
Sleeping
| import pytest | |
| import os | |
| from _utils.splitters.Splitter_class import Splitter | |
| from _utils.models.gerar_relatorio import ( | |
| DocumentChunk, | |
| ) | |
# Splitter settings shared by the tests in this module.
chunk_size, chunk_overlap = 1000, 200

# Absolute directory that contains this test module.
base_dir = os.path.dirname(os.path.abspath(__file__))

# The fixture PDF is resolved against the current working directory, so the
# location the tests are launched from determines where the file is looked up.
cwd = os.getcwd()
pdf_file = os.path.join(
    cwd,
    "tests",
    "fixtures",
    "_pdf-uma-pagina.pdf",
)
class TestSplitters:
    """Tests for ``Splitter.load_and_split_document`` on a fixture PDF.

    Every test awaits ``load_and_split_document(pdf_file,
    should_use_llama_parse, is_bubble)`` and checks the two values it
    returns: a list of ``DocumentChunk`` objects and a list of strings.

    NOTE(review): the test methods are coroutines, so they presumably rely
    on an async pytest plugin configured outside this file -- confirm.
    The ``monkeypatch`` fixture is requested by each test but is not used.
    """

    # Shared splitter built from the module-level default settings.
    splitter = Splitter(chunk_size, chunk_overlap)

    @staticmethod
    def _assert_split_result(result_chunks, result_strings, size_bounds=None):
        """Assert the common shape of a ``load_and_split_document`` result.

        Args:
            result_chunks: First returned value; must be a non-empty list
                containing only ``DocumentChunk`` instances.
            result_strings: Second returned value; must be a non-empty list
                containing only ``str`` instances.
            size_bounds: Optional ``(lower, upper)`` pair. When given, the
                length of every chunk's ``content`` must lie strictly
                between the two values. When ``None`` the size check is
                skipped.

        Raises:
            AssertionError: If any of the checks above fails.
        """
        assert isinstance(result_chunks, list)
        assert isinstance(result_strings, list)
        assert len(result_chunks) > 0
        assert len(result_strings) > 0
        assert all(isinstance(item, str) for item in result_strings)
        assert all(isinstance(item, DocumentChunk) for item in result_chunks)
        if size_bounds is not None:
            lower, upper = size_bounds
            assert all(
                lower < len(item.content) < upper for item in result_chunks
            )

    async def test_load_and_split_document_No_llama_parse_No_Bubble(self, monkeypatch):
        """Split with the shared splitter; chunks must be within 100 of chunk_size."""
        should_use_llama_parse = False
        is_bubble = False

        result_chunks, result_strings = await self.splitter.load_and_split_document(
            pdf_file, should_use_llama_parse, is_bubble
        )

        self._assert_split_result(
            result_chunks,
            result_strings,
            size_bounds=(chunk_size - 100, chunk_size + 100),
        )

    async def test_load_and_split_document_No_llama_parse_No_Bubble_with_bigger_chunk(
        self, monkeypatch
    ):
        """Split with a 3500-character splitter; chunks must be within 200 of it."""
        should_use_llama_parse = False
        is_bubble = False
        # Distinct name so the module-level ``chunk_size`` is not shadowed.
        bigger_chunk_size = 3500
        splitter_temp = Splitter(bigger_chunk_size, chunk_overlap)

        result_chunks, result_strings = await splitter_temp.load_and_split_document(
            pdf_file, should_use_llama_parse, is_bubble
        )

        self._assert_split_result(
            result_chunks,
            result_strings,
            size_bounds=(bigger_chunk_size - 200, bigger_chunk_size + 200),
        )

    async def test_load_and_split_document_With_llama_parse_No_Bubble(
        self, monkeypatch
    ):
        """Split with ``should_use_llama_parse=True``; only result types are checked."""
        should_use_llama_parse = True
        is_bubble = False

        result_chunks, result_strings = await self.splitter.load_and_split_document(
            pdf_file, should_use_llama_parse, is_bubble
        )

        # TODO: the chunk-size check used by the other tests
        # (size_bounds=(chunk_size - 100, chunk_size + 100)) does not pass yet
        # for this path; it will be fixed in the future. Until then only the
        # container and element types are verified.
        self._assert_split_result(result_chunks, result_strings)