Spaces:

luanpoppe
/

vella-backend

Running

App Files Files Community

luanpoppe committed on Jan 14

Commit

7fa7a9c

1 Parent(s): cb23311

refactor: pequenas refatorações

Browse files

Files changed (5) hide show

_utils/gerar_relatorio_modelo_usuario/EnhancedDocumentSummarizer.py +1 -1
_utils/gerar_relatorio_modelo_usuario/contextual_retriever.py +46 -12
_utils/gerar_relatorio_modelo_usuario/llm_calls.py +1 -1
_utils/gerar_relatorio_modelo_usuario/prompts.py +114 -19
_utils/prompts/Prompt_class.py +2 -0

_utils/gerar_relatorio_modelo_usuario/EnhancedDocumentSummarizer.py CHANGED Viewed

@@ -229,7 +229,7 @@ class EnhancedDocumentSummarizer(DocumentSummarizer):
             documento_gerado = llm.invoke(
                 prompt_gerar_documento.format(
-                    context=resumo_auxiliar_do_documento,
                     # modelo_usuario=serializer.data["modelo"],
                 )
             ).content

             documento_gerado = llm.invoke(
                 prompt_gerar_documento.format(
+                    context=self.resumo_gerado,
                     # modelo_usuario=serializer.data["modelo"],
                 )
             ).content

_utils/gerar_relatorio_modelo_usuario/contextual_retriever.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
-# from _utils.gerar_relatorio_modelo_usuario.prompts import (
-#     prompt_auxiliar_do_contextual_prompt,
-# )
 from _utils.bubble_integrations.obter_arquivo import get_pdf_from_bubble
 from _utils.chains.Chain_class import Chain
 from _utils.prompts.Prompt_class import Prompt
@@ -20,6 +21,7 @@ from multiprocessing import Process, Barrier, Queue
 from dataclasses import dataclass
 from langchain_core.messages import HumanMessage
 from asgiref.sync import sync_to_async
 from _utils.gerar_relatorio_modelo_usuario.llm_calls import aclaude_answer, agpt_answer
 from _utils.gerar_relatorio_modelo_usuario.prompts import contextual_prompt
@@ -28,6 +30,7 @@ from _utils.models.gerar_relatorio import (
     DocumentChunk,
     RetrievalConfig,
 )
 lista_contador = []
@@ -43,11 +46,13 @@ class ContextualRetriever:
         self.bm25 = None
         self.claude_context_model = claude_context_model
-    async def llm_generate_context(self, full_text: str, chunk: DocumentChunk) -> str:
         """Generate contextual description using ChatOpenAI"""
         try:
             print("COMEÇOU A REQUISIÇÃO")
-            prompt = contextual_prompt(full_text, chunk.content)
             # response = await aclaude_answer(
             #     self.claude_client, self.claude_context_model, prompt
             # )
@@ -67,20 +72,24 @@ class ContextualRetriever:
     #     Chain(prompt, ChatOpenAI())
     #     return
-    async def create_contextualized_chunk(self, chunk, full_text):
         lista_contador.append(0)
         print("contador: ", len(lista_contador))
         # Código comentado abaixo é para ler as páginas ao redor da página atual do chunk
         # page_content = ""
         # for i in range(
         #     max(0, chunk.page_number - 1),
-        #     min(len(full_text), chunk.page_number + 2),
         # ):
-        #     page_content += full_text[i].page_content if full_text[i] else ""
         page_number = chunk.page_number - 1
-        page_content = full_text[page_number].page_content
-        context = await self.llm_generate_context(page_content, chunk)
         return ContextualizedChunk(
             content=chunk.content,
             page_number=chunk.page_number,
@@ -91,15 +100,40 @@ class ContextualRetriever:
         )
     async def contextualize_all_chunks(
-        self, full_text: List[Document], chunks: List[DocumentChunk]
     ) -> List[ContextualizedChunk]:
         """Add context to all chunks"""
         contextualized_chunks = []
         lista_contador = []
         async with asyncio.TaskGroup() as tg:
             tasks = [
-                tg.create_task(self.create_contextualized_chunk(chunk, full_text))
                 for chunk in chunks
             ]

 import os
+from _utils.gerar_relatorio_modelo_usuario.prompts import (
+    prompt_auxiliar_do_contextual_prompt,
+    create_prompt_auxiliar_do_contextual_prompt,
+)
 from _utils.bubble_integrations.obter_arquivo import get_pdf_from_bubble
 from _utils.chains.Chain_class import Chain
 from _utils.prompts.Prompt_class import Prompt
 from dataclasses import dataclass
 from langchain_core.messages import HumanMessage
 from asgiref.sync import sync_to_async
+from setup.easy_imports import ChatPromptTemplate, ChatOpenAI
 from _utils.gerar_relatorio_modelo_usuario.llm_calls import aclaude_answer, agpt_answer
 from _utils.gerar_relatorio_modelo_usuario.prompts import contextual_prompt
     DocumentChunk,
     RetrievalConfig,
 )
+from _utils.prompts.Prompt_class import prompt as prompt_obj
 lista_contador = []
         self.bm25 = None
         self.claude_context_model = claude_context_model
+    async def llm_generate_context(
+        self, page_text: str, chunk: DocumentChunk, resumo_auxiliar
+    ) -> str:
         """Generate contextual description using ChatOpenAI"""
         try:
             print("COMEÇOU A REQUISIÇÃO")
+            prompt = contextual_prompt(page_text, resumo_auxiliar, chunk.content)
             # response = await aclaude_answer(
             #     self.claude_client, self.claude_context_model, prompt
             # )
     #     Chain(prompt, ChatOpenAI())
     #     return
+    async def create_contextualized_chunk(
+        self, chunk, single_page_text, response_auxiliar_summary
+    ):
         lista_contador.append(0)
         print("contador: ", len(lista_contador))
         # Código comentado abaixo é para ler as páginas ao redor da página atual do chunk
         # page_content = ""
         # for i in range(
         #     max(0, chunk.page_number - 1),
+        #     min(len(single_page_text), chunk.page_number + 2),
         # ):
+        #     page_content += single_page_text[i].page_content if single_page_text[i] else ""
         page_number = chunk.page_number - 1
+        page_content = single_page_text[page_number].page_content
+        context = await self.llm_generate_context(
+            page_content, chunk, response_auxiliar_summary
+        )
         return ContextualizedChunk(
             content=chunk.content,
             page_number=chunk.page_number,
         )
     async def contextualize_all_chunks(
+        self, full_text_as_array: List[Document], chunks: List[DocumentChunk]
     ) -> List[ContextualizedChunk]:
         """Add context to all chunks"""
         contextualized_chunks = []
         lista_contador = []
+        full_text = ""
+        for x in full_text_as_array:
+            full_text += x.page_content
+        # prompt_auxiliar_summary = prompt_obj.create_prompt_template(
+        #     "", prompt_auxiliar_do_contextual_prompt
+        # ).invoke({"PROCESSO_JURIDICO": full_text})
+        # response_auxiliar_summary = await ChatOpenAI(max_tokens=128000).ainvoke(
+        #     prompt_auxiliar_summary
+        # )
+        prompt_auxiliar_summary = create_prompt_auxiliar_do_contextual_prompt(full_text)
+        print("\n\n\nprompt_auxiliar_summary: ", prompt_auxiliar_summary)
+        response_auxiliar_summary = await aclaude_answer(
+            self.claude_client, self.claude_context_model, prompt_auxiliar_summary
+        )
+        print("\n\n\n\nresponse_auxiliar_summary: ", response_auxiliar_summary)
         async with asyncio.TaskGroup() as tg:
             tasks = [
+                tg.create_task(
+                    self.create_contextualized_chunk(
+                        chunk, full_text_as_array, response_auxiliar_summary
+                    )
+                )
                 for chunk in chunks
             ]

_utils/gerar_relatorio_modelo_usuario/llm_calls.py CHANGED Viewed

@@ -8,7 +8,7 @@ async def aclaude_answer(claude_client, claude_context_model, prompt):
     print("\n\nComeçou uma requisição pelo Claude")
     response = await claude_client.messages.create(
         model=claude_context_model,
-        max_tokens=100,
         messages=[{"role": "user", "content": prompt}],
     )
     return response.content[

     print("\n\nComeçou uma requisição pelo Claude")
     response = await claude_client.messages.create(
         model=claude_context_model,
+        max_tokens=100,  # Máximo é 4096
         messages=[{"role": "user", "content": prompt}],
     )
     return response.content[

_utils/gerar_relatorio_modelo_usuario/prompts.py CHANGED Viewed

@@ -16,7 +16,7 @@ Siga este passo a passo para criar o resumo:
 1. Leia atentamente todo o processo jurídico fornecido.
 <processo_juridico>
-{{PROCESSO_JURIDICO}}
 </processo_juridico>
 2. Identifique e anote as datas e conteúdos relevantes relacionados às 10 peças processuais listadas acima.
@@ -57,24 +57,119 @@ Formate sua resposta da seguinte maneira:
 </resumo_final>"""
-def contextual_prompt(full_text, chunk_content):
-    return f"""You are a language model tasked with providing context to improve the retrieval of information from a chunk extracted from a document. Follow these steps internally (do not display reasoning or reflection in the final output):
-1. **Chain of Thought (internal)**:
-- Identify the document ID, which is the value between "NUM." and "- Pág".
-- Identify the document name from the header.
-2. **Reflection (internal)**:
-- Confirm the document ID and name are correctly identified.
-- Ensure the final context is concise and helpful.
-3. **Final Response**:
-- Provide a short context situating the *chunk* within the document, including the document ID and document name.
-- Do not include any reasoning or reflection in your response.
-**Example Usage:**
-```
-<document> {full_text} </document>
-<chunk> {chunk_content} </chunk>
-Please return only the succinct context (without displaying your internal reasoning), including the document ID and the document name.
-```
-"""
 # Novo nome --> prompt-auxiliar --> Para gerar documentos (é usado como auxiliar no prompt final)

 1. Leia atentamente todo o processo jurídico fornecido.
 <processo_juridico>
+{PROCESSO_JURIDICO}
 </processo_juridico>
 2. Identifique e anote as datas e conteúdos relevantes relacionados às 10 peças processuais listadas acima.
 </resumo_final>"""
+def create_prompt_auxiliar_do_contextual_prompt(PROCESSO_JURIDICO: str):
+    return f"""Você é um assistente jurídico especializado em direito brasileiro. Sua tarefa é criar um resumo conciso e informativo de um processo jurídico, de acordo com as leis do Brasil. O resumo deve focar nos momentos cruciais do processo, na última movimentação processual e nas principais movimentações que ocorreram.
+Aqui estão as 10 principais peças processuais em ordem cronológica do processo civil brasileiro que você deve priorizar em sua análise:
+1. Petição Inicial
+2. Contestação
+3. Réplica
+4. Decisão de Saneamento
+5. Sentença
+6. Recurso de Apelação
+7. Embargos de Declaração
+8. Cumprimento de Sentença
+9. Embargos à Execução
+10. Agravo de Instrumento
+Siga este passo a passo para criar o resumo:
+1. Leia atentamente todo o processo jurídico fornecido.
+<processo_juridico>
+{PROCESSO_JURIDICO}
+</processo_juridico>
+2. Identifique e anote as datas e conteúdos relevantes relacionados às 10 peças processuais listadas acima.
+3. Organize cronologicamente as informações coletadas.
+4. Destaque a última movimentação processual e seu significado para o andamento do processo.
+5. Resuma as principais movimentações, focando em seu impacto no processo.
+6. Elabore um texto coeso que apresente o fluxo do processo, destacando os pontos cruciais e as decisões mais importantes.
+Após criar o resumo inicial, utilize a técnica socrática de reflexão para garantir a precisão e completude do resumo. Faça a si mesmo as seguintes perguntas:
+1. O resumo abrange todas as 10 peças processuais principais?
+2. A última movimentação processual está claramente identificada e explicada?
+3. O texto apresenta uma visão clara do fluxo do processo?
+4. Todas as informações cruciais para o entendimento do caso estão incluídas?
+5. O resumo está livre de opiniões pessoais e se atém aos fatos do processo?
+6. A linguagem utilizada é clara e acessível, mesmo para quem não é especialista em direito?
+Revise e ajuste o resumo conforme necessário com base nessa reflexão.
+O resumo final deve ter no máximo 2 páginas de extensão (aproximadamente 1000 palavras).
+Formate sua resposta da seguinte maneira:
+<resumo_processo>
+[Insira aqui o resumo do processo jurídico]
+</resumo_processo>
+<reflexao_socratica>
+[Insira aqui suas respostas às perguntas da reflexão socrática]
+</reflexao_socratica>
+<resumo_final>
+[Insira aqui o resumo final revisado, se houver alterações após a reflexão]
+</resumo_final>"""
+def contextual_prompt(single_page_text, summary_text, chunk_content):
+    return f"""You are an AI assistant specialized in providing context for document retrieval. Your task is to analyze a chunk of text from a larger document and provide a brief context for it.
+Here's the summary of the full text of the document:
+<summary_text>
+{summary_text}
+</summary_text>
+Here's the single page where the chunk is situated:
+<single_page>
+{single_page_text}
+</single_page>
+And here's the specific chunk to contextualize:
+<chunk>
+{chunk_content}
+</chunk>
+Follow these steps:
+1. Identify and quote the document ID (found between "NUM." and "- Pág") and the document name (from the header).
+2. Summarize the main topics or themes of the single page and where it fit within the summary of the full text.
+3. Identify where the specific chunk fits within these themes.
+4. Create a concise context that situates the chunk within the document.
+With this informations, your response should be a single, concise paragraph that includes:
+- The document ID
+- The document name
+- A brief context for the chunk
+Example final output structure (do not copy the content, only the format):
+<chunk_context>
+[Single paragraph with document ID, name, and chunk context]
+</chunk_context>"""
+#     return f"""You are a language model tasked with providing context to improve the retrieval of information from a chunk extracted from a document. Follow these steps internally (do not display reasoning or reflection in the final output):
+# 1. **Chain of Thought (internal)**:
+# - Identify the document ID, which is the value between "NUM." and "- Pág".
+# - Identify the document name from the header.
+# 2. **Reflection (internal)**:
+# - Confirm the document ID and name are correctly identified.
+# - Ensure the final context is concise and helpful.
+# 3. **Final Response**:
+# - Provide a short context situating the *chunk* within the document, including the document ID and document name.
+# - Do not include any reasoning or reflection in your response.
+# **Example Usage:**
+# ```
+# <document> {full_text} </document>
+# <chunk> {chunk_content} </chunk>
+# Please return only the succinct context (without displaying your internal reasoning), including the document ID and the document name.
+# ```
+# """
 # Novo nome --> prompt-auxiliar --> Para gerar documentos (é usado como auxiliar no prompt final)

_utils/prompts/Prompt_class.py CHANGED Viewed

@@ -10,3 +10,5 @@ class Prompt:
             [("system", system_prompt), ("user", user_prompt)]
         )
         return prompt_template

             [("system", system_prompt), ("user", user_prompt)]
         )
         return prompt_template
+prompt = Prompt()