legalLM / AI_core /tools /summarization_tool.py
Muhammad2003's picture
Upload 45 files
1f891e5 verified
raw
history blame
2.4 kB
"""
Tool for summarizing legal documents.
"""
from langchain.chains import (
    LLMChain,
    MapReduceDocumentsChain,
    ReduceDocumentsChain,
    StuffDocumentsChain,
)
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool

from AI_core.config import LLM, TEXT_SPLITTER
class SummarizationTool(BaseTool):
    """Tool to summarize legal documents using a map-reduce approach.

    The document is loaded from disk, split into chunks with the shared
    ``TEXT_SPLITTER``, each chunk is summarized independently (map step),
    and the per-chunk summaries are merged into one overview (reduce step).
    Both steps use the shared ``LLM`` from ``AI_core.config``.
    """

    name: str = "document_summarization_tool"
    description: str = "Summarizes legal documents. Input should be a file path to a PDF or text document."

    def _run(self, file_path: str) -> str:
        """
        Run the document summarization process.

        Args:
            file_path: Path to the document to summarize. Paths ending in
                ``.pdf`` (any letter case) are parsed as PDF; anything else
                is read as UTF-8 text.

        Returns:
            str: Summarized content

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If a non-PDF file is not valid UTF-8.
        """
        chunks = self._load_chunks(file_path)
        return self._build_chain().run(chunks)

    def _load_chunks(self, file_path: str) -> list:
        """Load the file at ``file_path`` and return it split into chunks."""
        # Compare case-insensitively so "CONTRACT.PDF" is not read as text.
        if file_path.lower().endswith(".pdf"):
            pages = PyPDFLoader(file_path).load()
            return TEXT_SPLITTER.split_documents(pages)

        # Explicit encoding: the platform default is locale-dependent.
        with open(file_path, "r", encoding="utf-8") as handle:
            raw_text = handle.read()
        # split_documents() needs Document objects, not raw strings;
        # create_documents() wraps and splits plain text in one call.
        # NOTE(review): assumes TEXT_SPLITTER is a LangChain TextSplitter,
        # as the existing split_documents() call implies - confirm in config.
        return TEXT_SPLITTER.create_documents([raw_text])

    def _build_chain(self) -> MapReduceDocumentsChain:
        """Assemble the map-reduce summarization chain."""
        # Map step - summarize each chunk
        map_template = (
            "\n"
            "You are a legal expert summarizing complex legal documents.\n"
            "Summarize the following text in a concise and accurate manner, preserving key legal points:\n"
            "{text}\n"
        )
        map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
        map_chain = LLMChain(llm=LLM, prompt=map_prompt, output_key="summary")

        # Reduce step - combine summaries
        reduce_template = (
            "\n"
            "You are a legal expert creating a comprehensive summary from multiple text segments.\n"
            "Combine these summaries into a cohesive overview of the entire document, organized by key legal themes and points:\n"
            "{summaries}\n"
        )
        reduce_prompt = PromptTemplate(template=reduce_template, input_variables=["summaries"])
        reduce_llm_chain = LLMChain(llm=LLM, prompt=reduce_prompt)

        # MapReduceDocumentsChain needs a combine-documents chain for the
        # reduce step, not a bare LLMChain: stuff the per-chunk summaries
        # into the {summaries} slot, then wrap that in ReduceDocumentsChain.
        stuff_chain = StuffDocumentsChain(
            llm_chain=reduce_llm_chain,
            document_variable_name="summaries",
        )
        reduce_documents_chain = ReduceDocumentsChain(
            combine_documents_chain=stuff_chain,
        )

        # Create MapReduce chain
        return MapReduceDocumentsChain(
            llm_chain=map_chain,
            reduce_documents_chain=reduce_documents_chain,
            document_variable_name="text",
        )