# Summarize a long document by chunking and summarizing parts. Uses
# asynchronous calls to the API. Adapted from LangChain [Map-Reduce
# summary](https://langchain.readthedocs.io/en/stable/_modules/langchain/chains/mapreduce.html).
import trio | |
from minichain import TemplatePrompt, show_log, start_chain | |
# Prompt that asks the LLM to summarize a chunk of text.
class SummaryPrompt(TemplatePrompt):
    """Prompt that asks the LLM to summarize the given text.

    The prompt wording lives in the ``summary.pmpt.tpl`` template file;
    an input dict of the form ``{"text": ...}`` fills the template.
    """

    # Template file rendered by the TemplatePrompt base class.
    template_file = "summary.pmpt.tpl"
def chunk(f, width=4000, overlap=800):
    """Split the document in file *f* into overlapping chunks.

    Consecutive chunks start ``width`` characters apart and each chunk
    extends ``overlap`` characters past the next chunk's start, so with
    the defaults each chunk is up to 4800 characters and neighbouring
    chunks share 800 characters of context.

    Args:
        f: Path of the text file to split.
        width: Stride between chunk start positions, in characters.
        overlap: Extra characters appended to each chunk for context.

    Returns:
        A list of ``{"text": chunk}`` dicts covering the whole document
        (an empty list for an empty document).
    """
    # Collapse blank lines so chunks waste less space on whitespace.
    with open(f) as doc:
        text = doc.read().replace("\n\n", "\n")
    chunks = []
    i = 0
    # Originally capped at 4 chunks via range(4), silently truncating
    # long documents; walk the whole text instead. The strict "<" also
    # avoids emitting a trailing empty chunk when len(text) is an exact
    # multiple of width.
    while i * width < len(text):
        chunks.append({"text": text[i * width : (i + 1) * width + overlap]})
        i += 1
    return chunks
# Map-reduce summarization: summarize each chunk in parallel (map),
# then summarize the concatenated summaries (reduce).
with start_chain("summary") as backend:
    prompt = SummaryPrompt(backend.OpenAI())
    # Lift the prompt so it runs over a list of chunk inputs.
    list_prompt = prompt.map()
    # Map - summarize each chunk concurrently via trio's event loop.
    out = trio.run(list_prompt.arun, chunk("../state_of_the_union.txt"))
    # Reduce - summarize the joined per-chunk summaries into one.
    print(prompt({"text": "\n".join(out)}))
# + tags=["hide_inp"]
# Render an example prompt/response pair for the generated docs.
SummaryPrompt().show(
    {"text": "One way to fight is to drive down wages and make Americans poorer."},
    "Make Americans poorer",
)
# -
# Display the recorded log of the chain run.
show_log("summary.log")