File size: 3,259 Bytes
7b856a8
 
f5ec828
 
 
872b692
f5ec828
 
 
 
872b692
f5ec828
 
7b856a8
 
872b692
7b856a8
 
f5ec828
 
 
7b856a8
 
 
 
f5ec828
872b692
f5ec828
7b856a8
 
 
 
 
 
 
f5ec828
872b692
f5ec828
7b856a8
 
 
 
 
 
 
872b692
7b856a8
 
 
 
 
 
 
 
 
f5ec828
872b692
f5ec828
7b856a8
 
f5ec828
7b856a8
 
 
 
 
f5ec828
872b692
f5ec828
7b856a8
 
f5ec828
7b856a8
 
 
 
f5ec828
7b856a8
f5ec828
7b856a8
 
 
 
 
f5ec828
872b692
f5ec828
 
7b856a8
 
 
 
 
 
 
 
 
 
 
 
 
 
f5ec828
872b692
7b856a8
 
 
 
 
f5ec828
7b856a8
 
f5ec828
7b856a8
 
 
 
 
 
f5ec828
872b692
f5ec828
 
7b856a8
 
 
 
 
 
 
f5ec828
 
 
7b856a8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49192d35",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install MiniChain into the running kernel's environment (%pip rather than\n",
    "# !pip, which may target a different Python), then clone the repository and\n",
    "# copy its example files into the working directory.\n",
    "%pip install -q git+https://github.com/srush/MiniChain\n",
    "!git clone https://github.com/srush/MiniChain; cp -fr MiniChain/examples/* . "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf1da24c",
   "metadata": {},
   "source": [
    "Summarize a long document by chunking and summarizing parts.  Uses\n",
    "asynchronous calls to the API.  Adapted from LangChain [Map-Reduce\n",
    "summary](https://langchain.readthedocs.io/en/stable/_modules/langchain/chains/mapreduce.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cce74ed6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import trio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f25908e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from minichain import TemplatePrompt, show_log, start_chain"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "174e7a29",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "Prompt that asks the LLM to summarize a passage of text."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12b26a26",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt backed by the summary.pmpt.tpl template file. Elsewhere in this\n",
    "# notebook it is invoked with a dict that carries a \"text\" key.\n",
    "class SummaryPrompt(TemplatePrompt):\n",
    "    template_file = \"summary.pmpt.tpl\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98747659",
   "metadata": {},
   "outputs": [],
   "source": [
    "def chunk(f, width=4000, overlap=800, max_chunks=4):\n",
    "    \"\"\"Split a document into overlapping character chunks.\n",
    "\n",
    "    Reads the file at `f`, replaces each double newline with a single\n",
    "    newline, and cuts the text into windows that start every `width`\n",
    "    characters and run `overlap` characters past the start of the next\n",
    "    window (so each chunk holds at most `width + overlap` characters).\n",
    "\n",
    "    Args:\n",
    "        f: Path of the text file to read.\n",
    "        width: Distance in characters between consecutive chunk starts.\n",
    "            Must be positive.\n",
    "        overlap: Extra characters each chunk takes from the following one.\n",
    "        max_chunks: Maximum number of chunks to return. Defaults to 4, the\n",
    "            limit this example has always used; pass None for no limit.\n",
    "\n",
    "    Returns:\n",
    "        A list of {\"text\": <chunk>} dicts in document order. Empty when the\n",
    "        document is empty.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If `width` is not positive.\n",
    "    \"\"\"\n",
    "    if width <= 0:\n",
    "        raise ValueError(\"width must be a positive number of characters\")\n",
    "    # Context manager so the file handle is closed as soon as it is read.\n",
    "    with open(f) as doc:\n",
    "        text = doc.read().replace(\"\\n\\n\", \"\\n\")\n",
    "    chunks = []\n",
    "    # Starts are strictly below len(text), so no empty trailing chunk is\n",
    "    # produced when the length is an exact multiple of `width`.\n",
    "    for start in range(0, len(text), width):\n",
    "        if max_chunks is not None and len(chunks) >= max_chunks:\n",
    "            break\n",
    "        chunks.append({\"text\": text[start : start + width + overlap]})\n",
    "    return chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0ccfddc",
   "metadata": {},
   "outputs": [],
   "source": [
    "with start_chain(\"summary\") as backend:\n",
    "    prompt = SummaryPrompt(backend.OpenAI())\n",
    "    list_prompt = prompt.map()\n",
    "\n",
    "    # Map step: run the mapped prompt's async entry point over every chunk.\n",
    "    chunks = chunk(\"../state_of_the_union.txt\")\n",
    "    out = trio.run(list_prompt.arun, chunks)\n",
    "\n",
    "    # Reduce step: join the per-chunk summaries and summarize them once more.\n",
    "    combined = \"\\n\".join(out)\n",
    "    print(prompt({\"text\": combined}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3ffd907",
   "metadata": {
    "tags": [
     "hide_inp"
    ]
   },
   "outputs": [],
   "source": [
    "# Preview SummaryPrompt on one sample input. The second argument looks like\n",
    "# an example output to show next to it - TODO confirm against .show().\n",
    "SummaryPrompt().show(\n",
    "    {\"text\": \"One way to fight is to drive down wages and make Americans poorer.\"},\n",
    "    \"Make Americans poorer\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52be8068",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the summary.log file (presumably the log written by the\n",
    "# start_chain(\"summary\") run earlier in this notebook - confirm).\n",
    "show_log(\"summary.log\")"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "tags,-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}