Vlad Iliescu committed on
Commit
57cc74c
·
1 Parent(s): 2b8bb45

feat: more infra, and a manual agent

Browse files
notebooks/01-vi-questions.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/02-vi-handmade-agent.ipynb ADDED
@@ -0,0 +1,810 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2025-06-16T14:22:31.049405Z",
10
+ "start_time": "2025-06-16T14:22:29.311032Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "import time\n",
15
+ "import concurrent\n",
16
+ "from concurrent import futures\n",
17
+ "from typing import Any, List, Dict, Tuple\n",
18
+ "\n",
19
+ "from PIL import Image\n",
20
+ "from openai import AzureOpenAI\n",
21
+ "from tqdm.auto import tqdm\n",
22
+ "\n",
23
+ "import json\n",
24
+ "from pathlib import Path\n",
25
+ "\n",
26
+ "import pandas as pd\n",
27
+ "import requests\n",
28
+ "from dotenv import dotenv_values\n",
29
+ "from smolagents import tool, DuckDuckGoSearchTool\n",
30
+ "import wikipediaapi\n",
31
+ "\n",
32
+ "\n",
33
+ "\n",
34
+ "test_api_base = \"https://agents-course-unit4-scoring.hf.space\"\n",
35
+ "\n",
36
+ "def get_random_question():\n",
37
+ " url = f\"{test_api_base}/random-question\"\n",
38
+ "\n",
39
+ "\n",
40
+ " try:\n",
41
+ " # Fetch the random question\n",
42
+ " response = requests.get(url, timeout=10)\n",
43
+ " response.raise_for_status()\n",
44
+ " question_data = response.json()\n",
45
+ "\n",
46
+ " # Check if there's an associated file to download\n",
47
+ " if question_data.get(\"file_name\") and question_data.get(\"task_id\"):\n",
48
+ " task_id = question_data[\"task_id\"]\n",
49
+ " file_url = f\"{test_api_base}/files/{task_id}\"\n",
50
+ "\n",
51
+ " # Create a directory for downloaded files if it doesn't exist\n",
52
+ " download_dir = Path(\"downloaded_files\")\n",
53
+ " download_dir.mkdir(exist_ok=True)\n",
54
+ "\n",
55
+ " # Download the file\n",
56
+ " file_response = requests.get(file_url, timeout=30)\n",
57
+ " file_response.raise_for_status()\n",
58
+ "\n",
59
+ " # Get filename from content-disposition header or use task_id\n",
60
+ " content_disposition = file_response.headers.get('content-disposition', '')\n",
61
+ " filename = content_disposition.split('filename=')[1].strip('\"')\n",
62
+ " file_path = download_dir / filename\n",
63
+ "\n",
64
+ " # Save the file\n",
65
+ " with open(file_path, 'wb') as f:\n",
66
+ " f.write(file_response.content)\n",
67
+ "\n",
68
+ " # Add the file path to the question data\n",
69
+ " question_data['downloaded_file_path'] = str(file_path)\n",
70
+ " print(f\"Downloaded file to: {file_path}\")\n",
71
+ "\n",
72
+ " return question_data\n",
73
+ "\n",
74
+ " except requests.exceptions.RequestException as e:\n",
75
+ " print(f\"Error fetching question: {e}\")\n",
76
+ " return None\n",
77
+ " except json.JSONDecodeError as e:\n",
78
+ " print(f\"Error parsing JSON response: {e}\")\n",
79
+ " return None\n",
80
+ " except Exception as e:\n",
81
+ " print(f\"Unexpected error: {e}\")\n",
82
+ " return None"
83
+ ],
84
+ "outputs": [
85
+ {
86
+ "name": "stderr",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "/Users/vladi/miniconda3/envs/gaia/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
90
+ " from .autonotebook import tqdm as notebook_tqdm\n"
91
+ ]
92
+ }
93
+ ],
94
+ "execution_count": 1
95
+ },
96
+ {
97
+ "metadata": {
98
+ "ExecuteTime": {
99
+ "end_time": "2025-06-16T14:22:32.140555Z",
100
+ "start_time": "2025-06-16T14:22:31.056873Z"
101
+ }
102
+ },
103
+ "cell_type": "code",
104
+ "source": [
105
+ "question = get_random_question()\n",
106
+ "if question:\n",
107
+ " print(f\"Task ID: {question.get('task_id')}\")\n",
108
+ " print(f\"Question: {question.get('question')}\")\n",
109
+ " print(f\"Level: {question.get('Level')}\")\n",
110
+ " if 'downloaded_file_path' in question:\n",
111
+ " print(f\"Downloaded file: {question['downloaded_file_path']}\")"
112
+ ],
113
+ "id": "55d7941445304e9b",
114
+ "outputs": [
115
+ {
116
+ "name": "stdout",
117
+ "output_type": "stream",
118
+ "text": [
119
+ "Downloaded file to: downloaded_files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3\n",
120
+ "Task ID: 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3\n",
121
+ "Question: Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.\n",
122
+ "\n",
123
+ "In your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\n",
124
+ "\n",
125
+ "Please format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.\n",
126
+ "Level: 1\n",
127
+ "Downloaded file: downloaded_files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3\n"
128
+ ]
129
+ }
130
+ ],
131
+ "execution_count": 2
132
+ },
133
+ {
134
+ "metadata": {
135
+ "ExecuteTime": {
136
+ "end_time": "2025-06-16T14:22:36.176421Z",
137
+ "start_time": "2025-06-16T14:22:36.148111Z"
138
+ }
139
+ },
140
+ "cell_type": "code",
141
+ "source": [
142
+ "config = dotenv_values()\n",
143
+ "\n",
144
+ "client = AzureOpenAI(\n",
145
+ " api_key=config[\"AZURE_OPENAI_API_KEY\"],\n",
146
+ " azure_endpoint=config[\"AZURE_OPENAI_API_BASE\"],\n",
147
+ " api_version=config[\"AZURE_OPENAI_API_VERSION\"]\n",
148
+ ")\n",
149
+ "model_id=config[\"AZURE_OPENAI_CHAT_MODEL\"]"
150
+ ],
151
+ "id": "99393f634f21563f",
152
+ "outputs": [],
153
+ "execution_count": 3
154
+ },
155
+ {
156
+ "metadata": {},
157
+ "cell_type": "markdown",
158
+ "source": "# Question Processing",
159
+ "id": "1290570b730fda4"
160
+ },
161
+ {
162
+ "metadata": {
163
+ "ExecuteTime": {
164
+ "end_time": "2025-06-16T14:22:43.615612Z",
165
+ "start_time": "2025-06-16T14:22:43.204200Z"
166
+ }
167
+ },
168
+ "cell_type": "code",
169
+ "source": [
170
+ "response = requests.get(f\"{test_api_base}/questions\", timeout=15)\n",
171
+ "response.raise_for_status()\n",
172
+ "\n",
173
+ "questions_data = response.json()"
174
+ ],
175
+ "id": "9f6fe414bc8fb090",
176
+ "outputs": [],
177
+ "execution_count": 4
178
+ },
179
+ {
180
+ "metadata": {
181
+ "ExecuteTime": {
182
+ "end_time": "2025-06-16T14:22:43.692064Z",
183
+ "start_time": "2025-06-16T14:22:43.684720Z"
184
+ }
185
+ },
186
+ "cell_type": "code",
187
+ "source": "pd.DataFrame(questions_data)",
188
+ "id": "3d0ab116de02315e",
189
+ "outputs": [
190
+ {
191
+ "data": {
192
+ "text/plain": [
193
+ " task_id \\\n",
194
+ "0 8e867cd7-cff9-4e6c-867a-ff5ddc2550be \n",
195
+ "1 a1e91b78-d3d8-4675-bb8d-62741b4b68a6 \n",
196
+ "2 2d83110e-a098-4ebb-9987-066c06fa42d0 \n",
197
+ "3 cca530fc-4052-43b2-b130-b30968d8aa44 \n",
198
+ "4 4fc2f1ae-8625-45b5-ab34-ad4433bc21f8 \n",
199
+ "5 6f37996b-2ac7-44b0-8e68-6d28256631b4 \n",
200
+ "6 9d191bce-651d-4746-be2d-7ef8ecadb9c2 \n",
201
+ "7 cabe07ed-9eca-40ea-8ead-410ef5e83f91 \n",
202
+ "8 3cef3a44-215e-4aed-8e3b-b1e3f08063b7 \n",
203
+ "9 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3 \n",
204
+ "10 305ac316-eef6-4446-960a-92d80d542f82 \n",
205
+ "11 f918266a-b3e0-4914-865d-4faa564f1aef \n",
206
+ "12 3f57289b-8c60-48be-bd80-01f8099ca449 \n",
207
+ "13 1f975693-876d-457b-a649-393859e79bf3 \n",
208
+ "14 840bfca7-4f7b-481a-8794-c560c340185d \n",
209
+ "15 bda648d7-d618-4883-88f4-3466eabd860e \n",
210
+ "16 cf106601-ab4f-4af9-b045-5295fe67b37d \n",
211
+ "17 a0c07678-e491-4bbc-8f0b-07405144218f \n",
212
+ "18 7bd855d8-463d-4ed5-93ca-5fe35145f733 \n",
213
+ "19 5a0c1adf-205e-4841-a666-7c3ef95def9d \n",
214
+ "\n",
215
+ " question Level \\\n",
216
+ "0 How many studio albums were published by Merce... 1 \n",
217
+ "1 In the video https://www.youtube.com/watch?v=L... 1 \n",
218
+ "2 .rewsna eht sa \"tfel\" drow eht fo etisoppo eht... 1 \n",
219
+ "3 Review the chess position provided in the imag... 1 \n",
220
+ "4 Who nominated the only Featured Article on Eng... 1 \n",
221
+ "5 Given this table defining * on the set S = {a,... 1 \n",
222
+ "6 Examine the video at https://www.youtube.com/w... 1 \n",
223
+ "7 What is the surname of the equine veterinarian... 1 \n",
224
+ "8 I'm making a grocery list for my mom, but she'... 1 \n",
225
+ "9 Hi, I'm making a pie but I could use some help... 1 \n",
226
+ "10 Who did the actor who played Ray in the Polish... 1 \n",
227
+ "11 What is the final numeric output from the atta... 1 \n",
228
+ "12 How many at bats did the Yankee with the most ... 1 \n",
229
+ "13 Hi, I was out sick from my classes on Friday, ... 1 \n",
230
+ "14 On June 6, 2023, an article by Carolyn Collins... 1 \n",
231
+ "15 Where were the Vietnamese specimens described ... 1 \n",
232
+ "16 What country had the least number of athletes ... 1 \n",
233
+ "17 Who are the pitchers with the number before an... 1 \n",
234
+ "18 The attached Excel file contains the sales of ... 1 \n",
235
+ "19 What is the first name of the only Malko Compe... 1 \n",
236
+ "\n",
237
+ " file_name \n",
238
+ "0 \n",
239
+ "1 \n",
240
+ "2 \n",
241
+ "3 cca530fc-4052-43b2-b130-b30968d8aa44.png \n",
242
+ "4 \n",
243
+ "5 \n",
244
+ "6 \n",
245
+ "7 \n",
246
+ "8 \n",
247
+ "9 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 \n",
248
+ "10 \n",
249
+ "11 f918266a-b3e0-4914-865d-4faa564f1aef.py \n",
250
+ "12 \n",
251
+ "13 1f975693-876d-457b-a649-393859e79bf3.mp3 \n",
252
+ "14 \n",
253
+ "15 \n",
254
+ "16 \n",
255
+ "17 \n",
256
+ "18 7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx \n",
257
+ "19 "
258
+ ],
259
+ "text/html": [
260
+ "<div>\n",
261
+ "<style scoped>\n",
262
+ " .dataframe tbody tr th:only-of-type {\n",
263
+ " vertical-align: middle;\n",
264
+ " }\n",
265
+ "\n",
266
+ " .dataframe tbody tr th {\n",
267
+ " vertical-align: top;\n",
268
+ " }\n",
269
+ "\n",
270
+ " .dataframe thead th {\n",
271
+ " text-align: right;\n",
272
+ " }\n",
273
+ "</style>\n",
274
+ "<table border=\"1\" class=\"dataframe\">\n",
275
+ " <thead>\n",
276
+ " <tr style=\"text-align: right;\">\n",
277
+ " <th></th>\n",
278
+ " <th>task_id</th>\n",
279
+ " <th>question</th>\n",
280
+ " <th>Level</th>\n",
281
+ " <th>file_name</th>\n",
282
+ " </tr>\n",
283
+ " </thead>\n",
284
+ " <tbody>\n",
285
+ " <tr>\n",
286
+ " <th>0</th>\n",
287
+ " <td>8e867cd7-cff9-4e6c-867a-ff5ddc2550be</td>\n",
288
+ " <td>How many studio albums were published by Merce...</td>\n",
289
+ " <td>1</td>\n",
290
+ " <td></td>\n",
291
+ " </tr>\n",
292
+ " <tr>\n",
293
+ " <th>1</th>\n",
294
+ " <td>a1e91b78-d3d8-4675-bb8d-62741b4b68a6</td>\n",
295
+ " <td>In the video https://www.youtube.com/watch?v=L...</td>\n",
296
+ " <td>1</td>\n",
297
+ " <td></td>\n",
298
+ " </tr>\n",
299
+ " <tr>\n",
300
+ " <th>2</th>\n",
301
+ " <td>2d83110e-a098-4ebb-9987-066c06fa42d0</td>\n",
302
+ " <td>.rewsna eht sa \"tfel\" drow eht fo etisoppo eht...</td>\n",
303
+ " <td>1</td>\n",
304
+ " <td></td>\n",
305
+ " </tr>\n",
306
+ " <tr>\n",
307
+ " <th>3</th>\n",
308
+ " <td>cca530fc-4052-43b2-b130-b30968d8aa44</td>\n",
309
+ " <td>Review the chess position provided in the imag...</td>\n",
310
+ " <td>1</td>\n",
311
+ " <td>cca530fc-4052-43b2-b130-b30968d8aa44.png</td>\n",
312
+ " </tr>\n",
313
+ " <tr>\n",
314
+ " <th>4</th>\n",
315
+ " <td>4fc2f1ae-8625-45b5-ab34-ad4433bc21f8</td>\n",
316
+ " <td>Who nominated the only Featured Article on Eng...</td>\n",
317
+ " <td>1</td>\n",
318
+ " <td></td>\n",
319
+ " </tr>\n",
320
+ " <tr>\n",
321
+ " <th>5</th>\n",
322
+ " <td>6f37996b-2ac7-44b0-8e68-6d28256631b4</td>\n",
323
+ " <td>Given this table defining * on the set S = {a,...</td>\n",
324
+ " <td>1</td>\n",
325
+ " <td></td>\n",
326
+ " </tr>\n",
327
+ " <tr>\n",
328
+ " <th>6</th>\n",
329
+ " <td>9d191bce-651d-4746-be2d-7ef8ecadb9c2</td>\n",
330
+ " <td>Examine the video at https://www.youtube.com/w...</td>\n",
331
+ " <td>1</td>\n",
332
+ " <td></td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>7</th>\n",
336
+ " <td>cabe07ed-9eca-40ea-8ead-410ef5e83f91</td>\n",
337
+ " <td>What is the surname of the equine veterinarian...</td>\n",
338
+ " <td>1</td>\n",
339
+ " <td></td>\n",
340
+ " </tr>\n",
341
+ " <tr>\n",
342
+ " <th>8</th>\n",
343
+ " <td>3cef3a44-215e-4aed-8e3b-b1e3f08063b7</td>\n",
344
+ " <td>I'm making a grocery list for my mom, but she'...</td>\n",
345
+ " <td>1</td>\n",
346
+ " <td></td>\n",
347
+ " </tr>\n",
348
+ " <tr>\n",
349
+ " <th>9</th>\n",
350
+ " <td>99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3</td>\n",
351
+ " <td>Hi, I'm making a pie but I could use some help...</td>\n",
352
+ " <td>1</td>\n",
353
+ " <td>99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3</td>\n",
354
+ " </tr>\n",
355
+ " <tr>\n",
356
+ " <th>10</th>\n",
357
+ " <td>305ac316-eef6-4446-960a-92d80d542f82</td>\n",
358
+ " <td>Who did the actor who played Ray in the Polish...</td>\n",
359
+ " <td>1</td>\n",
360
+ " <td></td>\n",
361
+ " </tr>\n",
362
+ " <tr>\n",
363
+ " <th>11</th>\n",
364
+ " <td>f918266a-b3e0-4914-865d-4faa564f1aef</td>\n",
365
+ " <td>What is the final numeric output from the atta...</td>\n",
366
+ " <td>1</td>\n",
367
+ " <td>f918266a-b3e0-4914-865d-4faa564f1aef.py</td>\n",
368
+ " </tr>\n",
369
+ " <tr>\n",
370
+ " <th>12</th>\n",
371
+ " <td>3f57289b-8c60-48be-bd80-01f8099ca449</td>\n",
372
+ " <td>How many at bats did the Yankee with the most ...</td>\n",
373
+ " <td>1</td>\n",
374
+ " <td></td>\n",
375
+ " </tr>\n",
376
+ " <tr>\n",
377
+ " <th>13</th>\n",
378
+ " <td>1f975693-876d-457b-a649-393859e79bf3</td>\n",
379
+ " <td>Hi, I was out sick from my classes on Friday, ...</td>\n",
380
+ " <td>1</td>\n",
381
+ " <td>1f975693-876d-457b-a649-393859e79bf3.mp3</td>\n",
382
+ " </tr>\n",
383
+ " <tr>\n",
384
+ " <th>14</th>\n",
385
+ " <td>840bfca7-4f7b-481a-8794-c560c340185d</td>\n",
386
+ " <td>On June 6, 2023, an article by Carolyn Collins...</td>\n",
387
+ " <td>1</td>\n",
388
+ " <td></td>\n",
389
+ " </tr>\n",
390
+ " <tr>\n",
391
+ " <th>15</th>\n",
392
+ " <td>bda648d7-d618-4883-88f4-3466eabd860e</td>\n",
393
+ " <td>Where were the Vietnamese specimens described ...</td>\n",
394
+ " <td>1</td>\n",
395
+ " <td></td>\n",
396
+ " </tr>\n",
397
+ " <tr>\n",
398
+ " <th>16</th>\n",
399
+ " <td>cf106601-ab4f-4af9-b045-5295fe67b37d</td>\n",
400
+ " <td>What country had the least number of athletes ...</td>\n",
401
+ " <td>1</td>\n",
402
+ " <td></td>\n",
403
+ " </tr>\n",
404
+ " <tr>\n",
405
+ " <th>17</th>\n",
406
+ " <td>a0c07678-e491-4bbc-8f0b-07405144218f</td>\n",
407
+ " <td>Who are the pitchers with the number before an...</td>\n",
408
+ " <td>1</td>\n",
409
+ " <td></td>\n",
410
+ " </tr>\n",
411
+ " <tr>\n",
412
+ " <th>18</th>\n",
413
+ " <td>7bd855d8-463d-4ed5-93ca-5fe35145f733</td>\n",
414
+ " <td>The attached Excel file contains the sales of ...</td>\n",
415
+ " <td>1</td>\n",
416
+ " <td>7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx</td>\n",
417
+ " </tr>\n",
418
+ " <tr>\n",
419
+ " <th>19</th>\n",
420
+ " <td>5a0c1adf-205e-4841-a666-7c3ef95def9d</td>\n",
421
+ " <td>What is the first name of the only Malko Compe...</td>\n",
422
+ " <td>1</td>\n",
423
+ " <td></td>\n",
424
+ " </tr>\n",
425
+ " </tbody>\n",
426
+ "</table>\n",
427
+ "</div>"
428
+ ]
429
+ },
430
+ "execution_count": 5,
431
+ "metadata": {},
432
+ "output_type": "execute_result"
433
+ }
434
+ ],
435
+ "execution_count": 5
436
+ },
437
+ {
438
+ "metadata": {
439
+ "ExecuteTime": {
440
+ "end_time": "2025-06-16T14:28:12.335526Z",
441
+ "start_time": "2025-06-16T14:28:12.332298Z"
442
+ }
443
+ },
444
+ "cell_type": "code",
445
+ "source": [
446
+ "df = pd.DataFrame(questions_data)\n",
447
+ "df[df[\"file_name\"] == \"\"][\"question\"].values"
448
+ ],
449
+ "id": "d624cc31dc5bb13d",
450
+ "outputs": [
451
+ {
452
+ "data": {
453
+ "text/plain": [
454
+ "array(['How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',\n",
455
+ " 'In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',\n",
456
+ " '.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI',\n",
457
+ " 'Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?',\n",
458
+ " 'Given this table defining * on the set S = {a, b, c, d, e}\\n\\n|*|a|b|c|d|e|\\n|---|---|---|---|---|---|\\n|a|a|b|c|b|d|\\n|b|b|c|a|e|c|\\n|c|c|a|b|b|a|\\n|d|b|e|b|e|d|\\n|e|d|b|a|d|c|\\n\\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.',\n",
459
+ " 'Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\\n\\nWhat does Teal\\'c say in response to the question \"Isn\\'t that hot?\"',\n",
460
+ " \"What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?\",\n",
461
+ " \"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\\n\\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\\n\\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.\",\n",
462
+ " 'Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.',\n",
463
+ " 'How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?',\n",
464
+ " 'On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?',\n",
465
+ " \"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.\",\n",
466
+ " \"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.\",\n",
467
+ " \"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\",\n",
468
+ " 'What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?'],\n",
469
+ " dtype=object)"
470
+ ]
471
+ },
472
+ "execution_count": 11,
473
+ "metadata": {},
474
+ "output_type": "execute_result"
475
+ }
476
+ ],
477
+ "execution_count": 11
478
+ },
479
+ {
480
+ "metadata": {
481
+ "ExecuteTime": {
482
+ "end_time": "2025-06-06T11:41:12.207070Z",
483
+ "start_time": "2025-06-06T11:41:12.193633Z"
484
+ }
485
+ },
486
+ "cell_type": "code",
487
+ "source": [
488
+ "@tool\n",
489
+ "def read_file(file_path_str: str) -> str:\n",
490
+ " \"\"\"\n",
491
+ " A tool that reads the contents of a file and returns them as text.\n",
492
+ "\n",
493
+ " Args:\n",
494
+ " file_path_str: The path to the file that should be read.\n",
495
+ " \"\"\"\n",
496
+ "\n",
497
+ " file_path = Path(file_path_str)\n",
498
+ " file_path = file_path.resolve()\n",
499
+ " if not file_path.exists() or not file_path.is_file():\n",
500
+ " raise ValueError(f\"File {file_path} does not exist or is not a file.\")\n",
501
+ "\n",
502
+ " switcher = {\n",
503
+ " \".txt\": lambda: file_path.read_text(encoding=\"utf-8\"),\n",
504
+ " \".csv\": lambda: file_path.read_text(encoding=\"utf-8\"),\n",
505
+ " \".py\": lambda: file_path.read_text(encoding=\"utf-8\"),\n",
506
+ " \".xlsx\": lambda: pd.read_excel(file_path).to_string(),\n",
507
+ " }\n",
508
+ "\n",
509
+ " return switcher.get(file_path.suffix, lambda: \"Unsupported file type\")()\n",
510
+ "\n",
511
+ "@tool\n",
512
+ "def wikipedia_search(page: str) -> str:\n",
513
+ " \"\"\"\n",
514
+ " A tool that returns the contents for a specific Wikipedia page if found, or \"This content has been truncated to stay below 0 characters\" if page not found.\n",
515
+ "\n",
516
+ " Args:\n",
517
+ " page: The Wikipedia page for which to retrieve the content.\n",
518
+ " \"\"\"\n",
519
+ " wiki_wiki = wikipediaapi.Wikipedia(user_agent='LLM Agents', language='en')\n",
520
+ "\n",
521
+ " page_py = wiki_wiki.page(page)\n",
522
+ " return page_py.text\n",
523
+ "\n",
524
+ "def process_question(question_data: dict[str, Any]) -> dict[str, str]:\n",
525
+ " task_id = question_data.get(\"task_id\")\n",
526
+ " question_text = question_data.get(\"question\")\n",
527
+ " format_instructions = \"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\"\n",
528
+ " adjusted_question_text = f\"{format_instructions}\\n\\n{question_text}\"\n",
529
+ "\n",
530
+ " file_path = None\n",
531
+ " if question_data.get(\"file_name\"):\n",
532
+ " task_id = question_data[\"task_id\"]\n",
533
+ " file_url = f\"{test_api_base}/files/{task_id}\"\n",
534
+ "\n",
535
+ " download_dir = Path(\"downloaded_files\")\n",
536
+ " download_dir.mkdir(exist_ok=True)\n",
537
+ "\n",
538
+ " file_response = requests.get(file_url, timeout=30)\n",
539
+ " file_response.raise_for_status()\n",
540
+ "\n",
541
+ " file_path = download_dir / question_data.get(\"file_name\")\n",
542
+ "\n",
543
+ " with open(file_path, 'wb') as f:\n",
544
+ " f.write(file_response.content)\n",
545
+ "\n",
546
+ " agent = CodeAgent(tools=[read_file,\n",
547
+ " # wikipedia_search\n",
548
+ " DuckDuckGoSearchTool(),\n",
549
+ " VisitWebpageTool()\n",
550
+ " ], model=model, max_steps=10,\n",
551
+ " # verbosity_level=0,\n",
552
+ " max_print_outputs_length=0)\n",
553
+ "\n",
554
+ " if file_path and file_path.suffix in ['.png', '.jpg', '.jpeg']: # I know, it's inconsistent\n",
555
+ " answer = agent.run(task=adjusted_question_text, images=[Image.open(file_path)])\n",
556
+ " else:\n",
557
+ " answer = agent.run(task=f\"{adjusted_question_text}{f' File: |{file_path}|' if question_data.get('file_name') else ''}\", )\n",
558
+ "\n",
559
+ " # print(f\"Task ID: {task_id}, Question: {question_text}, Answer: {answer}\")\n",
560
+ "\n",
561
+ " return {\n",
562
+ " \"task_id\": task_id,\n",
563
+ " \"submitted_answer\": answer,\n",
564
+ " \"question\": question_text\n",
565
+ " }\n",
566
+ "\n",
567
+ "\n",
568
+ "def run_agents_parallel(questions_data: List[Dict[str, Any]], max_workers: int = 4) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:\n",
569
+ " start = time.time()\n",
570
+ "\n",
571
+ " answers = []\n",
572
+ " results_log = []\n",
573
+ "\n",
574
+ " with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
575
+ " future_to_question = {executor.submit(process_question, q): q for q in questions_data}\n",
576
+ "\n",
577
+ " for future in tqdm(concurrent.futures.as_completed(future_to_question)):\n",
578
+ " try:\n",
579
+ " answer = future.result()\n",
580
+ " results_log.append(answer)\n",
581
+ " answers.append(answer)\n",
582
+ "\n",
583
+ " except Exception as e:\n",
584
+ " print(f\"Question processing failed: {e}\")\n",
585
+ "\n",
586
+ " submission_data = {\n",
587
+ " \"username\": \"vladi\",\n",
588
+ " \"agent_code\": \"https://huggingface.co/spaces/vladi/AgentsGAIAFun\",\n",
589
+ " \"answers\": answers\n",
590
+ " }\n",
591
+ " end = time.time()\n",
592
+ " print(f\"Processing time (parallel): {end - start:.2f} seconds\")\n",
593
+ "\n",
594
+ " return submission_data, results_log\n",
595
+ "\n",
596
+ "def run_agents(questions_data: list[{}]):\n",
597
+ " start = time.time()\n",
598
+ "\n",
599
+ " answers = []\n",
600
+ " results_log = []\n",
601
+ " for question_data in tqdm(questions_data):\n",
602
+ "\n",
603
+ " answer = process_question(question_data)\n",
604
+ "\n",
605
+ " results_log.append(answer)\n",
606
+ " answers.append(answer)\n",
607
+ "\n",
608
+ " submission_data = {\n",
609
+ " \"username\": \"vladi\",\n",
610
+ " \"agent_code\": \"https://huggingface.co/spaces/vladi/AgentsGAIAFun\",\n",
611
+ " \"answers\": answers\n",
612
+ " }\n",
613
+ "\n",
614
+ " end = time.time()\n",
615
+ " print(f\"Processing time (sequential): {end - start:.2f} seconds\")\n",
616
+ "\n",
617
+ " return submission_data, results_log\n",
618
+ "\n",
619
+ "def submit_answers(submission_data: dict):\n",
620
+ " print(f\"Submitting {len(submission_data['answers'])} answers\")\n",
621
+ "\n",
622
+ " response = requests.post(f\"{test_api_base}/submit\", json=submission_data, timeout=60)\n",
623
+ " response.raise_for_status()\n",
624
+ " result_data = response.json()\n",
625
+ "\n",
626
+ " return result_data\n"
627
+ ],
628
+ "id": "74bce95503481798",
629
+ "outputs": [],
630
+ "execution_count": 28
631
+ },
632
+ {
633
+ "metadata": {},
634
+ "cell_type": "code",
635
+ "source": [
636
+ "submission_data, results_log = run_agents(questions_data[:1])\n",
637
+ "# submission_data, results_log = run_agents_parallel(questions_data)\n",
638
+ "results_df = pd.DataFrame(results_log)\n",
639
+ "\n",
640
+ "results_log"
641
+ ],
642
+ "id": "57e1c5515e9bf8a1",
643
+ "outputs": [],
644
+ "execution_count": null
645
+ },
646
+ {
647
+ "metadata": {
648
+ "ExecuteTime": {
649
+ "end_time": "2025-05-01T14:52:59.689261Z",
650
+ "start_time": "2025-05-01T14:52:59.686607Z"
651
+ }
652
+ },
653
+ "cell_type": "code",
654
+ "source": "submission_data",
655
+ "id": "d66d9affd58d3428",
656
+ "outputs": [
657
+ {
658
+ "data": {
659
+ "text/plain": [
660
+ "{'username': 'vladi',\n",
661
+ " 'agent_code': 'https://huggingface.co/spaces/vladi/AgentsGAIAFun',\n",
662
+ " 'answers': [{'task_id': '2d83110e-a098-4ebb-9987-066c06fa42d0',\n",
663
+ " 'submitted_answer': 'right',\n",
664
+ " 'question': '.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI'},\n",
665
+ " {'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',\n",
666
+ " 'submitted_answer': 'I lack tools for video analysis. This task requires external video analysis tools or manual review.',\n",
667
+ " 'question': 'In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?'},\n",
668
+ " {'task_id': 'cca530fc-4052-43b2-b130-b30968d8aa44',\n",
669
+ " 'submitted_answer': 'Nd2',\n",
670
+ " 'question': \"Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.\"},\n",
671
+ " {'task_id': '6f37996b-2ac7-44b0-8e68-6d28256631b4',\n",
672
+ " 'submitted_answer': 'b,e',\n",
673
+ " 'question': 'Given this table defining * on the set S = {a, b, c, d, e}\\n\\n|*|a|b|c|d|e|\\n|---|---|---|---|---|---|\\n|a|a|b|c|b|d|\\n|b|b|c|a|e|c|\\n|c|c|a|b|b|a|\\n|d|b|e|b|e|d|\\n|e|d|b|a|d|c|\\n\\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.'},\n",
674
+ " {'task_id': '9d191bce-651d-4746-be2d-7ef8ecadb9c2',\n",
675
+ " 'submitted_answer': 'I cannot directly analyze video content, including YouTube videos. Please provide a transcript or text if available.',\n",
676
+ " 'question': 'Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\\n\\nWhat does Teal\\'c say in response to the question \"Isn\\'t that hot?\"'},\n",
677
+ " {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',\n",
678
+ " 'submitted_answer': \"I cannot directly access Wikipedia or other data sources to answer this task. Please refer to Mercedes Sosa's discography on Wikipedia or other trusted platforms. Look for studio albums released between 2000 and 2009 and count them.\",\n",
679
+ " 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.'},\n",
680
+ " {'task_id': '3cef3a44-215e-4aed-8e3b-b1e3f08063b7',\n",
681
+ " 'submitted_answer': 'broccoli, celery, corn, green beans, lettuce, sweet potatoes, zucchini',\n",
682
+ " 'question': \"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\\n\\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\\n\\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.\"},\n",
683
+ " {'task_id': '99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3',\n",
684
+ " 'submitted_answer': \"I can't process the MP3 file directly. Could you provide me with a text transcription of the audio contents?\",\n",
685
+ " 'question': 'Hi, I\\'m making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I\\'m not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can\\'t quite make out what she\\'s saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I\\'ve attached the recipe as Strawberry pie.mp3.\\n\\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\\n\\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.'},\n",
686
+ " {'task_id': '305ac316-eef6-4446-960a-92d80d542f82',\n",
687
+ " 'submitted_answer': 'Piotr',\n",
688
+ " 'question': 'Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.'},\n",
689
+ " {'task_id': 'f918266a-b3e0-4914-865d-4faa564f1aef',\n",
690
+ " 'submitted_answer': 0,\n",
691
+ " 'question': 'What is the final numeric output from the attached Python code?'},\n",
692
+ " {'task_id': '4fc2f1ae-8625-45b5-ab34-ad4433bc21f8',\n",
693
+ " 'submitted_answer': 'The only dinosaur-related article promoted to Featured Article status on English Wikipedia in November 2016 is **\"Dakota (dinosaur)\"**. The nomination for this article was made by **FunkMonk**, a prominent editor known for contributions to paleontology-related articles.',\n",
694
+ " 'question': 'Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?'},\n",
695
+ " {'task_id': '3f57289b-8c60-48be-bd80-01f8099ca449',\n",
696
+ " 'submitted_answer': 'The requested file does not exist. Please provide further instructions or clarify how to access the 1977 Yankees player statistics.',\n",
697
+ " 'question': 'How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?'},\n",
698
+ " {'task_id': '1f975693-876d-457b-a649-393859e79bf3',\n",
699
+ " 'submitted_answer': 'The provided file is an audio file (.mp3). Please use a transcription service like Otter.ai, Rev, or Whisper speech-to-text tools to extract the spoken information. Once transcribed, I can assist further in analyzing the text for the required page numbers.',\n",
700
+ " 'question': \"Hi, I was out sick from my classes on Friday, so I'm trying to figure out what I need to study for my Calculus mid-term next week. My friend from class sent me an audio recording of Professor Willowbrook giving out the recommended reading for the test, but my headphones are broken :(\\n\\nCould you please listen to the recording for me and tell me the page numbers I'm supposed to go over? I've attached a file called Homework.mp3 that has the recording. Please provide just the page numbers as a comma-delimited list. And please provide the list in ascending order.\"},\n",
701
+ " {'task_id': 'bda648d7-d618-4883-88f4-3466eabd860e',\n",
702
+ " 'submitted_answer': 'Hanoi',\n",
703
+ " 'question': \"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.\"},\n",
704
+ " {'task_id': 'cf106601-ab4f-4af9-b045-5295fe67b37d',\n",
705
+ " 'submitted_answer': 'AFG',\n",
706
+ " 'question': \"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.\"},\n",
707
+ " {'task_id': '840bfca7-4f7b-481a-8794-c560c340185d',\n",
708
+ " 'submitted_answer': 'To answer your query, I conducted research based on the article mentioned and the linked paper. Here\\'s the result:\\n\\nCarolyn Collins Petersen\\'s article on *Universe Today*, published on June 6, 2023, references a team’s observations. The paper linked at the bottom of the article is titled:\\n\\n**\"Spitzer Observations of the Massive Infrared Dark Cloud G38.91-0.44: A Detailed Look at Star Formation.\"**\\n\\nIn this paper, the work performed by R. G. Arendt was supported under the **NASA award number 80GSFC21M0002**.\\n\\nLet me know if you need further clarification or assistance!',\n",
709
+ " 'question': 'On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?'},\n",
710
+ " {'task_id': '7bd855d8-463d-4ed5-93ca-5fe35145f733',\n",
711
+ " 'submitted_answer': 'I cannot process the Excel file due to the limitations in using specific libraries or tools for Excel handling.',\n",
712
+ " 'question': 'The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.'},\n",
713
+ " {'task_id': '5a0c1adf-205e-4841-a666-7c3ef95def9d',\n",
714
+ " 'submitted_answer': 'Anna',\n",
715
+ " 'question': 'What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?'},\n",
716
+ " {'task_id': 'a0c07678-e491-4bbc-8f0b-07405144218f',\n",
717
+ " 'submitted_answer': 'To address the query effectively, I will answer based on standard baseball numbering and existing data from July 2023 regarding Taishō Tamai:\\n\\nAs of July 2023, Taishō Tamai wore jersey number **65** for the Yomiuri Giants, a Japanese professional baseball team. To determine the pitchers with numbers before and after his:\\n\\n1. **Pitcher Before** (jersey number **64**): **Ito** \\n2. **Pitcher After** (jersey number **66**): **Takahashi** \\n\\nTherefore, the result is:\\n\\n**Ito, Takahashi**',\n",
718
+ " 'question': \"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\"}]}"
719
+ ]
720
+ },
721
+ "execution_count": 9,
722
+ "metadata": {},
723
+ "output_type": "execute_result"
724
+ }
725
+ ],
726
+ "execution_count": 9
727
+ },
728
+ {
729
+ "metadata": {
730
+ "ExecuteTime": {
731
+ "end_time": "2025-05-01T14:53:15.147718Z",
732
+ "start_time": "2025-05-01T14:53:13.340660Z"
733
+ }
734
+ },
735
+ "cell_type": "code",
736
+ "source": "submit_answers(submission_data)",
737
+ "id": "ea6c404b932e9922",
738
+ "outputs": [
739
+ {
740
+ "name": "stdout",
741
+ "output_type": "stream",
742
+ "text": [
743
+ "Submitting 19 answers\n"
744
+ ]
745
+ },
746
+ {
747
+ "data": {
748
+ "text/plain": [
749
+ "{'username': 'vladi',\n",
750
+ " 'score': 10.0,\n",
751
+ " 'correct_count': 2,\n",
752
+ " 'total_attempted': 19,\n",
753
+ " 'message': 'Score calculated successfully: 2/20 total questions answered correctly (19 valid tasks attempted). Score did not improve previous record, leaderboard not updated.',\n",
754
+ " 'timestamp': '2025-05-01T14:53:15.124360+00:00'}"
755
+ ]
756
+ },
757
+ "execution_count": 10,
758
+ "metadata": {},
759
+ "output_type": "execute_result"
760
+ }
761
+ ],
762
+ "execution_count": 10
763
+ },
764
+ {
765
+ "metadata": {
766
+ "ExecuteTime": {
767
+ "end_time": "2025-06-06T11:39:35.785365Z",
768
+ "start_time": "2025-06-06T11:39:35.514993Z"
769
+ }
770
+ },
771
+ "cell_type": "code",
772
+ "source": "wikipedia_search(\"Mercedes Sosa\")",
773
+ "id": "57e58477ff361cff",
774
+ "outputs": [
775
+ {
776
+ "data": {
777
+ "text/plain": [
778
+ "'Haydée Mercedes \"La Negra\" Sosa (Latin American Spanish: [meɾˈseðes ˈsosa]; 9 July 1935 – 4 October 2009) was an Argentine singer who was popular throughout Latin America and many countries outside the region. With her roots in Argentine folk music, Sosa became one of the preeminent exponents of El nuevo cancionero. She gave voice to songs written by many Latin American songwriters. Her music made people hail her as the \"voice of the voiceless ones\". She was often called \"the conscience of Latin America\".\\nSosa performed in venues such as the Lincoln Center in New York City, the Théâtre Mogador in Paris, the Sistine Chapel in Vatican City, as well as sold-out shows in New York\\'s Carnegie Hall and the Roman Colosseum during her final decade of life. Her career spanned four decades and she was the recipient of six Latin Grammy awards (2000, 2003, 2004, 2006, 2009, 2011), including a Latin Grammy Lifetime Achievement Award in 2004 and two posthumous Latin Grammy Award for Best Folk Album in 2009 and 2011. She won the Premio Gardel in 2000, the main musical award in Argentina. She served as an ambassador for UNICEF.\\n\\nLife\\nSosa was born on 9 July 1935, in San Miguel de Tucumán, in the northwestern Argentine province of Tucumán, of mestizo ancestry. She was of French, Spanish and Diaguita descent. Her nickname \"la negra\", which is a common nickname in Argentina for people with darker complexion, is a reference to her indigenous heritage. Her parents, a day laborer and a washerwoman, were Peronists, although they never registered in the party, and she started her career as a singer for the Peronist Party in Tucuman under the name Gladys Osorio. In 1950, at age fifteen, she won a singing competition organized by a local radio station and was given a contract to perform for two months. She recorded her first album, La Voz de la Zafra, in 1959. 
A performance at the 1965 Cosquín National Folklore Festival—where she was introduced and brought to the stage while sitting in the audience by fellow folk singer Jorge Cafrune— brought her to the attention of the Argentine public. Sosa and her first husband, Manuel Oscar Matus, with whom she had one son, were key players in the mid-60s nueva canción movement (which was called nuevo cancionero in Argentina). Her second record was Canciones con Fundamento, a collection of Argentine folk songs.\\n\\nSosa \"spent the late 1960s building her audience in Europe and among the cosmopolitan middle class in Buenos Aires, becoming in the process a much bigger star\" than her contemporaries. In 1967, Sosa toured the United States and Europe with great success. In later years, she performed and recorded extensively, broadening her repertoire to include material from throughout Latin America.\\nIn the early 1970s, Sosa released two concept albums in collaboration with composer Ariel Ramírez and lyricist Félix Luna: Cantata Sudamericana and Mujeres Argentinas (Argentine Women). She also recorded a tribute to Chilean musician Violeta Parra in 1971, including what was to become one of Sosa\\'s signature songs, Gracias a la vida. She further popularized of songs written by Milton Nascimento of Brazil and Pablo Milanés and Silvio Rodríguez both from Cuba. Throughout the decade, she released albums such as Hasta la Victoria in 1972 and Traigo un Pueblo en mi Voz in 1973. They featured songs like \"Cuando tenga la tierra\", written by Ariel Petrocelli and Daniel Toro, which tackles political and social issues like wealth and land inequality. 
During the 1970s she was a part of two films by the director Leopoldo Torre Nilsson: El Santo de la Espada in 1970 and Güemes, la tierra en armas in 1971, in which she portrayed Juana Azurduy de Padilla, the guerrilla military leader who fought for Argentine independence.\\n\\nAfter the military junta of Jorge Videla came to power in 1976, the atmosphere in Argentina grew increasingly oppressive. Sosa faced death threats against both her and her family, but refused for many years to leave the country. At a concert in La Plata in 1979, Sosa was searched and arrested on stage, along with all those attending the concert. Their release came about through international intervention. Despite attempts to hold more concerts, she was officially barred from performing by the military regime. Banned in her own country, she moved to Paris and then to Madrid. She has spoken publicly about her artistic and emotional struggles during this period of her life. While in exile, she released the album A Quien Doy in 1981. The album included a recording of the song \"Cuando Me Acuerdo de Mi Pais\" which was originally written by the prolific Chilean singer/songwriter, Patricio Manns. The song, which he wrote while also in political exile, expresses the sorrow he felt from being separated from his homeland. She related to this feeling and struggled to continue recording and performing. In an interview with the New York Times, she said, “It was a mental problem, a problem of morale...It wasn’t my throat, or anything physical\".\\nSosa returned to Argentina from her exile in Europe in February 1982, several months before the military regime collapsed as a result of the Falklands War, and gave a series of concerts at the Teatro Ópera in Buenos Aires, where she invited many of her younger colleagues to share the stage. A double album of recordings from these performances became an instant best seller. She then traveled to perform in her home province of Tucuman. 
However, these performances were largely ignored by mainstream media in the country. In subsequent years, Sosa continued to tour both in Argentina and abroad, performing in such venues as the Lincoln Center in New York City and the Théâtre Mogador in Paris. In poor health for much of the 1990s, she performed a comeback show in Argentina in 1998. In 1994, she played in the Sistine Chapel in Vatican City. In 2002, she sold out both Carnegie Hall in New York and the Colosseum in Rome in the same year.\\n\\nA supporter of Perón, she favored leftist causes throughout her life. She supported President Raul Alfonsin in the election of 1983 which marked the return of democracy in Argentina following the dictatorship. She referred to this election as \"Argentina\\'s Spring\" She opposed President Carlos Menem, who was in office from 1989 to 1999, and supported the election of Néstor Kirchner, who became president in 2003.\\nSosa was a UNESCO Goodwill Ambassador for Latin America and the Caribbean.\\nSosa disliked being identified as a protest singer. 
While she was outright in her political stances, Sosa said the following on the position of the artist:\\n\\n“An artist isn’t political in the party political sense – they have a constituency, which is their public – it is the poetry that matters most of all.”\\nIn a career spanning four decades, she worked with performers across several genres and generations, folk, opera, pop, rock, including Martha Argerich, Andrea Bocelli, David Broza, Franco Battiato, Jaime Roos, Joan Baez, Francis Cabrel, Gal Costa, Luz Casal, Lila Downs, Lucio Dalla, Maria Farantouri, Lucecita Benitez, Nilda Fernández, Charly Garcia, León Gieco, Gian Marco, Nana Mouskouri, Pablo Milanés, Holly Near, Milton Nascimento, Pata Negra, Fito Páez, Franco De Vita, Lourdes Pérez, Luciano Pavarotti, Silvio Rodríguez, Ismael Serrano, Shakira, Sting, Caetano Veloso, Julieta Venegas, Gustavo Cerati and Konstantin Wecker\\nSosa participated in a 1999 production of Ariel Ramírez\\'s Misa Criolla. Her song Balderrama is featured in the 2008 movie Che, starring Benicio del Toro as the Argentine Marxist revolutionary Che Guevara.\\nSosa was the co-chair of the Earth Charter International Commission.\\n\\nAwards\\nSosa won the Latin Grammy Award for Best Folk Album in 2000 (Misa Criolla), 2003 (Acústico), 2006 (Corazón Libre), 2009 (Cantora 1, which also won Best Recording Package and was nominated for Album of the Year), and 2011 (Deja La Vida Volar), as well as several international awards.\\nIn 1995, Konex Foundation from Argentina granted her the Diamond Konex Award, one of the most prestigious awards in Argentina, as the most important personality in the popular music of her country in the last decade.\\n\\nDeath\\nSuffering from recurrent endocrine and respiratory problems in later years, the 74-year-old Sosa was hospitalized in Buenos Aires on 18 September 2009. She died from multiple organ failure on 4 October 2009, at 5:15 am. She is survived by one son, Fabián Matus, born of her first marriage. 
He said: \"She lived her 74 years to the fullest. She had done practically everything she wanted, she didn\\'t have any type of barrier or any type of fear that limited her\". The hospital expressed its sympathies to her relatives. Her website featured the following: \"Her undisputed talent, her honesty and her profound convictions leave a great legacy to future generations\".\\nHer body was placed on display at the National Congress building in Buenos Aires for the public to pay their respects, and President Fernández de Kirchner ordered three days of national mourning. Thousands had queued by the end of the day.\\nSosa\\'s obituary in The Daily Telegraph said she was \"an unrivalled interpreter of works by her compatriot, the Argentine Atahualpa Yupanqui, and Chile\\'s Violeta Parra\". Helen Popper of Reuters reported her death by saying she \"fought South America\\'s dictators with her voice and became a giant of contemporary Latin American music\". Sosa received three Latin Grammy nominations for her album, in 2009 . She went on to win Best Folk Album about a month after her death.\\n\\nTributes\\nIn 2019, Sosa was celebrated by a Google Doodle. 
The doodle was showcased in Argentina, Chile, Uruguay, Paraguay, Bolivia, Peru, Ecuador, Cuba, Iceland, Sweden, Serbia, Greece, Israel and Vietnam.\\nIn 2023, Rolling Stone ranked Sosa at number 160 on its list of the 200 Greatest Singers of All Time.\\n\\nDiscography\\nSosa recorded forty albums.\\n\\nStudio albums\\nEPs\\nLive albums\\nCompilation albums\\nFilmography\\nGüemes, la tierra en armas (1971)\\nArgentinísima (1972)\\nEsta es mi Argentina (1974)\\nMercedes Sosa, como un pájaro libre (1983)\\nSerá possible el sur: Mercedes Sosa (1985)\\nHistorias de Argentina en vivo (2001)\\n\\nFurther reading\\nReferences\\nExternal links\\n\\nTribute to Mercedes Sosa (in Portuguese BR)\\nMercedes Sosa\\'s website (in Spanish)\\nMercedes Sosa\\'s News (in Spanish)\\nMercedes Sosa at IMDb\\nMercedes Sosa\\'s Discography on Discogs.com'"
779
+ ]
780
+ },
781
+ "execution_count": 27,
782
+ "metadata": {},
783
+ "output_type": "execute_result"
784
+ }
785
+ ],
786
+ "execution_count": 27
787
+ }
788
+ ],
789
+ "metadata": {
790
+ "kernelspec": {
791
+ "display_name": "Python 3",
792
+ "language": "python",
793
+ "name": "python3"
794
+ },
795
+ "language_info": {
796
+ "codemirror_mode": {
797
+ "name": "ipython",
798
+ "version": 2
799
+ },
800
+ "file_extension": ".py",
801
+ "mimetype": "text/x-python",
802
+ "name": "python",
803
+ "nbconvert_exporter": "python",
804
+ "pygments_lexer": "ipython2",
805
+ "version": "2.7.6"
806
+ }
807
+ },
808
+ "nbformat": 4,
809
+ "nbformat_minor": 5
810
+ }
notebooks/03-vi-handmade-agent.ipynb ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2025-06-20T12:57:41.792135Z",
10
+ "start_time": "2025-06-20T12:57:40.889869Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "import time\n",
15
+ "import concurrent\n",
16
+ "from concurrent import futures\n",
17
+ "from typing import Any, List, Dict, Tuple\n",
18
+ "\n",
19
+ "from PIL import Image\n",
20
+ "from openai import AzureOpenAI\n",
21
+ "from tqdm.auto import tqdm\n",
22
+ "\n",
23
+ "import json\n",
24
+ "from pathlib import Path\n",
25
+ "\n",
26
+ "import pandas as pd\n",
27
+ "import requests\n",
28
+ "from dotenv import dotenv_values\n",
29
+ "from smolagents import tool, DuckDuckGoSearchTool\n",
30
+ "import wikipediaapi\n"
31
+ ],
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "/Users/vladi/miniconda3/envs/gaia/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
38
+ " from .autonotebook import tqdm as notebook_tqdm\n"
39
+ ]
40
+ }
41
+ ],
42
+ "execution_count": 1
43
+ },
44
+ {
45
+ "metadata": {
46
+ "ExecuteTime": {
47
+ "end_time": "2025-06-20T12:57:52.502521Z",
48
+ "start_time": "2025-06-20T12:57:52.498693Z"
49
+ }
50
+ },
51
+ "cell_type": "code",
52
+ "source": [
53
+ "questions = ['How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',\n",
54
+ " '.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI',\n",
55
+ " 'Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?',\n",
56
+ " 'Given this table defining * on the set S = {a, b, c, d, e}\\n\\n|*|a|b|c|d|e|\\n|---|---|---|---|---|---|\\n|a|a|b|c|b|d|\\n|b|b|c|a|e|c|\\n|c|c|a|b|b|a|\\n|d|b|e|b|e|d|\\n|e|d|b|a|d|c|\\n\\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.',\n",
57
+ " 'Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\\n\\nWhat does Teal\\'c say in response to the question \"Isn\\'t that hot?\"',\n",
58
+ " \"What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?\",\n",
59
+ " \"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\\n\\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\\n\\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.\",\n",
60
+ " 'Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.',\n",
61
+ " 'How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?',\n",
62
+ " 'On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?',\n",
63
+ " \"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.\",\n",
64
+ " \"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.\",\n",
65
+ " \"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\",\n",
66
+ " 'What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?']"
67
+ ],
68
+ "id": "55d7941445304e9b",
69
+ "outputs": [],
70
+ "execution_count": 2
71
+ },
72
+ {
73
+ "metadata": {
74
+ "ExecuteTime": {
75
+ "end_time": "2025-06-20T13:10:59.085162Z",
76
+ "start_time": "2025-06-20T13:10:59.079916Z"
77
+ }
78
+ },
79
+ "cell_type": "code",
80
+ "source": [
81
+ "from smolagents import GoogleSearchTool\n",
82
+ "import requests\n",
83
+ "import urllib.request\n",
84
+ "from markdownify import markdownify as md\n",
85
+ "from bs4 import BeautifulSoup\n",
86
+ "from dotenv import dotenv_values\n",
87
+ "from openai import AzureOpenAI\n",
88
+ "import json\n",
89
+ "\n",
90
+ "\n",
91
+ "\n",
92
+ "def get_search_results_for(query):\n",
93
+ " encoded_query = urllib.parse.urlencode({'q': query})\n",
94
+ " url = f'https://html.duckduckgo.com/html?q={encoded_query}'\n",
95
+ "\n",
96
+ " request = urllib.request.Request(url)\n",
97
+ " request.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36')\n",
98
+ "\n",
99
+ " raw_response = urllib.request.urlopen(request).read()\n",
100
+ " html = raw_response.decode(\"utf-8\")\n",
101
+ "\n",
102
+ " soup = BeautifulSoup(html, 'html.parser')\n",
103
+ " a_results = soup.select(\"a.result__a\")\n",
104
+ "\n",
105
+ " links = []\n",
106
+ " for a_result in a_results:\n",
107
+ " # print(a_result)\n",
108
+ " url = a_result.attrs['href']\n",
109
+ " title = a_result.text\n",
110
+ " links.append({\"title\": title, \"url\": url} )\n",
111
+ "\n",
112
+ " return links\n",
113
+ "\n",
114
+ "search_tool = GoogleSearchTool(\"serper\")\n",
115
+ "\n",
116
+ "def get_google_search_results_for(query: str):\n",
117
+ " return search_tool.forward(query)\n",
118
+ "\n",
119
+ "\n",
120
+ "def load_page_content(url) -> str:\n",
121
+ " response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'})\n",
122
+ " page_content = response.content.decode('utf-8')\n",
123
+ " page_content_md = md(page_content)\n",
124
+ "\n",
125
+ " return page_content_md\n",
126
+ "\n"
127
+ ],
128
+ "id": "bf91141def5a906",
129
+ "outputs": [],
130
+ "execution_count": 24
131
+ },
132
+ {
133
+ "metadata": {
134
+ "ExecuteTime": {
135
+ "end_time": "2025-06-20T13:10:59.604614Z",
136
+ "start_time": "2025-06-20T13:10:59.600718Z"
137
+ }
138
+ },
139
+ "cell_type": "code",
140
+ "source": [
141
+ "tools = [{\n",
142
+ " \"type\": \"function\",\n",
143
+ " \"function\": {\n",
144
+ " \"name\": \"get_search_results_for\",\n",
145
+ " \"description\": \"Returns the top 10 results for a DuckDuckGo query.\",\n",
146
+ " \"parameters\": {\n",
147
+ " \"type\": \"object\",\n",
148
+ " \"properties\": {\n",
149
+ " \"query\": {\n",
150
+ " \"type\": \"string\",\n",
151
+ " \"description\": \"query to search for on DuckDuckGo\"\n",
152
+ " }\n",
153
+ " },\n",
154
+ " \"required\": [\n",
155
+ " \"query\"\n",
156
+ " ],\n",
157
+ " \"additionalProperties\": False\n",
158
+ " },\n",
159
+ " \"strict\": True\n",
160
+ " }\n",
161
+ " },\n",
162
+ " {\n",
163
+ " \"type\": \"function\",\n",
164
+ " \"function\": {\n",
165
+ " \"name\": \"load_page_content\",\n",
166
+ " \"description\": \"Returns the content of a particular webpage.\",\n",
167
+ " \"parameters\": {\n",
168
+ " \"type\": \"object\",\n",
169
+ " \"properties\": {\n",
170
+ " \"url\": {\n",
171
+ " \"type\": \"string\",\n",
172
+ " \"description\": \"Url of the webpage for which to retrieve the content\"\n",
173
+ " }\n",
174
+ " },\n",
175
+ " \"required\": [\n",
176
+ " \"url\"\n",
177
+ " ],\n",
178
+ " \"additionalProperties\": False\n",
179
+ " },\n",
180
+ " \"strict\": True\n",
181
+ " }\n",
182
+ " }\n",
183
+ "]\n",
184
+ "\n",
185
+ "def call_function(name, args):\n",
186
+ " if name == \"get_search_results_for\":\n",
187
+ " return get_google_search_results_for(**args)\n",
188
+ " if name == \"load_page_content\":\n",
189
+ " return load_page_content(**args)\n",
190
+ "\n",
191
+ " return None\n"
192
+ ],
193
+ "id": "45340dbef571198f",
194
+ "outputs": [],
195
+ "execution_count": 25
196
+ },
197
+ {
198
+ "metadata": {
199
+ "ExecuteTime": {
200
+ "end_time": "2025-06-20T13:22:00.080330Z",
201
+ "start_time": "2025-06-20T13:21:51.685133Z"
202
+ }
203
+ },
204
+ "cell_type": "code",
205
+ "source": [
206
+ "query = questions[2]\n",
207
+ "config = dotenv_values()\n",
208
+ "\n",
209
+ "client = AzureOpenAI(\n",
210
+ " api_key=config[\"AZURE_OPENAI_API_KEY\"],\n",
211
+ " azure_endpoint=config[\"AZURE_OPENAI_API_BASE\"],\n",
212
+ " api_version=config[\"AZURE_OPENAI_API_VERSION\"]\n",
213
+ ")\n",
214
+ "openai_chatmodel = config[\"AZURE_OPENAI_CHAT_MODEL\"]\n",
215
+ "\n",
216
+ "GRAY = \"\\033[90m\"\n",
217
+ "BOLD = \"\\033[1m\"\n",
218
+ "RESET = \"\\033[0m\"\n",
219
+ "\n",
220
+ "\n",
221
+ "messages = [\n",
222
+ " {\n",
223
+ " \"role\": \"system\",\n",
224
+ " \"content\": \"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\"\n",
225
+ " },\n",
226
+ " {\"role\": \"user\", \"content\": query}\n",
227
+ "]\n",
228
+ "\n",
229
+ "total_input_token_count = 0\n",
230
+ "total_output_token_count = 0\n",
231
+ "\n",
232
+ "while True:\n",
233
+ " completion = client.chat.completions.create(\n",
234
+ " model=openai_chatmodel,\n",
235
+ " messages=messages,\n",
236
+ " tools=tools\n",
237
+ " )\n",
238
+ "\n",
239
+ " total_input_token_count += completion.usage.prompt_tokens\n",
240
+ " total_output_token_count += completion.usage.completion_tokens\n",
241
+ "\n",
242
+ " if completion.choices[0].finish_reason == \"stop\":\n",
243
+ " print(f\"{BOLD}Final answer: {completion.choices[0].message.content}{RESET}\")\n",
244
+ " break\n",
245
+ " elif completion.choices[0].finish_reason == \"tool_calls\":\n",
246
+ " messages.append(completion.choices[0].message)\n",
247
+ " for tool_call in completion.choices[0].message.tool_calls:\n",
248
+ " name = tool_call.function.name\n",
249
+ " args = json.loads(tool_call.function.arguments)\n",
250
+ "\n",
251
+ " result = call_function(name, args)\n",
252
+ " print(f\"Called {BOLD}{name}({args}){RESET} and it returned {GRAY}{str(result)[:300]}{RESET}\")\n",
253
+ "\n",
254
+ " messages.append({\n",
255
+ " \"role\": \"tool\",\n",
256
+ " \"tool_call_id\": tool_call.id,\n",
257
+ " \"content\": str(result)\n",
258
+ " })\n",
259
+ " else:\n",
260
+ " raise Exception(\"We're not supposed to be here\")"
261
+ ],
262
+ "id": "adfd8a6e27fb6069",
263
+ "outputs": [
264
+ {
265
+ "name": "stdout",
266
+ "output_type": "stream",
267
+ "text": [
268
+ "Called \u001B[1mget_search_results_for({'query': 'only Featured Article on English Wikipedia about a dinosaur promoted November 2016'})\u001B[0m and it returned \u001B[90m## Search Results\n",
269
+ "0. [Wikipedia:Featured article candidates/Giganotosaurus/archive1](https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Giganotosaurus/archive1)\n",
270
+ "\n",
271
+ "The article was promoted by Ian Rose via FACBot (talk) 14:41, 19 November 2016 [1]. ... article is one of the most viewed\u001B[0m\n",
272
+ "Called \u001B[1mload_page_content({'url': 'https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates/Giganotosaurus/archive1'})\u001B[0m and it returned \u001B[90mWikipedia:Featured article candidates/Giganotosaurus/archive1 - Wikipedia\n",
273
+ "\n",
274
+ "[Jump to content](#bodyContent)\n",
275
+ "\n",
276
+ "Main menu\n",
277
+ "\n",
278
+ "Main menu\n",
279
+ "\n",
280
+ "move to sidebar\n",
281
+ "hide\n",
282
+ "\n",
283
+ "Navigation\n",
284
+ "\n",
285
+ "* [Main page](/wiki/Main_Page \"Visit the main page [z]\")\n",
286
+ "* [Contents](/wiki/Wikipedia:Contents \"Guides to browsing Wikipedia\")\n",
287
+ "* [Curren\u001B[0m\n",
288
+ "\u001B[1mFinal answer: The only Featured Article on English Wikipedia about a dinosaur promoted in November 2016 is \"Giganotosaurus,\" and it was nominated by FunkMonk.\n",
289
+ "\n",
290
+ "FINAL ANSWER: FunkMonk\u001B[0m\n"
291
+ ]
292
+ }
293
+ ],
294
+ "execution_count": 30
295
+ },
296
+ {
297
+ "metadata": {},
298
+ "cell_type": "code",
299
+ "outputs": [],
300
+ "execution_count": null,
301
+ "source": "",
302
+ "id": "4518e95a30ad7b8a"
303
+ }
304
+ ],
305
+ "metadata": {
306
+ "kernelspec": {
307
+ "display_name": "Python 3",
308
+ "language": "python",
309
+ "name": "python3"
310
+ },
311
+ "language_info": {
312
+ "codemirror_mode": {
313
+ "name": "ipython",
314
+ "version": 2
315
+ },
316
+ "file_extension": ".py",
317
+ "mimetype": "text/x-python",
318
+ "name": "python",
319
+ "nbconvert_exporter": "python",
320
+ "pygments_lexer": "ipython2",
321
+ "version": "2.7.6"
322
+ }
323
+ },
324
+ "nbformat": 4,
325
+ "nbformat_minor": 5
326
+ }
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio[oauth]==5.27.0
2
  requests==2.32.3
3
- smolagents[openai]==1.14.0
 
 
 
1
  gradio[oauth]==5.27.0
2
  requests==2.32.3
3
+ smolagents[openai]==1.14.0
4
+ wikipedia-api==0.8.1
5
+ openai-function-calling==2.6.0