gperdrizet committed on
Commit b4e2809 · verified · 1 Parent(s): 75bb385

Switched to a single agent powered by GPT-4.1 and added a step wait function to avoid hitting the OpenAI API rate limit.
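For orientation, here is a minimal usage sketch of the single-agent setup this commit lands; the calling script and the sample question are illustrative assumptions, not files touched by the commit:

# Illustrative driver (assumption): build the GPT-4.1-backed CodeAgent from this
# commit and answer one GAIA-style question. create_agent() and its
# step_memory_cap / step_wait callbacks are defined in the diffs below.
from functions.agent import create_agent

agent = create_agent()
answer = agent.run("How many studio albums were published by Mercedes Sosa between 2000 and 2009?")
print(answer)  # the value submitted via the final_answer tool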

configuration.py CHANGED
@@ -1,8 +1,7 @@
+"""Configuration constants for the GAIA agent project."""
+
+from smolagents import OpenAIServerModel, InferenceClientModel
 
-"""
-Configuration constants for the GAIA agent project.
-Contains API URLs and agent instructions used throughout the application.
-"""
 # pylint: disable=line-too-long
 
 # Which questions to answer
@@ -15,3 +14,30 @@ DEFAULT_API_URL = 'https://agents-course-unit4-scoring.hf.space'
 INSTRUCTIONS = """
 You are a general AI assistant. I will ask you a question. Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Submit the final answer via the final_answer tool.
 """
+
+# Agent model definitions
+MANAGER_MODEL = InferenceClientModel(
+    "deepseek-ai/DeepSeek-V3",
+    provider="together",
+    max_tokens=64000
+)
+
+WORKER_MODEL = InferenceClientModel(
+    "deepseek-ai/DeepSeek-V3",
+    provider="together",
+    max_tokens=64000
+)
+
+CHECK_MODEL = InferenceClientModel(
+    "deepseek-ai/DeepSeek-V3",
+    provider="together",
+    max_tokens=64000
+)
+
+MODEL = OpenAIServerModel(
+    model_id="gpt-4.1",
+    max_tokens=8000
+)
+
+TOKEN_LIMITER = 5000
+STEP_WAIT = 60
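A rough sanity check on the new throttle settings, not part of the commit; it only combines STEP_WAIT above with the max_steps cap set in functions/agent.py below:

# Worst-case time spent sleeping per question under the step-wait throttle.
STEP_WAIT_S = 60   # STEP_WAIT from configuration.py
MAX_STEPS = 20     # max_steps passed to CodeAgent in functions/agent.py
print(STEP_WAIT_S * MAX_STEPS / 60)  # 20.0 -> up to ~20 minutes of sleep alone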
functions/agent.py CHANGED
@@ -1,141 +1,91 @@
 '''Agent definition for GAIA question answering system.'''
 
 # Standard library
-import os
-import json
 import logging
 
-from openai import OpenAI
-
 # Imports for agent creation
-from smolagents import CodeAgent, InferenceClientModel, VisitWebpageTool, ActionStep, MessageRole
+from smolagents import CodeAgent, VisitWebpageTool
+
 from functions.tools import (
     google_search,
     wikipedia_search,
-    get_wikipedia_page
+    get_wikipedia_page,
+    libretext_book_search,
+    get_libretext_book
 )
 
+from functions.agent_helper_functions import step_memory_cap, step_wait
+from configuration import MODEL
+
 # Get logger for this module
 logger = logging.getLogger(__name__)
 
 def create_agent():
     '''Creates agent for GAIA question answering system.'''
 
-    model = InferenceClientModel(
-        "deepseek-ai/DeepSeek-V3",
-        provider="together",
-        max_tokens=64000
-    )
-
-    tools = [
-        wikipedia_search,
-        get_wikipedia_page,
-        google_search,
-        VisitWebpageTool(),
-    ]
+    # web_agent = CodeAgent(
+    #     model=WORKER_MODEL,
+    #     tools=[google_search, VisitWebpageTool()],
+    #     additional_authorized_imports=['bs4.*', 'json'],
+    #     step_callbacks=[step_memory_cap],
+    #     name="web_agent",
+    #     verbosity_level=5,
+    #     max_steps=10,
+    #     planning_interval=5,
+    #     description="Web search agent for general queries and retrieving web pages as HTML",
+    # )
+
+    # wikipedia_agent = CodeAgent(
+    #     model=WORKER_MODEL,
+    #     tools=[wikipedia_search, get_wikipedia_page],
+    #     additional_authorized_imports=['bs4.*', 'json'],
+    #     step_callbacks=[step_memory_cap],
+    #     name="wikipedia_agent",
+    #     verbosity_level=5,
+    #     max_steps=10,
+    #     planning_interval=5,
+    #     description="Wikipedia agent to search and retrieve Wikipedia pages as HTML",
+    # )
+
+    # libretext_agent = CodeAgent(
+    #     model=WORKER_MODEL,
+    #     tools=[libretext_book_search, get_libretext_book],
+    #     additional_authorized_imports=['bs4.*', 'json'],
+    #     step_callbacks=[step_memory_cap],
+    #     name="libretext_agent",
+    #     verbosity_level=5,
+    #     max_steps=10,
+    #     planning_interval=5,
+    #     description="LibreText agent to search and retrieve content from academic textbooks books",
+    # )
+
+    # manager_agent = CodeAgent(
+    #     model=MANAGER_MODEL,
+    #     tools=[],
+    #     managed_agents=[web_agent, wikipedia_agent, libretext_agent],
+    #     additional_authorized_imports=['bs4.*', 'json'],
+    #     planning_interval=2,
+    #     verbosity_level=2,
+    #     final_answer_checks=[check_reasoning],
+    #     max_steps=20,
+    # )
 
     agent = CodeAgent(
-        model=model,
-        tools=tools,
+        model=MODEL,
+        tools=[
+            google_search,
+            VisitWebpageTool(),
+            wikipedia_search,
+            get_wikipedia_page,
+            libretext_book_search,
+            get_libretext_book
+        ],
         additional_authorized_imports=['bs4.*', 'json'],
-        step_callbacks=[step_memory_cap],
+        step_callbacks=[step_memory_cap, step_wait],
        name="GAIA_agent",
        verbosity_level=5,
-        max_steps=30,
-        planning_interval=2,
-        description="GAIA agent for question answering"
    )
 
-
    return agent
-
-
-def step_memory_cap(memory_step: ActionStep, agent: CodeAgent) -> None:
-    '''Removes old steps from agent memory to keep context length under control.'''
-
-    task_step = agent.memory.steps[0]
-    planning_step = agent.memory.steps[1]
-    latest_step = agent.memory.steps[-1]
-
-    if len(agent.memory.steps) > 2:
-        agent.memory.steps = [task_step, planning_step, latest_step]
-
-    logger.info('Agent memory has %d steps', len(agent.memory.steps))
-    logger.info('Latest step is step %d', memory_step.step_number)
-    logger.info('Contains: %s messages', len(agent.memory.steps[-1].model_input_messages))
-    logger.info('Token usage: %s', agent.memory.steps[-1].token_usage.total_tokens)
-
-    for message in agent.memory.steps[-1].model_input_messages:
-        logger.debug(' Role: %s: %s', message['role'], message['content'][:100])
-
-    token_usage = agent.memory.steps[-1].token_usage.total_tokens
-
-    if token_usage > 50000:
-        logger.info('Token usage is %d, summarizing old messages', token_usage)
-
-        summary = summarize_old_messages(
-            agent.memory.steps[-1].model_input_messages[1:]
-        )
-
-        if summary is not None:
-
-            new_messages = [agent.memory.steps[-1].model_input_messages[0]]
-            new_messages.append({
-                'role': MessageRole.USER,
-                'content': [{'type': 'text', 'text': f'Here is a summary of your investigation so far: {summary}'}]
-            })
-            agent.memory.steps = [agent.memory.steps[0]]
-            agent.memory.steps[0].model_input_messages = new_messages
-
-    for message in agent.memory.steps[0].model_input_messages:
-        logger.debug(' Role: %s: %s', message['role'], message['content'][:100])
-
-
-def summarize_old_messages(messages: dict) -> dict:
-    '''Summarizes old messages to keep context length under control.'''
-
-    client = OpenAI(api_key=os.environ['MODAL_API_KEY'])
-
-    client.base_url = (
-        'https://gperdrizet--vllm-openai-compatible-summarization-serve.modal.run/v1'
-    )
-
-    # Default to first avalible model
-    model = client.models.list().data[0]
-    model_id = model.id
-
-    messages = [
-        {
-            'role': 'system',
-            'content': f'Summarize the following interaction between an AI agent and a user. Return the summary formatted as text, not as JSON: {json.dumps(messages)}'
-        }
-    ]
-
-    completion_args = {
-        'model': model_id,
-        'messages': messages,
-        # "frequency_penalty": args.frequency_penalty,
-        # "max_tokens": 128,
-        # "n": args.n,
-        # "presence_penalty": args.presence_penalty,
-        # "seed": args.seed,
-        # "stop": args.stop,
-        # "stream": args.stream,
-        # "temperature": args.temperature,
-        # "top_p": args.top_p,
-    }
-
-    try:
-        response = client.chat.completions.create(**completion_args)
-
-    except Exception as e: # pylint: disable=broad-exception-caught
-        response = None
-        logger.error('Error during Modal API call: %s', e)
-
-    if response is not None:
-        summary = response.choices[0].message.content
-
-    else:
-        summary = None
-
-    return summary
functions/agent_helper_functions.py ADDED
@@ -0,0 +1,142 @@
+'''Helper functions for the agent(s) in the GAIA question answering system.'''
+
+import os
+import time
+import json
+import logging
+from openai import OpenAI
+from smolagents import CodeAgent, ActionStep, MessageRole
+from configuration import CHECK_MODEL, TOKEN_LIMITER, STEP_WAIT
+
+# Get logger for this module
+logger = logging.getLogger(__name__)
+
+
+def check_reasoning(final_answer:str, agent_memory):
+    """Checks the reasoning and plot of the agent's final answer."""
+
+    prompt = (
+        f"Here is a user-given task and the agent steps: {agent_memory.get_succinct_steps()}. " +
+        "Please check that the reasoning process and answer are correct. " +
+        "Do they correctly answer the given task? " +
+        "First list reasons why yes/no, then write your final decision: " +
+        "PASS in caps lock if it is satisfactory, FAIL if it is not. " +
+        f"Final answer: {str(final_answer)}"
+    )
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": prompt,
+                }
+            ],
+        }
+    ]
+
+    output = CHECK_MODEL(messages).content
+    print("Feedback: ", output)
+
+    if "FAIL" in output:
+        raise Exception(output) # pylint:disable=broad-exception-raised
+
+    return True
+
+
+def step_memory_cap(memory_step: ActionStep, agent: CodeAgent) -> None:
+    '''Removes old steps from agent memory to keep context length under control.'''
+
+    task_step = agent.memory.steps[0]
+    planning_step = agent.memory.steps[1]
+    latest_step = agent.memory.steps[-1]
+
+    if len(agent.memory.steps) > 2:
+        agent.memory.steps = [task_step, planning_step, latest_step]
+
+    logger.info('Agent memory has %d steps', len(agent.memory.steps))
+    logger.info('Latest step is step %d', memory_step.step_number)
+    logger.info('Contains: %s messages', len(agent.memory.steps[-1].model_input_messages))
+    logger.info('Token usage: %s', agent.memory.steps[-1].token_usage.total_tokens)
+
+    for message in agent.memory.steps[-1].model_input_messages:
+        logger.debug(' Role: %s: %s', message['role'], message['content'][:100])
+
+    token_usage = agent.memory.steps[-1].token_usage.total_tokens
+
+    if token_usage > TOKEN_LIMITER:
+        logger.info('Token usage is %d, summarizing old messages', token_usage)
+
+        summary = summarize_old_messages(
+            agent.memory.steps[-1].model_input_messages[1:]
+        )
+
+        if summary is not None:
+
+            new_messages = [agent.memory.steps[-1].model_input_messages[0]]
+            new_messages.append({
+                'role': MessageRole.USER,
+                'content': [{
+                    'type': 'text',
+                    'text': f'Here is a summary of your investigation so far: {summary}'
+                }]
+            })
+            agent.memory.steps = [agent.memory.steps[0]]
+            agent.memory.steps[0].model_input_messages = new_messages
+
+    for message in agent.memory.steps[0].model_input_messages:
+        logger.debug(' Role: %s: %s', message['role'], message['content'][:100])
+
+
+def summarize_old_messages(messages: dict) -> dict:
+    '''Summarizes old messages to keep context length under control.'''
+
+    client = OpenAI(api_key=os.environ['MODAL_API_KEY'])
+
+    client.base_url = (
+        'https://gperdrizet--vllm-openai-compatible-summarization-serve.modal.run/v1'
+    )
+
+    # Default to first avalible model
+    model = client.models.list().data[0]
+    model_id = model.id
+
+    messages = [
+        {
+            'role': 'system',
+            'content': ('Summarize the following interaction between an AI agent and a user.' +
+                f'Return the summary formatted as text, not as JSON: {json.dumps(messages)}')
+        }
+    ]
+
+    completion_args = {
+        'model': model_id,
+        'messages': messages,
+    }
+
+    try:
+        response = client.chat.completions.create(**completion_args)
+
+    except Exception as e: # pylint: disable=broad-exception-caught
+        response = None
+        logger.error('Error during Modal API call: %s', e)
+
+    if response is not None:
+        summary = response.choices[0].message.content
+
+    else:
+        summary = None
+
+    return summary
+
+def step_wait(memory_step: ActionStep, agent: CodeAgent) -> None:
+    '''Waits for a while to prevent hitting API rate limits.'''
+
+    logger.info('Waiting for %d seconds to prevent hitting API rate limits', STEP_WAIT)
+    logger.info('Current step is %d', memory_step.step_number)
+    logger.info('Current agent has %d steps', len(agent.memory.steps))
+
+    time.sleep(STEP_WAIT)
+
+    return True
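A minimal sketch of how these helpers plug into a smolagents CodeAgent. The step_callbacks wiring mirrors functions/agent.py above; passing check_reasoning through final_answer_checks only appears in the commented-out manager agent, so treat that part as an assumption about intended use rather than something this commit enables:

# Sketch only: the callbacks here take (memory_step, agent), matching how this repo registers them.
from smolagents import CodeAgent, VisitWebpageTool
from configuration import MODEL
from functions.agent_helper_functions import step_memory_cap, step_wait, check_reasoning

agent = CodeAgent(
    model=MODEL,
    tools=[VisitWebpageTool()],                   # trimmed tool list for the sketch
    step_callbacks=[step_memory_cap, step_wait],  # cap memory first, then sleep STEP_WAIT seconds
    final_answer_checks=[check_reasoning],        # assumption: not active in this commit
    max_steps=20,
    planning_interval=5,
)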
functions/tool_helper_functions.py CHANGED
@@ -1,9 +1,11 @@
1
  '''Helper functions for GAIA question answering agent tools.'''
2
 
 
3
  import time
4
  import logging
5
- import requests
6
  from bs4 import BeautifulSoup
 
7
 
8
  # Get logger for this module
9
  logger = logging.getLogger(__name__)
@@ -52,10 +54,12 @@ def libretext_book_parser(url: str) -> dict:
52
  chapter_count = 0
53
 
54
  for listing in chapter_listings:
 
55
  # Extract the link element
56
  link = listing.find('a', class_='mt-sortable-listing-link')
57
 
58
  if link:
 
59
  # Extract title from the span with class 'mt-sortable-listing-title'
60
  title_span = link.find('span', class_='mt-sortable-listing-title')
61
  title = title_span.get_text(strip=True) if title_span else ''
@@ -69,11 +73,13 @@ def libretext_book_parser(url: str) -> dict:
69
  # Clean up description - remove the title prefix if it appears
70
  if description and title and description.startswith(title):
71
  description = description[len(title):].strip()
 
72
  if description.startswith(':'):
73
  description = description[1:].strip()
74
 
75
  # Only add meaningful chapters (skip empty titles or very short ones)
76
  if title and len(title) > 2:
 
77
  parsed_chapters[chapter_count] = {
78
  'title': title,
79
  'url': chapter_url,
@@ -85,14 +91,17 @@ def libretext_book_parser(url: str) -> dict:
85
  chapter_count += 1
86
 
87
  logger.info('Successfully extracted %d chapters from book', len(parsed_chapters))
 
88
  return parsed_chapters
89
 
90
  except requests.exceptions.RequestException as e:
91
  logger.error('Request error while fetching book page: %s', str(e))
 
92
  return {'error': f'Request error: {str(e)}'}
93
 
94
  except Exception as e: # pylint:disable=broad-exception-caught
95
  logger.error('Unexpected error in book parser: %s', str(e))
 
96
  return {'error': f'Unexpected error: {str(e)}'}
97
 
98
 
@@ -233,40 +242,52 @@ def save_libretext_book_as_markdown(book_data: dict, filename: str = None, sourc
233
 
234
  # Table of contents
235
  chapters = book_data.get('chapters', {})
 
236
  if chapters:
237
  markdown_content.append("## Table of Contents\n")
 
238
  for chapter_title in chapters.keys():
 
239
  # Create anchor link for the chapter
240
- anchor = chapter_title.lower().replace(' ', '-').replace(':', '').replace('(', '').replace(')', '')
 
 
 
 
241
  markdown_content.append(f"- [{chapter_title}](#{anchor})\n")
242
  markdown_content.append("\n---\n\n")
243
 
244
  # Chapter content
245
  for chapter_title, chapter_data in chapters.items():
 
246
  # Chapter heading
247
  markdown_content.append(f"## {chapter_title}\n\n")
248
 
249
  sections = chapter_data.get('sections', {})
250
 
251
  if not sections:
 
252
  markdown_content.append("*No sections found for this chapter.*\n\n")
253
  continue
254
 
255
  # Section content
256
  for section_title, section_data in sections.items():
 
257
  # Section heading
258
  markdown_content.append(f"### {section_title}\n\n")
259
 
260
  # Section URL
261
  section_url = section_data.get('Section url', '')
 
262
  if section_url:
263
  markdown_content.append(f"**URL:** [{section_url}]({section_url})\n\n")
264
 
265
  # Section summary
266
  section_summary = section_data.get('Section summary', '')
 
267
  if section_summary:
268
  markdown_content.append(f"{section_summary}\n\n")
269
- else:
270
  markdown_content.append("*No summary available.*\n\n")
271
 
272
  markdown_content.append("---\n\n")
@@ -277,9 +298,271 @@ def save_libretext_book_as_markdown(book_data: dict, filename: str = None, sourc
277
 
278
  success_msg = f"Successfully saved LibreTexts book as markdown file: {filename}"
279
  logger.info(success_msg)
 
280
  return success_msg
281
 
282
  except Exception as e: # pylint:disable=broad-exception-caught
283
  error_msg = f"Error saving markdown file: {str(e)}"
284
  logger.error(error_msg)
 
285
  return error_msg
 
1
  '''Helper functions for GAIA question answering agent tools.'''
2
 
3
+ import requests
4
  import time
5
  import logging
6
+ import bleach
7
  from bs4 import BeautifulSoup
8
+ from bleach.css_sanitizer import CSSSanitizer
9
 
10
  # Get logger for this module
11
  logger = logging.getLogger(__name__)
 
54
  chapter_count = 0
55
 
56
  for listing in chapter_listings:
57
+
58
  # Extract the link element
59
  link = listing.find('a', class_='mt-sortable-listing-link')
60
 
61
  if link:
62
+
63
  # Extract title from the span with class 'mt-sortable-listing-title'
64
  title_span = link.find('span', class_='mt-sortable-listing-title')
65
  title = title_span.get_text(strip=True) if title_span else ''
 
73
  # Clean up description - remove the title prefix if it appears
74
  if description and title and description.startswith(title):
75
  description = description[len(title):].strip()
76
+
77
  if description.startswith(':'):
78
  description = description[1:].strip()
79
 
80
  # Only add meaningful chapters (skip empty titles or very short ones)
81
  if title and len(title) > 2:
82
+
83
  parsed_chapters[chapter_count] = {
84
  'title': title,
85
  'url': chapter_url,
 
91
  chapter_count += 1
92
 
93
  logger.info('Successfully extracted %d chapters from book', len(parsed_chapters))
94
+
95
  return parsed_chapters
96
 
97
  except requests.exceptions.RequestException as e:
98
  logger.error('Request error while fetching book page: %s', str(e))
99
+
100
  return {'error': f'Request error: {str(e)}'}
101
 
102
  except Exception as e: # pylint:disable=broad-exception-caught
103
  logger.error('Unexpected error in book parser: %s', str(e))
104
+
105
  return {'error': f'Unexpected error: {str(e)}'}
106
 
107
 
 
242
 
243
  # Table of contents
244
  chapters = book_data.get('chapters', {})
245
+
246
  if chapters:
247
  markdown_content.append("## Table of Contents\n")
248
+
249
  for chapter_title in chapters.keys():
250
+
251
  # Create anchor link for the chapter
252
+ anchor = chapter_title.lower().replace(
253
+ ' ',
254
+ '-'
255
+ ).replace(':', '').replace('(', '').replace(')', '')
256
+
257
  markdown_content.append(f"- [{chapter_title}](#{anchor})\n")
258
  markdown_content.append("\n---\n\n")
259
 
260
  # Chapter content
261
  for chapter_title, chapter_data in chapters.items():
262
+
263
  # Chapter heading
264
  markdown_content.append(f"## {chapter_title}\n\n")
265
 
266
  sections = chapter_data.get('sections', {})
267
 
268
  if not sections:
269
+
270
  markdown_content.append("*No sections found for this chapter.*\n\n")
271
  continue
272
 
273
  # Section content
274
  for section_title, section_data in sections.items():
275
+
276
  # Section heading
277
  markdown_content.append(f"### {section_title}\n\n")
278
 
279
  # Section URL
280
  section_url = section_data.get('Section url', '')
281
+
282
  if section_url:
283
  markdown_content.append(f"**URL:** [{section_url}]({section_url})\n\n")
284
 
285
  # Section summary
286
  section_summary = section_data.get('Section summary', '')
287
+
288
  if section_summary:
289
  markdown_content.append(f"{section_summary}\n\n")
290
+
291
  markdown_content.append("*No summary available.*\n\n")
292
 
293
  markdown_content.append("---\n\n")
 
298
 
299
  success_msg = f"Successfully saved LibreTexts book as markdown file: {filename}"
300
  logger.info(success_msg)
301
+
302
  return success_msg
303
 
304
  except Exception as e: # pylint:disable=broad-exception-caught
305
  error_msg = f"Error saving markdown file: {str(e)}"
306
  logger.error(error_msg)
307
+
308
  return error_msg
309
+
310
+
311
+ class WikipediaFetcher:
312
+ """Gets and cleans up Wikipedia pages."""
313
+
314
+ def fetch(self, page_name):
315
+ """
316
+ Passed a Wikipedia page's URL fragment, like
317
+ 'Edward_Montagu,_1st_Earl_of_Sandwich', this will fetch the page's
318
+ main contents, tidy the HTML, strip out any elements we don't want
319
+ and return the final HTML string.
320
+
321
+ Returns a dict with two elements:
322
+ 'success' is either True or, if we couldn't fetch the page, False.
323
+ 'content' is the HTML if success==True, or else an error message.
324
+ """
325
+ result = self._get_html(page_name)
326
+
327
+ if result["success"]:
328
+ result["content"] = self._tidy_html(result["content"])
329
+
330
+ return result
331
+
332
+
333
+ def _get_html(self, page_name):
334
+ """
335
+ Passed the name of a Wikipedia page (eg, 'Samuel_Pepys'), it fetches
336
+ the HTML content (not the entire HTML page) and returns it.
337
+
338
+ Returns a dict with two elements:
339
+ 'success' is either True or, if we couldn't fetch the page, False.
340
+ 'content' is the HTML if success==True, or else an error message.
341
+ """
342
+ error_message = ""
343
+
344
+ url = f"https://en.wikipedia.org/wiki/{page_name}"
345
+
346
+ try:
347
+ response = requests.get(url, params={"action": "render"}, timeout=5)
348
+ except requests.exceptions.ConnectionError:
349
+ error_message = "Can't connect to domain."
350
+ except requests.exceptions.Timeout:
351
+ error_message = "Connection timed out."
352
+ except requests.exceptions.TooManyRedirects:
353
+ error_message = "Too many redirects."
354
+
355
+ try:
356
+ response.raise_for_status()
357
+ except requests.exceptions.HTTPError:
358
+ # 4xx or 5xx errors:
359
+ error_message = f"HTTP Error: {response.status_code}"
360
+ except NameError:
361
+ if error_message == "":
362
+ error_message = "Something unusual went wrong."
363
+
364
+ if error_message:
365
+ return {"success": False, "content": error_message}
366
+ else:
367
+ return {"success": True, "content": response.text}
368
+
369
+
370
+ def _tidy_html(self, html):
371
+ """
372
+ Passed the raw Wikipedia HTML, this returns valid HTML, with all
373
+ disallowed elements stripped out.
374
+ """
375
+ html = self._bleach_html(html)
376
+ html = self._strip_html(html)
377
+ return html
378
+
379
+
380
+ def _bleach_html(self, html):
381
+ """
382
+ Ensures we have valid HTML; no unclosed or mis-nested tags.
383
+ Removes any tags and attributes we don't want to let through.
384
+ Doesn't remove the contents of any disallowed tags.
385
+
386
+ Pass it an HTML string, it'll return the bleached HTML string.
387
+ """
388
+
389
+ # Pretty much most elements, but no forms or audio/video.
390
+ allowed_tags = {
391
+ "a",
392
+ "abbr",
393
+ "acronym",
394
+ "address",
395
+ "area",
396
+ "article",
397
+ "b",
398
+ "blockquote",
399
+ "br",
400
+ "caption",
401
+ "cite",
402
+ "code",
403
+ "col",
404
+ "colgroup",
405
+ "dd",
406
+ "del",
407
+ "dfn",
408
+ "div",
409
+ "dl",
410
+ "dt",
411
+ "em",
412
+ "figcaption",
413
+ "figure",
414
+ "footer",
415
+ "h1",
416
+ "h2",
417
+ "h3",
418
+ "h4",
419
+ "h5",
420
+ "h6",
421
+ "header",
422
+ "hgroup",
423
+ "hr",
424
+ "i",
425
+ "img",
426
+ "ins",
427
+ "kbd",
428
+ "li",
429
+ "map",
430
+ "nav",
431
+ "ol",
432
+ "p",
433
+ "pre",
434
+ "q",
435
+ "s",
436
+ "samp",
437
+ "section",
438
+ "small",
439
+ "span",
440
+ "strong",
441
+ "sub",
442
+ "sup",
443
+ "table",
444
+ "tbody",
445
+ "td",
446
+ "tfoot",
447
+ "th",
448
+ "thead",
449
+ "time",
450
+ "tr",
451
+ "ul",
452
+ "var",
453
+ # We allow script and style here, so we can close/un-mis-nest
454
+ # its tags, but then it's removed completely in _strip_html():
455
+ "script",
456
+ "style",
457
+ }
458
+
459
+ # These attributes will not be removed from any of the allowed tags.
460
+ allowed_attributes = {
461
+ "*": ["class", "id"],
462
+ "a": ["href", "title"],
463
+ "abbr": ["title"],
464
+ "acronym": ["title"],
465
+ "img": ["alt", "src", "srcset"],
466
+ # Ugh. Don't know why this page doesn't use .tright like others
467
+ # http://127.0.0.1:8000/encyclopedia/5040/
468
+ "table": ["align"],
469
+ "td": ["colspan", "rowspan", "style"],
470
+ "th": ["colspan", "rowspan", "scope"],
471
+ }
472
+
473
+ # These CSS properties are allowed within style attributes
474
+ # Added for the family tree on /encyclopedia/5825/
475
+ # Hopefully doesn't make anything else too hideous.
476
+ allowed_css_properties = [
477
+ "background",
478
+ "border",
479
+ "border-bottom",
480
+ "border-collapse",
481
+ "border-left",
482
+ "border-radius",
483
+ "border-right",
484
+ "border-spacing",
485
+ "border-top",
486
+ "height",
487
+ "padding",
488
+ "text-align",
489
+ "width",
490
+ ]
491
+
492
+ css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
493
+
494
+ a = bleach.clean(
495
+ html,
496
+ tags=allowed_tags,
497
+ attributes=allowed_attributes,
498
+ css_sanitizer=css_sanitizer,
499
+ strip=True,
500
+ )
501
+
502
+ return a
503
+
504
+
505
+ def _strip_html(self, html):
506
+ """
507
+ Takes out any tags, and their contents, that we don't want at all.
508
+ And adds custom classes to existing tags (so we can apply CSS styles
509
+ without having to multiply our CSS).
510
+
511
+ Pass it an HTML string, it returns the stripped HTML string.
512
+ """
513
+
514
+ # CSS selectors. Strip these and their contents.
515
+ selectors = [
516
+ "div.hatnote",
517
+ "div.navbar.mini", # Will also match div.mini.navbar
518
+ # Bottom of https://en.wikipedia.org/wiki/Charles_II_of_England :
519
+ "div.topicon",
520
+ "a.mw-headline-anchor",
521
+ "script",
522
+ "style",
523
+ ]
524
+
525
+ # Strip any element that has one of these classes.
526
+ classes = [
527
+ # "This article may be expanded with text translated from..."
528
+ # https://en.wikipedia.org/wiki/Afonso_VI_of_Portugal
529
+ "ambox-notice",
530
+ "magnify",
531
+ # eg audio on https://en.wikipedia.org/wiki/Bagpipes
532
+ "mediaContainer",
533
+ "navbox",
534
+ "noprint",
535
+ ]
536
+
537
+ # Any element has a class matching a key, it will have the classes
538
+ # in the value added.
539
+ add_classes = {
540
+ # Give these tables standard Bootstrap styles.
541
+ "infobox": ["table", "table-bordered"],
542
+ "ambox": ["table", "table-bordered"],
543
+ "wikitable": ["table", "table-bordered"],
544
+ }
545
+
546
+ soup = BeautifulSoup(html, "lxml")
547
+
548
+ for selector in selectors:
549
+ _ = [tag.decompose() for tag in soup.select(selector)]
550
+
551
+ for clss in classes:
552
+ _ = [tag.decompose() for tag in soup.find_all(attrs={"class": clss})]
553
+
554
+ for clss, new_classes in add_classes.items():
555
+ for tag in soup.find_all(attrs={"class": clss}):
556
+ tag["class"] = tag.get("class", []) + new_classes
557
+
558
+ # Depending on the HTML parser BeautifulSoup used, soup may have
559
+ # surrounding <html><body></body></html> or just <body></body> tags.
560
+ if soup.body:
561
+ soup = soup.body
562
+ elif soup.html:
563
+ soup = soup.html.body
564
+
565
+ # Put the content back into a string.
566
+ html = "".join(str(tag) for tag in soup.contents)
567
+
568
+ return html
functions/tools.py CHANGED
@@ -2,9 +2,7 @@
2
 
3
  import time
4
  import logging
5
- import bleach
6
  import requests
7
- from bleach.css_sanitizer import CSSSanitizer
8
  from smolagents import tool
9
  from googlesearch import search
10
  from bs4 import BeautifulSoup
@@ -17,7 +15,8 @@ from selenium.common.exceptions import TimeoutException, WebDriverException
17
  from functions.tool_helper_functions import (
18
  libretext_book_parser,
19
  libretext_chapter_parser,
20
- save_libretext_book_as_markdown
 
21
  )
22
 
23
  # Get logger for this module
@@ -119,7 +118,7 @@ def get_wikipedia_page(query: str) -> str:
119
  content = content.split(
120
  '<div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2></div>'
121
  )[0]
122
-
123
  content = content.split(
124
  '<div class="mw-heading mw-heading2"><h2 id="References">References</h2></div>'
125
  )[0]
@@ -127,266 +126,6 @@ def get_wikipedia_page(query: str) -> str:
127
  return content
128
 
129
 
130
- class WikipediaFetcher:
131
- """Gets and cleans up Wikipedia pages."""
132
-
133
- def fetch(self, page_name):
134
- """
135
- Passed a Wikipedia page's URL fragment, like
136
- 'Edward_Montagu,_1st_Earl_of_Sandwich', this will fetch the page's
137
- main contents, tidy the HTML, strip out any elements we don't want
138
- and return the final HTML string.
139
-
140
- Returns a dict with two elements:
141
- 'success' is either True or, if we couldn't fetch the page, False.
142
- 'content' is the HTML if success==True, or else an error message.
143
- """
144
- result = self._get_html(page_name)
145
-
146
- if result["success"]:
147
- result["content"] = self._tidy_html(result["content"])
148
-
149
- return result
150
-
151
-
152
- def _get_html(self, page_name):
153
- """
154
- Passed the name of a Wikipedia page (eg, 'Samuel_Pepys'), it fetches
155
- the HTML content (not the entire HTML page) and returns it.
156
-
157
- Returns a dict with two elements:
158
- 'success' is either True or, if we couldn't fetch the page, False.
159
- 'content' is the HTML if success==True, or else an error message.
160
- """
161
- error_message = ""
162
-
163
- url = f"https://en.wikipedia.org/wiki/{page_name}"
164
-
165
- try:
166
- response = requests.get(url, params={"action": "render"}, timeout=5)
167
- except requests.exceptions.ConnectionError:
168
- error_message = "Can't connect to domain."
169
- except requests.exceptions.Timeout:
170
- error_message = "Connection timed out."
171
- except requests.exceptions.TooManyRedirects:
172
- error_message = "Too many redirects."
173
-
174
- try:
175
- response.raise_for_status()
176
- except requests.exceptions.HTTPError:
177
- # 4xx or 5xx errors:
178
- error_message = f"HTTP Error: {response.status_code}"
179
- except NameError:
180
- if error_message == "":
181
- error_message = "Something unusual went wrong."
182
-
183
- if error_message:
184
- return {"success": False, "content": error_message}
185
- else:
186
- return {"success": True, "content": response.text}
187
-
188
-
189
- def _tidy_html(self, html):
190
- """
191
- Passed the raw Wikipedia HTML, this returns valid HTML, with all
192
- disallowed elements stripped out.
193
- """
194
- html = self._bleach_html(html)
195
- html = self._strip_html(html)
196
- return html
197
-
198
-
199
- def _bleach_html(self, html):
200
- """
201
- Ensures we have valid HTML; no unclosed or mis-nested tags.
202
- Removes any tags and attributes we don't want to let through.
203
- Doesn't remove the contents of any disallowed tags.
204
-
205
- Pass it an HTML string, it'll return the bleached HTML string.
206
- """
207
-
208
- # Pretty much most elements, but no forms or audio/video.
209
- allowed_tags = {
210
- "a",
211
- "abbr",
212
- "acronym",
213
- "address",
214
- "area",
215
- "article",
216
- "b",
217
- "blockquote",
218
- "br",
219
- "caption",
220
- "cite",
221
- "code",
222
- "col",
223
- "colgroup",
224
- "dd",
225
- "del",
226
- "dfn",
227
- "div",
228
- "dl",
229
- "dt",
230
- "em",
231
- "figcaption",
232
- "figure",
233
- "footer",
234
- "h1",
235
- "h2",
236
- "h3",
237
- "h4",
238
- "h5",
239
- "h6",
240
- "header",
241
- "hgroup",
242
- "hr",
243
- "i",
244
- "img",
245
- "ins",
246
- "kbd",
247
- "li",
248
- "map",
249
- "nav",
250
- "ol",
251
- "p",
252
- "pre",
253
- "q",
254
- "s",
255
- "samp",
256
- "section",
257
- "small",
258
- "span",
259
- "strong",
260
- "sub",
261
- "sup",
262
- "table",
263
- "tbody",
264
- "td",
265
- "tfoot",
266
- "th",
267
- "thead",
268
- "time",
269
- "tr",
270
- "ul",
271
- "var",
272
- # We allow script and style here, so we can close/un-mis-nest
273
- # its tags, but then it's removed completely in _strip_html():
274
- "script",
275
- "style",
276
- }
277
-
278
- # These attributes will not be removed from any of the allowed tags.
279
- allowed_attributes = {
280
- "*": ["class", "id"],
281
- "a": ["href", "title"],
282
- "abbr": ["title"],
283
- "acronym": ["title"],
284
- "img": ["alt", "src", "srcset"],
285
- # Ugh. Don't know why this page doesn't use .tright like others
286
- # http://127.0.0.1:8000/encyclopedia/5040/
287
- "table": ["align"],
288
- "td": ["colspan", "rowspan", "style"],
289
- "th": ["colspan", "rowspan", "scope"],
290
- }
291
-
292
- # These CSS properties are allowed within style attributes
293
- # Added for the family tree on /encyclopedia/5825/
294
- # Hopefully doesn't make anything else too hideous.
295
- allowed_css_properties = [
296
- "background",
297
- "border",
298
- "border-bottom",
299
- "border-collapse",
300
- "border-left",
301
- "border-radius",
302
- "border-right",
303
- "border-spacing",
304
- "border-top",
305
- "height",
306
- "padding",
307
- "text-align",
308
- "width",
309
- ]
310
-
311
- css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
312
-
313
- a = bleach.clean(
314
- html,
315
- tags=allowed_tags,
316
- attributes=allowed_attributes,
317
- css_sanitizer=css_sanitizer,
318
- strip=True,
319
- )
320
-
321
- return a
322
-
323
-
324
- def _strip_html(self, html):
325
- """
326
- Takes out any tags, and their contents, that we don't want at all.
327
- And adds custom classes to existing tags (so we can apply CSS styles
328
- without having to multiply our CSS).
329
-
330
- Pass it an HTML string, it returns the stripped HTML string.
331
- """
332
-
333
- # CSS selectors. Strip these and their contents.
334
- selectors = [
335
- "div.hatnote",
336
- "div.navbar.mini", # Will also match div.mini.navbar
337
- # Bottom of https://en.wikipedia.org/wiki/Charles_II_of_England :
338
- "div.topicon",
339
- "a.mw-headline-anchor",
340
- "script",
341
- "style",
342
- ]
343
-
344
- # Strip any element that has one of these classes.
345
- classes = [
346
- # "This article may be expanded with text translated from..."
347
- # https://en.wikipedia.org/wiki/Afonso_VI_of_Portugal
348
- "ambox-notice",
349
- "magnify",
350
- # eg audio on https://en.wikipedia.org/wiki/Bagpipes
351
- "mediaContainer",
352
- "navbox",
353
- "noprint",
354
- ]
355
-
356
- # Any element has a class matching a key, it will have the classes
357
- # in the value added.
358
- add_classes = {
359
- # Give these tables standard Bootstrap styles.
360
- "infobox": ["table", "table-bordered"],
361
- "ambox": ["table", "table-bordered"],
362
- "wikitable": ["table", "table-bordered"],
363
- }
364
-
365
- soup = BeautifulSoup(html, "lxml")
366
-
367
- for selector in selectors:
368
- _ = [tag.decompose() for tag in soup.select(selector)]
369
-
370
- for clss in classes:
371
- _ = [tag.decompose() for tag in soup.find_all(attrs={"class": clss})]
372
-
373
- for clss, new_classes in add_classes.items():
374
- for tag in soup.find_all(attrs={"class": clss}):
375
- tag["class"] = tag.get("class", []) + new_classes
376
-
377
- # Depending on the HTML parser BeautifulSoup used, soup may have
378
- # surrounding <html><body></body></html> or just <body></body> tags.
379
- if soup.body:
380
- soup = soup.body
381
- elif soup.html:
382
- soup = soup.html.body
383
-
384
- # Put the content back into a string.
385
- html = "".join(str(tag) for tag in soup.contents)
386
-
387
- return html
388
-
389
-
390
  @tool
391
  def libretext_book_search(query: str) -> dict:
392
  """
 
2
 
3
  import time
4
  import logging
 
5
  import requests
 
6
  from smolagents import tool
7
  from googlesearch import search
8
  from bs4 import BeautifulSoup
 
15
  from functions.tool_helper_functions import (
16
  libretext_book_parser,
17
  libretext_chapter_parser,
18
+ save_libretext_book_as_markdown,
19
+ WikipediaFetcher
20
  )
21
 
22
  # Get logger for this module
 
118
  content = content.split(
119
  '<div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2></div>'
120
  )[0]
121
+
122
  content = content.split(
123
  '<div class="mw-heading mw-heading2"><h2 id="References">References</h2></div>'
124
  )[0]
 
126
  return content
127
 
128
129
  @tool
130
  def libretext_book_search(query: str) -> dict:
131
  """
results.csv CHANGED
@@ -2,14 +2,14 @@ Task ID,Question,Submitted Answer
 8e867cd7-cff9-4e6c-867a-ff5ddc2550be,How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.,3
 2d83110e-a098-4ebb-9987-066c06fa42d0,".rewsna eht sa ""tfel"" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI",right
 4fc2f1ae-8625-45b5-ab34-ad4433bc21f8,Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?,FunkMonk
-cabe07ed-9eca-40ea-8ead-410ef5e83f91,What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?,not found in materials
+cabe07ed-9eca-40ea-8ead-410ef5e83f91,What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?,No equine veterinarian mentioned in 1.E Exercises
 3cef3a44-215e-4aed-8e3b-b1e3f08063b7,"I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:
 
 milk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts
 
-I need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.","broccoli, celery, green beans, lettuce, sweet potatoes, zucchini"
+I need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.","bell pepper, broccoli, celery, corn, green beans, lettuce, sweet potatoes, zucchini"
 305ac316-eef6-4446-960a-92d80d542f82,Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.,Wojciech
-3f57289b-8c60-48be-bd80-01f8099ca449,How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?,519
+3f57289b-8c60-48be-bd80-01f8099ca449,How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?,525
 cf106601-ab4f-4af9-b045-5295fe67b37d,"What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",CUB
 a0c07678-e491-4bbc-8f0b-07405144218f,"Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.","Yamasaki, Uehara"
-5a0c1adf-205e-4841-a666-7c3ef95def9d,What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?,Claus Peter
+5a0c1adf-205e-4841-a666-7c3ef95def9d,What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?,Claus Peter Flor