Bhanu-Chander-ABB committed on
Commit
b86e7fd
·
1 Parent(s): 324000b

updated tools

Browse files
Files changed (2) hide show
  1. app.py +154 -214
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,22 +1,15 @@
1
  import os
2
  import gradio as gr
3
- import requests
4
- import tempfile
5
- import mimetypes
6
- import base64
7
- import json
8
  import pandas as pd
9
- import datetime
10
  from langchain.tools import tool
11
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
12
  from langchain.agents import initialize_agent, AgentType
13
  from bs4 import BeautifulSoup
14
- import base64
15
  from langchain_openai import ChatOpenAI
16
- import fitz
 
17
  import yt_dlp
18
- import re
19
- import subprocess
20
  from PIL import Image
21
  from transformers import pipeline
22
 
@@ -87,20 +80,6 @@ def current_events_news_search_tool(query: str) -> str:
87
  # tool.name is set to the function name (e.g., `search_tool`), and
88
  # tool.description is set to the docstring of the function (the triple-quoted string right under def ...) (e.g., "Answer general knowledge or current events queries using DuckDuckGo.").
89
 
90
- # --- TOOL 2: Weather Tool (OpenWeatherMap) ---
91
- @tool
92
def get_weather(city: str) -> str:
    """Get current temperature in Celsius for a city."""
    import os

    # The key lives in the WEATHER_API_KEY environment variable; the previous
    # code interpolated an undefined Python name of the same spelling, which
    # raised NameError before the try block was even entered.
    api_key = os.environ.get("WEATHER_API_KEY")
    if not api_key or not city or not city.strip():
        # Without a key or a city there is nothing meaningful to request.
        return "error"
    try:
        resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            # Passing params lets requests URL-encode city names that contain
            # spaces, accents or '&'.
            params={"q": city.strip(), "appid": api_key, "units": "metric"},
            timeout=120,
        )
        resp.raise_for_status()
        data = resp.json()
        return str(round(data["main"]["temp"]))
    except Exception:
        # Tool contract: report failure as the literal string "error".
        return "error"
104
 
105
  # --- TOOL 3: Calculator Tool ---
106
  @tool
@@ -115,45 +94,6 @@ def calculator(expression: str) -> str:
115
  except Exception:
116
  return "error"
117
 
118
- # --- TOOL 4: Unit Conversion Tool ---
119
- @tool
120
def convert_units(args: str) -> str:
    """
    Convert between metric and imperial units (length, mass, temperature).
    Input format: '<value> <from_unit> to <to_unit>', e.g. '10 meters to feet'
    Common singular/abbreviated spellings (m, ft, kg, lbs, C, F, ...) are accepted.
    """
    # Spelling variants mapped onto the canonical unit names used by the table
    # below, so "1 foot to m" works as well as "1 feet to meters".
    aliases = {
        "m": "meters", "meter": "meters", "meters": "meters",
        "metre": "meters", "metres": "meters",
        "ft": "feet", "foot": "feet", "feet": "feet",
        "kg": "kg", "kgs": "kg", "kilogram": "kg", "kilograms": "kg",
        "lb": "lb", "lbs": "lb", "pound": "lb", "pounds": "lb",
        "c": "celsius", "celsius": "celsius",
        "f": "fahrenheit", "fahrenheit": "fahrenheit",
    }
    conversions = {
        ("meters", "feet"): lambda v: v * 3.28084,
        ("feet", "meters"): lambda v: v / 3.28084,
        ("kg", "lb"): lambda v: v * 2.20462,
        ("lb", "kg"): lambda v: v / 2.20462,
        ("celsius", "fahrenheit"): lambda v: v * 9 / 5 + 32,
        ("fahrenheit", "celsius"): lambda v: (v - 32) * 5 / 9,
    }
    try:
        parts = args.lower().split()
        value = float(parts[0])
        # parts[2] is the separator word ("to"); it is not inspected, matching
        # the original behaviour.
        from_unit = aliases.get(parts[1], parts[1])
        to_unit = aliases.get(parts[3], parts[3])
    except (AttributeError, IndexError, ValueError):
        # Non-string input, too few tokens, or a non-numeric value.
        return "error"

    func = conversions.get((from_unit, to_unit))
    if func is None:
        return "error"
    return str(round(func(value), 2))
144
-
145
- # --- TOOL 5: Date & Time Tool ---
146
- @tool
147
def get_time(input: str) -> str:
    """Get current UTC time as HH:MM."""
    # `datetime.datetime.utc()` does not exist (AttributeError on every call);
    # request a timezone-aware "now" in UTC instead. `input` is unused: the
    # tool interface requires a single string argument.
    return datetime.datetime.now(datetime.timezone.utc).strftime("%H:%M")
150
-
151
- @tool
152
def get_date(input: str) -> str:
    """Get current date as YYYY-MM-DD."""
    # `datetime.datetime.utc()` does not exist (AttributeError on every call);
    # use a timezone-aware UTC "now". `input` is unused: the tool interface
    # requires a single string argument.
    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
155
-
156
-
157
  # --- TOOL 6: Wikipedia Summary Tool ---
158
  @tool
159
  def wikipedia_and_generalknowledge_search(query: str) -> str:
@@ -195,38 +135,6 @@ def wikipedia_and_generalknowledge_search(query: str) -> str:
195
  except Exception as e:
196
  return f"error: {e}"
197
 
198
- # --- TOOL 7: Dictionary Tool ---
199
- @tool
200
def dictionary_lookup(word: str) -> str:
    """Get the definition of an English word using dictionary."""
    from urllib.parse import quote

    term = (word or "").strip()
    if not term:
        # Nothing to look up; avoid a pointless network round-trip.
        return "error"

    # Percent-encode the term so spaces, '/', '?' or '#' in the agent's input
    # cannot change the request path.
    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(term, safe='')}"
    try:
        resp = requests.get(url, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        # First definition of the first meaning of the first entry.
        return data[0]["meanings"][0]["definitions"][0]["definition"]
    except Exception:
        # Tool contract: report failure as the literal string "error".
        return "error"
210
-
211
- # --- TOOL 8: Currency Conversion Tool ---
212
- @tool
213
def currency_convert(args: str) -> str:
    """
    Convert an amount from one currency to another.
    Input format: '<amount> <from_currency> to <to_currency>', e.g. '100 USD to EUR'
    """
    import math

    try:
        parts = args.upper().split()
        amount = float(parts[0])
        from_currency = parts[1]
        to_currency = parts[3]

        # Reject input that can never be a valid request before touching the
        # network: non-finite amounts and anything that is not a 3-letter code.
        if not math.isfinite(amount):
            return "error"
        for code in (from_currency, to_currency):
            if len(code) != 3 or not (code.isascii() and code.isalpha()):
                return "error"

        resp = requests.get(
            "https://api.exchangerate.host/convert",
            # Let requests build and encode the query string.
            params={"from": from_currency, "to": to_currency, "amount": amount},
            timeout=120,
        )
        resp.raise_for_status()
        data = resp.json()
        return str(round(data["result"], 2))
    except Exception:
        # Covers malformed input, HTTP/network failures and a missing or
        # null "result" field in the response.
        return "error"
230
 
231
  # --- TOOL 9: Image Captioning Tool ---
232
  @tool
@@ -246,33 +154,50 @@ def image_caption(image_url: str) -> str:
246
  # --- TOOL 10: Optical Character Recognition (OCR) Tool ---
247
  @tool
248
  def ocr_image(image_url: str) -> str:
249
- """Extract text from an image given its URL."""
250
- api_url = "https://api-inference.huggingface.co/models/impira/layoutlm-document-qa"
251
- headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
252
- payload = {"inputs": {"image": image_url, "question": "What text is in the image?"}}
 
 
 
 
 
 
 
 
253
  try:
254
- resp = requests.post(api_url, headers=headers, json=payload, timeout=120)
255
  resp.raise_for_status()
256
  data = resp.json()
257
- return data.get("answer", "no_text_found")
258
- except Exception:
259
- return "error"
260
 
261
  # --- TOOL 11: Image Classification Tool ---
262
  @tool
263
- def classify_image(image_url: str) -> str:
264
- """Classify the main object or scene in an image given its URL."""
265
- api_url = "https://api-inference.huggingface.co/models/google/vit-base-patch16-224"
 
 
 
 
 
266
  headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
267
- payload = {"inputs": image_url}
268
  try:
269
- resp = requests.post(api_url, headers=headers, json=payload, timeout=120)
270
- resp.raise_for_status()
271
- data = resp.json()
272
- return data[0]["label"] if isinstance(data, list) else data.get("label", "no_label")
273
- except Exception:
274
- return "error"
275
 
 
 
 
 
 
 
 
276
  # --- TOOL 12: Web Scraping Tool ---
277
  @tool
278
  def URL_scrape_tool(url: str) -> str:
@@ -342,34 +267,69 @@ def python_executor(code: str) -> str:
342
  @tool
343
  def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
344
  """
345
- Processes an input attachment (audio, image, video, Excel, or Python .py file) and returns extracted info (text, encoded information, metadata, etc.) to be used by LLM.
346
- This function accepts a JSON string 'input_str' with keys: 'file_bytes' (base64), and 'filename'. So input the file and filename as json strings.
 
 
 
 
 
 
 
 
 
 
347
  """
348
- import pandas as pd
349
 
 
350
  try:
351
- # Extract only the JSON object from the input string
352
  match = re.search(r'(\{.*\})', input_str, re.DOTALL)
353
- if match:
354
- input_str = match.group(1)
355
- data = json.loads(input_str)
356
- file_bytes = base64.b64decode(data["file_bytes"])
357
- filename = data["filename"]
358
  except Exception as e:
359
- return f"error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
  # Detect file type
362
  mime_type, _ = mimetypes.guess_type(filename)
 
363
  if not mime_type:
364
- # Fallback for .py and .csv files
365
- if filename.lower().endswith(".py"):
366
- mime_type = "text/x-python"
367
- elif filename.lower().endswith(".csv"):
368
- mime_type = "text/csv"
369
- elif filename.lower().endswith((".xls", ".xlsx")):
370
- mime_type = "application/vnd.ms-excel"
371
- else:
372
- return "error: Could not determine file type. Skip the file"
373
 
374
  # Handle audio files
375
  if mime_type.startswith("audio"):
@@ -390,7 +350,23 @@ def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
390
 
391
  # Handle image files
392
  elif mime_type.startswith("image"):
393
- image_b64 = base64.b64encode(file_bytes).decode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  return f"Attached image (base64): {image_b64}"
395
 
396
  # Handle video files (extract audio, then transcribe)
@@ -402,7 +378,7 @@ def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
402
  video_path = tmp_video.name
403
 
404
  audio_path = video_path + ".wav"
405
- import subprocess
406
  subprocess.run([
407
  "ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
408
  ], check=True)
@@ -464,70 +440,18 @@ def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
464
 
465
  # --- TOOL 16: Research Paper Info Extraction Tool ---
466
  @tool
467
def search_and_extract_research_paper_info(query: str) -> str:
    """
    Searches for research and online papers/journals using the Semantic Scholar API.
    Input: A search query (e.g., topic, paper title, or keywords).
    Output: A summary with title, authors, abstract, and a longer excerpt from the main sections of the top result.
    """
    pdf_path = None  # set once the PDF has been written, so `finally` can clean up
    try:
        # Search for papers using Semantic Scholar API
        search_url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": 1,
            "fields": "title,authors,abstract,url,openAccessPdf",
        }
        resp = requests.get(search_url, params=params, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        if not data.get("data"):
            return "No papers found for this query."

        paper = data["data"][0]
        # Fields can be present with an explicit null, in which case
        # `.get(key, default)` returns None rather than the default.
        # `or` normalizes both the missing and the null case.
        title = paper.get("title") or ""
        authors = ", ".join(
            a["name"] for a in (paper.get("authors") or []) if a.get("name")
        )
        abstract = paper.get("abstract") or ""
        paper_url = paper.get("url") or ""
        pdf_url = (paper.get("openAccessPdf") or {}).get("url")
        if not pdf_url:
            return f"Paper found: {title}\nAuthors: {authors}\nAbstract: {abstract}\nURL: {paper_url}\n(No open access PDF available.)"

        # Download the PDF
        pdf_resp = requests.get(pdf_url, timeout=120)
        pdf_resp.raise_for_status()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
            tmp_pdf.write(pdf_resp.content)
            pdf_path = tmp_pdf.name

        # Extract text from PDF; close the document before the file is removed.
        doc = fitz.open(pdf_path)
        try:
            full_text = "".join(page.get_text("text") + "\n" for page in doc)
        finally:
            doc.close()

        # Collect text starting at the first line mentioning "introduction",
        # stopping once a little over 2000 characters have been gathered.
        pieces = []
        collected = 0
        in_main = False
        for line in full_text.splitlines():
            if "introduction" in line.lower():
                in_main = True
            if in_main:
                piece = line.strip() + " "
                pieces.append(piece)
                collected += len(piece)
                if collected > 2000:
                    break
        main_sections = "".join(pieces)

        return (
            f"Title: {title}\n"
            f"Authors: {authors}\n"
            f"Abstract: {abstract}\n"
            f"URL: {paper_url}\n"
            # Fall back to the head of the document when no introduction was found.
            f"Main Sections (excerpt): {main_sections.strip() if main_sections else full_text[:2000]}"
        )
    except Exception as e:
        return f"error: {e}"
    finally:
        # The temp file was created with delete=False; remove it ourselves so
        # repeated tool calls do not accumulate PDFs on disk.
        if pdf_path and os.path.exists(pdf_path):
            try:
                os.remove(pdf_path)
            except OSError:
                pass
531
 
532
 
533
  # --- TOOL 17:Tool for sports, awards, competitions etc. ---
@@ -599,6 +523,27 @@ def sports_awards_historicalfacts_tool(query: str) -> str:
599
  except Exception as e:
600
  return f"error: {e}"
601
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  # --- TOOL 18: YouTube Transcript Tool ---
603
  @tool
604
  def video_url_to_transcript_tool(media_url: str) -> str:
@@ -749,24 +694,27 @@ tools_list = [
749
  python_excel_audio_video_attached_file_tool,
750
  wikipedia_and_generalknowledge_search,
751
  # sports_awards_historicalfacts_tool,
752
- search_and_extract_research_paper_info,
753
  python_executor,
754
  # get_weather,
755
  # calculator,
756
  # convert_units,
757
  # get_time,
758
  # get_date,
759
- dictionary_lookup,
760
  # currency_convert,
761
  # image_caption,
762
  # ocr_image,
763
  # classify_image,
764
  current_events_news_search_tool,
 
 
765
  URL_scrape_tool,
766
  # audio_url_to_text,
767
  # sports_awards_historicalfacts_tool,
768
- video_url_to_transcript_tool,
769
- # max_object_in_video,
 
770
  ]
771
 
772
  tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
@@ -777,7 +725,7 @@ tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in t
777
  # --- System Prompt for the Agent ---
778
 
779
  system_prompt = f"""
780
- You are a general AI assistant, who can answer about general knowledge, historical facts, and also can analyze audios, images and videos. You should think through the input question step-by-step and use tools if needed.
781
 
782
  Use this reasoning format repeatedly:
783
  Thought: (what you think is happening or what you want to do next)
@@ -785,26 +733,17 @@ Action: (the tool to use, if needed)
785
  Action Input: (input to the tool)
786
  Observation: (result of the tool call)
787
 
788
- Repeat this process as needed. ONLY AFTER finishing your reasoning and tool use, provide: YOUR FINAL ANSWER
789
- YOUR FINAL ANSWER should be just a number, string, or comma-separated list with no explanations, no thoughts, just the answer
790
 
791
  You also have access to a set of tools, which you can use to answer the question. The available tools are:
792
  {tool_descriptions}
793
 
794
  If the question is related to sports, awards, historical facts or similar topic that can be answered from wikipedia, you should use the 'wikipedia_and_generalknowledge_search'.
795
- If the question is based on current events or news kind, then you can utilize the tool 'current_events_news_search_tool' to fetch relevant page information and answer from it.
796
  If the tool returns a long text, table, or list, extract only the most relevant information/paragraphs or data from which you can derive the answer, and return that as your final answer.
797
  You must not use multiple tools in a single call. Don't hallucinate.
798
 
799
- Instructions to follow for YOUR FINAL ANSWER:
800
- Guidance:
801
- - Prefer reasoning and tool use before jumping to final answer.
802
- - Only use one tool per iteration.
803
- - If the question relates to historical data or general knowledge, use 'wikipedia_and_generalknowledge_search'.
804
- - If it relates to recent news, use 'current_events_news_search_tool'.
805
- - If tool output is long, extract only what is relevant and use it to answer the question.
806
- - Never hallucinate. If you cannot answer even after 12 iterations, return YOUR FINAL ANSWER as: no_answer
807
- - Don't include explanations, thoughts, or tool calls in YOUR FINAL ANSWER.
808
 
809
  **Examples:**
810
  Q: Which country had the least number of athletes at the 1928 Summer Olympics?
@@ -818,8 +757,9 @@ Q: What are the top 3 programming languages?
818
  Thought: This is common knowledge.
819
  YOUR FINAL ANSWER: Python, JavaScript, Java
820
 
821
- If after 12 iterations also a tool usage is not useful then try to answer directly based on your knowledge without any hallucination. If you cannot answer then just say "no_answer" as YOUR FINAL ANSWER.
822
  """
 
823
  # If your final answer is something like 'there were 5 studio albums published between 2000 and 2009' then modify YOUR FINAL ANSWER as: '5'
824
  # If your final answer is something like 'b, e' then YOUR FINAL ANSWER be: 'b, e'
825
  # For each question, follow this format:
 
1
  import os
2
  import gradio as gr
3
+ import requests, tempfile, base64, json, datetime, re, subprocess, mimetypes, fitz
 
 
 
 
4
  import pandas as pd
 
5
  from langchain.tools import tool
6
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
7
  from langchain.agents import initialize_agent, AgentType
8
  from bs4 import BeautifulSoup
 
9
  from langchain_openai import ChatOpenAI
10
+ from langchain_community.utilities import ArxivAPIWrapper
11
+ from youtube_transcript_api import YouTubeTranscriptApi
12
  import yt_dlp
 
 
13
  from PIL import Image
14
  from transformers import pipeline
15
 
 
80
  # tool.name is set to the function name (e.g., `search_tool`), and
81
  # tool.description is set to the docstring of the function (the triple-quoted string right under def ...) (e.g., "Answer general knowledge or current events queries using DuckDuckGo.").
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # --- TOOL 3: Calculator Tool ---
85
  @tool
 
94
  except Exception:
95
  return "error"
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  # --- TOOL 6: Wikipedia Summary Tool ---
98
  @tool
99
  def wikipedia_and_generalknowledge_search(query: str) -> str:
 
135
  except Exception as e:
136
  return f"error: {e}"
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  # --- TOOL 9: Image Captioning Tool ---
140
  @tool
 
154
  # --- TOOL 10: Optical Character Recognition (OCR) Tool ---
155
  @tool
156
  def ocr_image(image_url: str) -> str:
157
+ """
158
+ Extracts all readable text from an image using HuggingFace TrOCR (microsoft/trocr-base-stage1).
159
+ Input: URL to an image (e.g., PNG or JPG).
160
+ Output: Recognized text string.
161
+ """
162
+ api_url = "https://api-inference.huggingface.co/models/microsoft/trocr-base-stage1"
163
+ headers = {
164
+ "Authorization": f"Bearer {HF_ACCESS_KEY}",
165
+ "Content-Type": "application/json"
166
+ }
167
+ payload = {"inputs": image_url}
168
+
169
  try:
170
+ resp = requests.post(api_url, headers=headers, json=payload, timeout=60)
171
  resp.raise_for_status()
172
  data = resp.json()
173
+ return data[0]["generated_text"]
174
+ except Exception as e:
175
+ return f"OCR error: {e}"
176
 
177
  # --- TOOL 11: Image Classification Tool ---
178
  @tool
179
def clasify_describe_image(image_url: str) -> str:
    """
    Generates a caption describing the contents of an image using HuggingFace (ViT-GPT2).
    Use this tool to identify the main subject of an image so that an LLM can use it to answer further.
    Input: image URL
    Output: caption like 'A golden retriever lying on a couch.'
    """
    api_url = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
    headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}

    try:
        # Fetch the image and forward its raw bytes to the captioning model.
        img_resp = requests.get(image_url, timeout=120)
        img_resp.raise_for_status()

        response = requests.post(api_url, headers=headers, data=img_resp.content, timeout=60)
        response.raise_for_status()
        result = response.json()

        # An empty list or an entry without "generated_text" means "no caption",
        # not a crash; same shape check as the attachment tool's image branch.
        if (
            isinstance(result, list)
            and result
            and isinstance(result[0], dict)
            and "generated_text" in result[0]
        ):
            return result[0]["generated_text"]
        return "no_caption"
    except Exception as e:
        return f"caption error: {e}"
200
+
201
  # --- TOOL 12: Web Scraping Tool ---
202
  @tool
203
  def URL_scrape_tool(url: str) -> str:
 
267
  @tool
268
  def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
269
  """
270
+ Accepts a JSON string with one of:
271
+ 'file_bytes' : base-64–encoded bytes (existing behaviour)
272
+ • 'file_path' : local absolute/relative path to a file
273
+ • 'file_url' : downloadable URL (e.g. Hugging Face dataset link)
274
+
275
+ Keys (at least one bytes / path / url required):
276
+ • filename (str) – original name with extension
277
+ • file_bytes (str, base-64) – optional
278
+ • file_path (str) – optional
279
+ • file_url (str) – optional
280
+
281
+ Returns: textual summary / preview ready for the LLM.
282
  """
 
283
 
284
+ # ---------- 1. Parse JSON ------------------------------------------------
285
  try:
286
+ # Robustly pull out the first {...} block even if extra tokens are around it
287
  match = re.search(r'(\{.*\})', input_str, re.DOTALL)
288
+ payload = json.loads(match.group(1) if match else input_str)
 
 
 
 
289
  except Exception as e:
290
+ return f"error: Could not parse JSON → {e}"
291
+
292
+ filename = payload.get("filename")
293
+ b64_data = payload.get("file_bytes")
294
+ file_path = payload.get("file_path")
295
+ file_url = payload.get("file_url")
296
+
297
+ if not filename:
298
+ return "error: 'filename' is required."
299
+
300
+ # ---------- 2. Acquire raw bytes ----------------------------------------
301
+ try:
302
+ if b64_data: # inline bytes
303
+ file_bytes = base64.b64decode(b64_data)
304
+
305
+ elif file_path and os.path.exists(file_path): # local path
306
+ with open(file_path, "rb") as f:
307
+ file_bytes = f.read()
308
+
309
+ elif file_url: # remote URL
310
+ # stream to avoid loading huge files into memory at once
311
+ r = requests.get(file_url, timeout=60, stream=True)
312
+ r.raise_for_status()
313
+ file_bytes = r.content
314
+
315
+ else:
316
+ return "error: Provide 'file_bytes', 'file_path', or 'file_url'."
317
+ except Exception as e:
318
+ return f"error: Could not load file → {e}"
319
 
320
  # Detect file type
321
  mime_type, _ = mimetypes.guess_type(filename)
322
+ # fallback for common extensions if guess_type fails
323
  if not mime_type:
324
+ ext = filename.lower()
325
+ mime_type = (
326
+ "text/x-python" if ext.endswith(".py") else
327
+ "text/csv" if ext.endswith(".csv") else
328
+ "application/vnd.ms-excel" if ext.endswith((".xls", ".xlsx")) else
329
+ None
330
+ )
331
+ if not mime_type:
332
+ return "error: Could not determine file type. Skip the file."
333
 
334
  # Handle audio files
335
  if mime_type.startswith("audio"):
 
350
 
351
  # Handle image files
352
  elif mime_type.startswith("image"):
353
+ # image_b64 = base64.b64encode(file_bytes).decode()
354
+ api_url = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
355
+ headers = {"Authorization": f"Bearer {os.getenv('HF_ACCESS_KEY', '')}"}
356
+ try:
357
+ resp = requests.post(api_url, headers=headers, data=file_bytes, timeout=60)
358
+ resp.raise_for_status()
359
+ result = resp.json()
360
+ if isinstance(result, list) and result and "generated_text" in result[0]:
361
+ caption = result[0]["generated_text"]
362
+ else:
363
+ caption = "no_caption"
364
+
365
+ # Optionally also include base-64 so the LLM can refer to the raw image
366
+ b64 = base64.b64encode(file_bytes).decode()
367
+ return f"Image caption: {caption}\nAttached image (base64): {b64}"
368
+ except Exception as e:
369
+ return f"caption error: {e}"
370
  return f"Attached image (base64): {image_b64}"
371
 
372
  # Handle video files (extract audio, then transcribe)
 
378
  video_path = tmp_video.name
379
 
380
  audio_path = video_path + ".wav"
381
+ # import subprocess
382
  subprocess.run([
383
  "ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
384
  ], check=True)
 
440
 
441
  # --- TOOL 16: Research Paper Info Extraction Tool ---
442
  @tool
443
def research_paper_search(query: str) -> str:
    """
    Search arXiv for journals/research/technical papers matching a query.
    Returns top results including title, authors, abstract, and PDF link.
    """
    # NOTE(review): the text returned by ArxivAPIWrapper.run() may not include a
    # PDF link; confirm against the installed langchain_community version.
    if not query or not query.strip():
        return "error: empty query"

    try:
        wrapper = ArxivAPIWrapper(
            top_k_results=2,             # how many papers to return
            doc_content_chars_max=2000,  # cap on the returned text length
        )
        return wrapper.run(query.strip())
    except Exception as e:
        # Match the other tools: report failures as an "error: ..." string
        # instead of raising into the agent loop.
        return f"error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
 
457
  # --- TOOL 17:Tool for sports, awards, competitions etc. ---
 
523
  except Exception as e:
524
  return f"error: {e}"
525
 
526
+ # --- TOOL 17: YouTube Transcript Tool ---
527
+ @tool
528
def youtube_transcript_tool(video_url: str) -> str:
    """
    Get transcript (if available) for a YouTube video without downloading audio.
    Works only if subtitles or auto-captions exist.
    """
    try:
        # Extract the 11-character video ID. Besides `watch?v=` and `youtu.be/`
        # links, also accept /shorts/, /embed/ and /live/ URLs, which carry the
        # ID in the path.
        match = re.search(
            r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([a-zA-Z0-9_-]{11})",
            (video_url or "").strip(),
        )
        if not match:
            return "Invalid YouTube URL."
        video_id = match.group(1)

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = " ".join(chunk["text"] for chunk in transcript)
        return full_text[:5000]  # truncate to keep LLM input manageable
    except Exception as e:
        return f"Transcript error: {e}"
545
+
546
+
547
  # --- TOOL 18: YouTube Transcript Tool ---
548
  @tool
549
  def video_url_to_transcript_tool(media_url: str) -> str:
 
694
  python_excel_audio_video_attached_file_tool,
695
  wikipedia_and_generalknowledge_search,
696
  # sports_awards_historicalfacts_tool,
697
+ research_paper_search,
698
  python_executor,
699
  # get_weather,
700
  # calculator,
701
  # convert_units,
702
  # get_time,
703
  # get_date,
704
+ # dictionary_lookup,
705
  # currency_convert,
706
  # image_caption,
707
  # ocr_image,
708
  # classify_image,
709
  current_events_news_search_tool,
710
+ ocr_image,
711
+ clasify_describe_image,
712
  URL_scrape_tool,
713
  # audio_url_to_text,
714
  # sports_awards_historicalfacts_tool,
715
+ youtube_transcript_tool,
716
+ # video_url_to_transcript_tool,
717
+ max_object_in_video,
718
  ]
719
 
720
  tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
 
725
  # --- System Prompt for the Agent ---
726
 
727
  system_prompt = f"""
728
+ You are a general AI assistant, who can answer about general knowledge, historical facts, and also can analyze audios, images, and videos. You should think through the input question step-by-step and use tools if needed.
729
 
730
  Use this reasoning format repeatedly:
731
  Thought: (what you think is happening or what you want to do next)
 
733
  Action Input: (input to the tool)
734
  Observation: (result of the tool call)
735
 
736
+ Repeat this process as needed. ONLY AFTER finishing your reasoning and/or tool use, provide: YOUR FINAL ANSWER
737
+ YOUR FINAL ANSWER should be just a number, string, or comma-separated list with no explanations and no thoughts, but just the answer
738
 
739
  You also have access to a set of tools, which you can use to answer the question. The available tools are:
740
  {tool_descriptions}
741
 
742
  If the question is related to sports, awards, historical facts or similar topic that can be answered from wikipedia, you should use the 'wikipedia_and_generalknowledge_search'.
743
+ If the question is about current events or news or similar current affairs category, you can utilize the tool 'current_events_news_search_tool' to fetch relevant page information and answer from it.
744
  If the tool returns a long text, table, or list, extract only the most relevant information/paragraphs or data from which you can derive the answer, and return that as your final answer.
745
  You must not use multiple tools in a single call. Don't hallucinate.
746
 
 
 
 
 
 
 
 
 
 
747
 
748
  **Examples:**
749
  Q: Which country had the least number of athletes at the 1928 Summer Olympics?
 
757
  Thought: This is common knowledge.
758
  YOUR FINAL ANSWER: Python, JavaScript, Java
759
 
760
+ If even after 12 iterations, a tool usage is not useful then try to answer directly based on your knowledge without any hallucination. If you cannot answer then just say "no_answer" as YOUR FINAL ANSWER.
761
  """
762
+
763
  # If your final answer is something like 'there were 5 studio albums published between 2000 and 2009' then modify YOUR FINAL ANSWER as: '5'
764
  # If your final answer is something like 'b, e' then YOUR FINAL ANSWER be: 'b, e'
765
  # For each question, follow this format:
requirements.txt CHANGED
@@ -13,4 +13,6 @@ mimetype
13
  PyMuPDF
14
  yt_dlp
15
  pandas
16
- pillow
 
 
 
13
  PyMuPDF
14
  yt_dlp
15
  pandas
16
+ pillow
17
+ arxiv
18
+ youtube-transcript-api