ihanif committed on
Commit
1aeb176
·
1 Parent(s): 81917a3

feat: add agent feature.

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. agent.py +299 -0
  3. app.py +15 -4
  4. requirements.txt +16 -1
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Template Final Assignment
3
  emoji: 🕵🏻‍♂️
4
  colorFrom: indigo
5
  colorTo: indigo
 
1
  ---
2
+ title: Agent Final Assignment
3
  emoji: 🕵🏻‍♂️
4
  colorFrom: indigo
5
  colorTo: indigo
agent.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import google.generativeai as genai
3
+ from langchain_core.messages import HumanMessage, SystemMessage
4
+ from langchain_google_genai import ChatGoogleGenerativeAI
5
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper
6
+ from langchain.agents import Tool, AgentExecutor, ConversationalAgent, initialize_agent
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.tools import Tool
9
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
10
+ from PIL import Image
11
+
12
+ import os
13
+ import tempfile
14
+ import time
15
+ import re
16
+ import json
17
+ from typing import List, Optional, Dict, Any
18
+ from urllib.parse import urlparse
19
+ import requests
20
+ import yt_dlp
21
+ from bs4 import BeautifulSoup
22
+ from difflib import SequenceMatcher
23
+
24
class Agent:
    """Conversational, tool-using agent backed by a Google Gemini chat model.

    The agent wires a set of LangChain ``Tool`` objects (web search, Wikipedia,
    and several LLM-backed "analyze_*" helpers) into a ``ConversationalAgent``
    driven by an ``AgentExecutor`` with buffer memory.

    Args:
        model_name: Model identifier. Must start with ``"gemini"``. The bare
            alias ``"gemini"`` is mapped to ``_DEFAULT_GEMINI_MODEL`` because it
            is not itself a concrete model id.
        api_key: Google Generative AI API key.

    Raises:
        ValueError: If ``model_name`` does not start with ``"gemini"``.
    """

    # Concrete model used when the caller passes the bare "gemini" alias.
    _DEFAULT_GEMINI_MODEL = "gemini-2.0-flash"
    # Upper bound on fetched page text forwarded to the model by _analyze_url.
    _MAX_PAGE_CHARS = 12000
    # Network timeout (seconds) for outbound HTTP requests.
    _HTTP_TIMEOUT = 30

    def __init__(self, model_name: str = "gemini", api_key: str = "BasicAgent"):
        self.model = model_name
        self.api_key = api_key
        self.tools = [
            Tool(
                name='web_search',
                func=self._web_search,
                description="A tool to search the web for information."
            ),
            Tool(
                name='analyze_video',
                func=self._analyze_video,
                description="A tool to analyze video content."
            ),
            Tool(
                name='analyze_image',
                func=self._analyze_image,
                description="A tool to analyze image content."
            ),
            Tool(
                name='analyze_list',
                func=self._analyze_list,
                description="A tool to analyze a list."
            ),
            Tool(
                name='analyze_table',
                func=self._analyze_table,
                description="A tool to analyze a table."
            ),
            Tool(
                name='analyze_text',
                func=self._analyze_text,
                description="A tool to analyze text content."
            ),
            Tool(
                name='analyze_url',
                func=self._analyze_url,
                description="A tool to analyze a URL."
            ),
            Tool(
                name='wikipedia_search',
                func=WikipediaAPIWrapper().run,
                description="A tool to search Wikipedia."
            ),
        ]
        # The agent prompt is a plain string template, so the history must be
        # rendered as text rather than as a list of message objects.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=False,
            output_key="output",
            input_key="input",
        )
        self.llm = self._initialize_model(model_name, api_key)
        # Must call the method on this class; the bare name `initialize_agent`
        # resolves to the LangChain factory imported at module level, which
        # requires arguments.
        self.agent = self.initialize_agent()

    def _initialize_model(self, model_name: str, api_key: str):
        """Return the chat model for ``model_name``.

        Only Gemini models are supported. ``api_key`` is accepted for interface
        compatibility; the key actually used is ``self.api_key``.

        Raises:
            ValueError: If ``model_name`` is not a Gemini model name.
        """
        if not model_name.startswith("gemini"):
            raise ValueError(f"Unsupported model name: {model_name}. Please use a valid model name.")
        if model_name == "gemini":
            # "gemini" alone is an alias, not a model id the API can resolve.
            return self._initialize_gemini(self._DEFAULT_GEMINI_MODEL)
        return self._initialize_gemini(model_name)

    def _initialize_gemini(self, model_name: str = "gemini-2.0-flash"):
        """Build a deterministic (temperature 0) ``ChatGoogleGenerativeAI``."""
        generation_config = {
            "temperature": 0.0,
            "max_output_tokens": 2000,
            "candidate_count": 1,
        }

        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        }

        # NOTE(review): `generation_config` and `system_message` do not appear
        # to be declared fields of ChatGoogleGenerativeAI and may be ignored by
        # the installed version -- confirm against the library documentation.
        return ChatGoogleGenerativeAI(
            model=model_name,
            google_api_key=self.api_key,
            temperature=0,
            max_output_tokens=2000,
            generation_config=generation_config,
            safety_settings=safety_settings,
            system_message=SystemMessage(content=(
                "You are a precise AI assistant that helps users find information and analyze content. "
                "You can directly understand and analyze YouTube videos, images, and other content. "
                "When analyzing videos, focus on relevant details like dialogue, text, and key visual elements. "
                "For lists, tables, and structured data, ensure proper formatting and organization. "
                "If you need additional context, clearly explain what is needed."
            ))
        )

    def initialize_agent(self):
        """Assemble the conversational agent and wrap it in an executor.

        Returns:
            An ``AgentExecutor`` bound to ``self.tools`` and ``self.memory``.
        """
        PREAMBLE = (
            "You are a helpful assistant. You can use the tools provided to search the web, analyze videos, images, lists, and tables. "
            "Please provide clear and concise answers.\n\n"
            "TOOLS: You have access to the following tools:"
        )
        # `{tool_names}` and `{ai_prefix}` are filled in by
        # ConversationalAgent.create_prompt via str.format, so no other brace
        # placeholders may appear in this text. The final-answer line uses the
        # AI prefix because that is the marker the conversational output
        # parser looks for.
        FORMAT_PROMPT = (
            "To use a tool, follow this format:\n"
            "Thought: Do I need to use a tool? Yes\n"
            "Action: the action to take, should be one of [{tool_names}]\n"
            "Action Input: the input to the action\n"
            "Observation: the result of the action\n\n"
            "When you have the final answer or if you don't need to use a tool, you MUST use the format:\n"
            "Thought: Do I need to use a tool? No\n"
            "{ai_prefix}: [your final response]"
        )
        POSTFIX = (
            "Previous conversation:\n"
            "{chat_history}\n\n"
            "New question: {input}\n"
            "{agent_scratchpad}"
        )

        agent = ConversationalAgent.from_llm_and_tools(
            llm=self.llm,
            tools=self.tools,
            prefix=PREAMBLE,
            suffix=POSTFIX,
            format_instructions=FORMAT_PROMPT,
            # Only the variables that are supplied at run time.
            input_variables=["input", "chat_history", "agent_scratchpad"],
        )
        return AgentExecutor.from_agent_and_tools(
            agent=agent,
            tools=self.tools,
            memory=self.memory,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=3,
        )

    def run(self, query: str) -> str:
        """Answer ``query``, retrying with linear back-off on failure.

        Returns:
            The agent's answer, or an error string once every attempt failed.
        """
        max_retries = 3
        retry_delay = 2
        for attempt in range(1, max_retries + 1):
            try:
                return self.agent.run(input=query)
            except Exception as e:
                if attempt == max_retries:
                    # No point sleeping when there is nothing left to retry.
                    print(f"Attempt {attempt} failed: {e}.")
                    break
                sleep_time = retry_delay * attempt
                print(f"Attempt {attempt} failed: {e}. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)
        return f"Error: request failed after {max_retries} attempts. Please try again later."

    def _ask_llm(self, messages) -> str:
        """Invoke the chat model with ``messages`` and return its text."""
        response = self.llm.invoke(messages)
        return response.content if hasattr(response, 'content') else str(response)

    def _web_search(self, query: str, site: Optional[str] = None) -> str:
        """Search DuckDuckGo for ``query``, optionally restricted to ``site``."""
        search = DuckDuckGoSearchAPIWrapper(max_results=5)
        search_query = f"{query} site:{site}" if site else query
        results = search.run(search_query)
        return results if results else "No results found."

    def _analyze_video(self, video_url: str) -> str:
        """Summarize a video from its title and description metadata.

        Only metadata is fetched (no download, no transcript); the model is
        asked to reason from the title and description.
        """
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'no_warnings': True,
            'extract_flat': True,
            'no_playlist': True,
            'youtube_include_dash_manifest': False
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(video_url, download=False, process=False)
                entries = info.get('entries') if info else None
                if entries is not None:
                    # With process=False the entries may be a lazy iterator,
                    # so take the first item without indexing.
                    info = next(iter(entries), None)
                if not info:
                    return "Error accessing video: no video information found."
                title = info.get('title') or 'No title available.'
                description = info.get('description') or 'No description available.'

                prompt = "\n".join([
                    "Please analyze this YouTube video:",
                    f"Title: {title}",
                    f"URL: {video_url}",
                    f"Description: {description}",
                    "Please provide a detailed analysis focusing on:",
                    "1. Main topic and key points from the title and description",
                    "2. Expected visual elements and scenes",
                    "3. Overall message or purpose",
                    "4. Target audience",
                ])
                return self._ask_llm([HumanMessage(content=prompt)])
            except Exception as e:
                if 'Sign in to confirm' in str(e):
                    return "This video requires sign-in. Please provide a different video URL."
                return f"Error accessing video: {str(e)}"

    def _analyze_image(self, image_url: str) -> str:
        """Download an image and ask the model to describe its contents.

        The image bytes are validated with Pillow and sent to the model as a
        base64 data URI alongside the text prompt.
        """
        import base64
        import io

        try:
            http_response = requests.get(image_url, timeout=self._HTTP_TIMEOUT)
            if http_response.status_code != 200:
                return f"Error accessing image: {http_response.status_code}"

            raw = http_response.content
            # Opening the bytes confirms they decode as an image and tells us
            # the format for the data URI.
            with Image.open(io.BytesIO(raw)) as image:
                mime_type = Image.MIME.get(image.format)
            if not mime_type:
                header = http_response.headers.get("Content-Type", "image/png")
                mime_type = header.split(";")[0].strip() or "image/png"

            prompt = f"Please analyze this image: {image_url}. Provide a detailed description of the content with focus on the following aspects:\n1. Main subjects and objects in the image\n2. Colors, textures, and patterns\n3. Overall mood or atmosphere\n4. Any text or symbols present in the image\n5. Possible context or background information"
            encoded = base64.b64encode(raw).decode("ascii")
            message = HumanMessage(content=[
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": f"data:{mime_type};base64,{encoded}"},
            ])
            return self._ask_llm([message])
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def _analyze_list(self, input_list: List[str]) -> str:
        """Ask the model to summarize a list."""
        prompt = f"Please analyze this list: {input_list}. Provide a detailed summary focusing on:\n1. Main themes or categories\n2. Key items or elements\n3. Possible relationships or connections\n4. Any patterns or trends observed"
        return self._ask_llm([HumanMessage(content=prompt)])

    def _analyze_table(self, input_table: List[List[Any]]) -> str:
        """Ask the model to summarize a table."""
        prompt = f"Please analyze this table: {input_table}. Provide a detailed summary focusing on:\n1. Main themes or categories\n2. Key items or elements\n3. Possible relationships or connections\n4. Any patterns or trends observed"
        return self._ask_llm([HumanMessage(content=prompt)])

    def _analyze_text(self, text: str) -> str:
        """Ask the model to summarize a piece of text."""
        prompt = f"Please analyze this text: {text}. Provide a detailed summary focusing on:\n1. Main themes or categories\n2. Key items or elements\n3. Possible relationships or connections\n4. Any patterns or trends observed"
        return self._ask_llm([HumanMessage(content=prompt)])

    def _analyze_url(self, url: str) -> str:
        """Fetch a web page and ask the model to summarize its visible text.

        The page text is truncated to ``_MAX_PAGE_CHARS`` characters before it
        is placed in the prompt.
        """
        try:
            http_response = requests.get(url, timeout=self._HTTP_TIMEOUT)
            if http_response.status_code != 200:
                return f"Error accessing URL: {http_response.status_code}"

            soup = BeautifulSoup(http_response.text, 'html.parser')
            text = soup.get_text(separator=" ", strip=True)[:self._MAX_PAGE_CHARS]
            prompt = (
                f"Please analyze this URL: {url}. Provide a detailed summary focusing on:\n1. Main themes or categories\n2. Key items or elements\n3. Possible relationships or connections\n4. Any patterns or trends observed"
                f"\n\nPage content:\n{text}"
            )
            return self._ask_llm([HumanMessage(content=prompt)])
        except Exception as e:
            return f"Error processing URL: {str(e)}"
298
+
299
+
app.py CHANGED
@@ -3,6 +3,10 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -12,12 +16,19 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from dotenv import load_dotenv
7
+ from agent import Agent
8
+ # Load environment variables from .env file
9
+ load_dotenv()
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
 
16
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
17
  class BasicAgent:
18
  def __init__(self):
19
+ print("BasicAgent initialized.")
20
+ api_key = os.getenv('GEMINI_API_KEY')
21
+ print(f"API Key: {api_key[:4]}...") # Print only the first 4 characters for security
22
+ if not api_key:
23
+ raise ValueError("GEMINI_API_KEY environment variable not set.")
24
+
25
+ self.agent = Agent(api_key=api_key)
26
+ print("Agent initialized successfully")
27
  def __call__(self, question: str) -> str:
28
  print(f"Agent received question (first 50 chars): {question[:50]}...")
29
+ final_answer = self.agent.run(question)
30
+ print(f"Agent returned final answer: {final_answer}")
31
+ return final_answer
32
 
33
  def run_and_submit_all( profile: gr.OAuthProfile | None):
34
  """
requirements.txt CHANGED
@@ -1,2 +1,17 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ langchain
4
+ langchain-core
5
+ langchain-community
6
+ langchain-google-genai
7
+ google-generativeai
8
+ python-dotenv
9
+ google-api-python-client
10
+ duckduckgo-search
11
+ tiktoken
12
+ google-cloud-speech
13
+ pydub
14
+ yt-dlp
15
+ wikipedia
16
+ Pillow
17
+ wikipedia-api