Sheshank Joshi commited on
Commit
9fced79
·
1 Parent(s): 554ef85

reasoning agent

Browse files
__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/__pycache__/agent.cpython-312.pyc and b/__pycache__/agent.cpython-312.pyc differ
 
__pycache__/basic_tools.cpython-312.pyc CHANGED
Binary files a/__pycache__/basic_tools.cpython-312.pyc and b/__pycache__/basic_tools.cpython-312.pyc differ
 
__pycache__/reasoning_agent.cpython-312.pyc ADDED
Binary file (10.6 kB). View file
 
__pycache__/utils.cpython-312.pyc ADDED
Binary file (1.88 kB). View file
 
advanced_tool_agent.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any, Optional, Type, Callable
3
+ from datetime import datetime, timedelta
4
+ import heapq
5
+ import json
6
+
7
+ import torch
8
+ from langchain_core.tools import BaseTool
9
+ from langchain_core.language_models import BaseChatModel
10
+ from langchain_core.prompts import ChatPromptTemplate
11
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
12
+ from langchain_core.vectorstores import VectorStore
13
+ from langchain_core.documents import Document
14
+ from langchain_core.output_parsers import StrOutputParser
15
+ from langchain.tools.retriever import create_retriever_tool
16
+ from langchain_huggingface import HuggingFaceEmbeddings
17
+
18
+ from langgraph.graph import StateGraph, END
19
+ from langgraph.prebuilt import (
20
+ ToolNode,
21
+ ToolInvocation,
22
+ agent_executor,
23
+ create_function_calling_executor,
24
+ AgentState,
25
+ MessageGraph
26
+ )
27
+ from langgraph.prebuilt.tool_executor import ToolExecutor, extract_tool_invocations
28
+ from langgraph.prebuilt.tool_nodes import get_default_tool_node_parser
29
+
30
+
31
+ class AdvancedToolAgent:
32
+ """
33
+ An advanced agent with robust tool-calling capabilities using LangGraph.
34
+ Features enhanced memory management, context enrichment, and tool execution tracking.
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ embedding_model: HuggingFaceEmbeddings,
40
+ vector_store: VectorStore,
41
+ llm: BaseChatModel,
42
+ tools: Optional[List[BaseTool]] = None,
43
+ max_iterations: int = 10,
44
+ memory_threshold: float = 0.7
45
+ ):
46
+ """
47
+ Initialize the agent with required components.
48
+
49
+ Args:
50
+ embedding_model: Model for embedding text
51
+ vector_store: Storage for agent memory
52
+ llm: Language model for agent reasoning
53
+ tools: List of tools accessible to the agent
54
+ max_iterations: Maximum number of tool calling iterations
55
+ memory_threshold: Threshold for deciding when to include memory context (0-1)
56
+ """
57
+ self.embedding_model = embedding_model
58
+ self.vector_store = vector_store
59
+ self.llm = llm
60
+ self.tools = tools or []
61
+ self.max_iterations = max_iterations
62
+ self.memory_threshold = memory_threshold
63
+
64
+ # Setup retriever for memory access
65
+ self.retriever = vector_store.as_retriever(
66
+ search_kwargs={"k": 3, "score_threshold": 0.75}
67
+ )
68
+
69
+ # Create memory retrieval tool
70
+ self.memory_tool = create_retriever_tool(
71
+ retriever=self.retriever,
72
+ name="memory_search",
73
+ description="Search the agent's memory for relevant past interactions and knowledge."
74
+ )
75
+
76
+ # Add memory tool to the agent's toolset
77
+ self.all_tools = self.tools + [self.memory_tool]
78
+
79
+ # Setup tool executor
80
+ self.tool_executor = ToolExecutor(self.all_tools)
81
+
82
+ # Build the agent's execution graph
83
+ self.agent_executor = self._build_agent_graph()
84
+
85
+ print(f"AdvancedToolAgent initialized with {len(self.all_tools)} tools")
86
+
87
+ def __call__(self, question: str) -> str:
88
+ """
89
+ Process a question using the agent.
90
+
91
+ Args:
92
+ question: The user query to respond to
93
+
94
+ Returns:
95
+ The agent's response
96
+ """
97
+ print(f"Agent received question: {question[:50]}..." if len(question) > 50 else question)
98
+
99
+ # Enrich context with relevant memory
100
+ enriched_input = self._enrich_context(question)
101
+
102
+ # Create initial state
103
+ initial_state = {
104
+ "messages": [HumanMessage(content=enriched_input)],
105
+ "tools": self.all_tools,
106
+ "tool_calls": [],
107
+ }
108
+
109
+ # Execute agent graph
110
+ final_state = self.agent_executor.invoke(initial_state)
111
+
112
+ # Extract the final response
113
+ final_message = final_state["messages"][-1]
114
+ answer = final_message.content
115
+
116
+ # Store this interaction in memory
117
+ self._store_interaction(question, answer, final_state.get("tool_calls", []))
118
+
119
+ # Periodically manage memory
120
+ self._periodic_memory_management()
121
+
122
+ print(f"Agent returning answer: {answer[:50]}..." if len(answer) > 50 else answer)
123
+ return answer
124
+
125
+ def _build_agent_graph(self):
126
+ """Build the LangGraph execution graph with enhanced tool calling"""
127
+
128
+ # Function for the agent to process messages and call tools
129
+ def agent_node(state: AgentState) -> AgentState:
130
+ """Process messages and decide on next action"""
131
+ messages = state["messages"]
132
+
133
+ # Add system instructions with tool details
134
+ if not any(isinstance(msg, SystemMessage) for msg in messages):
135
+ system_prompt = self._create_system_prompt()
136
+ messages = [SystemMessage(content=system_prompt)] + messages
137
+
138
+ # Get response from LLM
139
+ response = self.llm.invoke(messages)
140
+
141
+ # Extract any tool calls
142
+ tool_calls = extract_tool_invocations(
143
+ response,
144
+ self.all_tools,
145
+ strict_mode=False,
146
+ )
147
+
148
+ # Update state
149
+ new_state = state.copy()
150
+ new_state["messages"] = messages + [response]
151
+ new_state["tool_calls"] = tool_calls
152
+
153
+ return new_state
154
+
155
+ # Function for executing tools
156
+ def tool_node(state: AgentState) -> AgentState:
157
+ """Execute tools and add results to messages"""
158
+ # Get the tool calls from the state
159
+ tool_calls = state["tool_calls"]
160
+
161
+ # Execute each tool call
162
+ tool_results = []
163
+ for tool_call in tool_calls:
164
+ try:
165
+ # Execute the tool
166
+ result = self.tool_executor.invoke(tool_call)
167
+
168
+ # Create a tool message with the result
169
+ tool_msg = ToolMessage(
170
+ content=str(result),
171
+ tool_call_id=tool_call.id,
172
+ name=tool_call.name,
173
+ )
174
+ tool_results.append(tool_msg)
175
+
176
+ # Track tool usage for memory
177
+ self._track_tool_usage(tool_call.name, tool_call.args, result)
178
+ except Exception as e:
179
+ # Handle tool execution errors
180
+ error_msg = f"Error executing tool {tool_call.name}: {str(e)}"
181
+ tool_msg = ToolMessage(
182
+ content=error_msg,
183
+ tool_call_id=tool_call.id,
184
+ name=tool_call.name,
185
+ )
186
+ tool_results.append(tool_msg)
187
+
188
+ # Update state with tool results
189
+ new_state = state.copy()
190
+ new_state["messages"] = state["messages"] + tool_results
191
+ new_state["tool_calls"] = []
192
+ return new_state
193
+
194
+ # Create the graph
195
+ graph = StateGraph(AgentState)
196
+
197
+ # Add nodes
198
+ graph.add_node("agent", agent_node)
199
+ graph.add_node("tools", tool_node)
200
+
201
+ # Set the entry point
202
+ graph.set_entry_point("agent")
203
+
204
+ # Add edges
205
+ graph.add_conditional_edges(
206
+ "agent",
207
+ lambda state: "tools" if state["tool_calls"] else END,
208
+ {
209
+ "tools": "tools",
210
+ END: END,
211
+ }
212
+ )
213
+ graph.add_edge("tools", "agent")
214
+
215
+ # Set max iterations to prevent infinite loops
216
+ return graph.compile(max_iterations=self.max_iterations)
217
+
218
+ def _create_system_prompt(self) -> str:
219
+ """Create a system prompt with tool instructions"""
220
+ tool_descriptions = "\n\n".join([
221
+ f"Tool {i+1}: {tool.name}\n"
222
+ f"Description: {tool.description}\n"
223
+ f"Args: {json.dumps(tool.args, indent=2) if hasattr(tool, 'args') else 'No arguments required'}"
224
+ for i, tool in enumerate(self.all_tools)
225
+ ])
226
+
227
+ return f"""You are an advanced AI assistant with access to various tools.
228
+ When a user asks a question, use your knowledge and the available tools to provide
229
+ accurate and helpful responses.
230
+
231
+ AVAILABLE TOOLS:
232
+ {tool_descriptions}
233
+
234
+ INSTRUCTIONS FOR TOOL USAGE:
235
+ 1. When you need information that requires a tool, call the appropriate tool.
236
+ 2. Format tool calls clearly by specifying the tool name and inputs.
237
+ 3. Wait for tool results before providing final answers.
238
+ 4. Use tools only when necessary - if you can answer directly, do so.
239
+ 5. If a tool fails, try a different approach or tool.
240
+ 6. Always explain your reasoning step by step.
241
+
242
+ Remember to be helpful, accurate, and concise in your responses.
243
+ """
244
+
245
+ def _enrich_context(self, query: str) -> str:
246
+ """Enrich the input query with relevant context from memory"""
247
+ # Search for similar content
248
+ similar_docs = self.vector_store.similarity_search(
249
+ query,
250
+ k=2, # Limit to 2 most relevant documents
251
+ fetch_k=5 # Consider 5 candidates
252
+ )
253
+
254
+ # Only use memory if relevance is high enough
255
+ if not similar_docs or len(similar_docs) == 0:
256
+ return query
257
+
258
+ # Build enhanced context
259
+ context_additions = []
260
+ for doc in similar_docs:
261
+ content = doc.page_content
262
+
263
+ # Extract different types of memory
264
+ if "Question:" in content and "Final answer:" in content:
265
+ # Q&A memory
266
+ q = content.split("Question:")[1].split("Final answer:")[0].strip()
267
+ a = content.split("Final answer:")[1].split("Timestamp:", 1)[0].strip()
268
+
269
+ # Only add if it's not too similar to current question
270
+ if not self._is_similar_question(query, q, threshold=0.85):
271
+ context_additions.append(f"Related Q: {q}\nRelated A: {a}")
272
+
273
+ elif "Tool Knowledge" in content:
274
+ # Tool usage memory
275
+ tool_name = content.split("Tool:")[1].split("Query:")[0].strip()
276
+ tool_result = content.split("Result:")[1].split("Timestamp:")[0].strip()
277
+ context_additions.append(
278
+ f"From prior tool use ({tool_name}): {tool_result[:200]}"
279
+ )
280
+
281
+ # Only add context if we have relevant information
282
+ if context_additions:
283
+ return (
284
+ "Consider this relevant information first:\n\n" +
285
+ "\n\n".join(context_additions[:2]) + # Limit to 2 pieces of context
286
+ "\n\nNow answering this question: " + query
287
+ )
288
+ else:
289
+ return query
290
+
291
+ def _is_similar_question(self, query1: str, query2: str, threshold: float = 0.8) -> bool:
292
+ """Check if two questions are semantically similar using embeddings"""
293
+ # Get embeddings for both queries
294
+ if hasattr(self.embedding_model, 'embed_query'):
295
+ emb1 = self.embedding_model.embed_query(query1)
296
+ emb2 = self.embedding_model.embed_query(query2)
297
+
298
+ # Calculate cosine similarity
299
+ similarity = self._cosine_similarity(emb1, emb2)
300
+ return similarity > threshold
301
+ return False
302
+
303
+ @staticmethod
304
+ def _cosine_similarity(v1, v2):
305
+ """Calculate cosine similarity between vectors"""
306
+ dot_product = sum(x * y for x, y in zip(v1, v2))
307
+ magnitude1 = sum(x * x for x in v1) ** 0.5
308
+ magnitude2 = sum(x * x for x in v2) ** 0.5
309
+ if magnitude1 * magnitude2 == 0:
310
+ return 0
311
+ return dot_product / (magnitude1 * magnitude2)
312
+
313
+ def _store_interaction(self, question: str, answer: str, tool_calls: List[dict]) -> None:
314
+ """Store the interaction in vector memory"""
315
+ timestamp = datetime.now().isoformat()
316
+
317
+ # Format tools used
318
+ tools_used = []
319
+ for tool_call in tool_calls:
320
+ if isinstance(tool_call, dict) and 'name' in tool_call:
321
+ tools_used.append(tool_call['name'])
322
+ elif hasattr(tool_call, 'name'):
323
+ tools_used.append(tool_call.name)
324
+
325
+ tools_str = ", ".join(tools_used) if tools_used else "None"
326
+
327
+ # Create content
328
+ content = (
329
+ f"Question: {question}\n"
330
+ f"Tools Used: {tools_str}\n"
331
+ f"Final answer: {answer}\n"
332
+ f"Timestamp: {timestamp}"
333
+ )
334
+
335
+ # Create document with metadata
336
+ doc = Document(
337
+ page_content=content,
338
+ metadata={
339
+ "question": question,
340
+ "timestamp": timestamp,
341
+ "type": "qa_pair",
342
+ "tools_used": tools_str
343
+ }
344
+ )
345
+
346
+ # Add to vector store
347
+ self.vector_store.add_documents([doc])
348
+
349
+ def _track_tool_usage(self, tool_name: str, tool_input: Any, tool_output: Any) -> None:
350
+ """Track tool usage for future reference"""
351
+ timestamp = datetime.now().isoformat()
352
+
353
+ # Format the content
354
+ content = (
355
+ f"Tool Knowledge\n"
356
+ f"Tool: {tool_name}\n"
357
+ f"Query: {str(tool_input)}\n"
358
+ f"Result: {str(tool_output)}\n"
359
+ f"Timestamp: {timestamp}"
360
+ )
361
+
362
+ # Create document with metadata
363
+ doc = Document(
364
+ page_content=content,
365
+ metadata={
366
+ "type": "tool_knowledge",
367
+ "tool": tool_name,
368
+ "timestamp": timestamp
369
+ }
370
+ )
371
+
372
+ # Add to vector store
373
+ self.vector_store.add_documents([doc])
374
+
375
+ def _periodic_memory_management(self,
376
+ check_frequency: int = 10,
377
+ max_documents: int = 1000,
378
+ max_age_days: int = 30) -> None:
379
+ """Periodically manage memory to prevent unbounded growth"""
380
+ # Simple probabilistic check to avoid running this too often
381
+ if hash(datetime.now().isoformat()) % check_frequency != 0:
382
+ return
383
+
384
+ self.manage_memory(max_documents, max_age_days)
385
+
386
+ def manage_memory(self, max_documents: int = 1000, max_age_days: int = 30) -> None:
387
+ """
388
+ Manage memory by pruning old or less useful entries from the vector store.
389
+
390
+ Args:
391
+ max_documents: Maximum number of documents to keep
392
+ max_age_days: Remove documents older than this many days
393
+ """
394
+ print(f"Starting memory management...")
395
+
396
+ # Get all documents from the vector store
397
+ try:
398
+ # For vector stores that have a get_all_documents method
399
+ if hasattr(self.vector_store, "get_all_documents"):
400
+ all_docs = self.vector_store.get_all_documents()
401
+ all_ids = [doc.metadata.get("id", i) for i, doc in enumerate(all_docs)]
402
+ # For other vector store implementations
403
+ else:
404
+ print("Warning: Vector store doesn't expose required attributes for memory management")
405
+ return
406
+ except Exception as e:
407
+ print(f"Error accessing vector store documents: {e}")
408
+ return
409
+
410
+ if not all_docs:
411
+ print("No documents found in vector store")
412
+ return
413
+
414
+ print(f"Retrieved {len(all_docs)} documents for scoring")
415
+
416
+ # Score each document based on recency, importance and relevance
417
+ scored_docs = []
418
+ cutoff_date = datetime.now() - timedelta(days=max_age_days)
419
+
420
+ for i, doc in enumerate(all_docs):
421
+ doc_id = all_ids[i] if i < len(all_ids) else i
422
+
423
+ # Extract timestamp from content or metadata
424
+ timestamp = None
425
+ if hasattr(doc, "metadata") and doc.metadata and "timestamp" in doc.metadata:
426
+ try:
427
+ timestamp = datetime.fromisoformat(doc.metadata["timestamp"])
428
+ except (ValueError, TypeError):
429
+ pass
430
+
431
+ # If no timestamp in metadata, try to extract from content
432
+ if not timestamp and hasattr(doc, "page_content") and "Timestamp:" in doc.page_content:
433
+ try:
434
+ timestamp_str = doc.page_content.split("Timestamp:")[-1].strip().split('\n')[0]
435
+ timestamp = datetime.fromisoformat(timestamp_str)
436
+ except (ValueError, TypeError):
437
+ timestamp = datetime.now() - timedelta(days=max_age_days+1)
438
+
439
+ # If still no timestamp, use a default
440
+ if not timestamp:
441
+ timestamp = datetime.now() - timedelta(days=max_age_days+1)
442
+
443
+ # Calculate age score (newer is better)
444
+ age_factor = max(0.0, min(1.0, (timestamp - cutoff_date).total_seconds() /
445
+ (datetime.now() - cutoff_date).total_seconds()))
446
+
447
+ # Calculate importance score based on document type and access frequency
448
+ importance_factor = 1.0
449
+
450
+ # Tool knowledge is more valuable
451
+ if hasattr(doc, "metadata") and doc.metadata and doc.metadata.get("type") == "tool_knowledge":
452
+ importance_factor += 0.5
453
+
454
+ # If document has been accessed often, increase importance
455
+ if hasattr(doc, "metadata") and doc.metadata and "access_count" in doc.metadata:
456
+ importance_factor += min(1.0, doc.metadata["access_count"] / 10)
457
+
458
+ # If document contains references to complex tools, prioritize it
459
+ if hasattr(doc, "page_content"):
460
+ complex_tools = ["web_search", "python_repl", "analyze_image", "arxiv_search"]
461
+ if any(tool in doc.page_content for tool in complex_tools):
462
+ importance_factor += 0.3
463
+
464
+ # Create combined score (higher = more valuable to keep)
465
+ total_score = (0.6 * age_factor) + (0.4 * importance_factor)
466
+
467
+ # Add to priority queue (negative for max-heap behavior)
468
+ heapq.heappush(scored_docs, (-total_score, i, doc))
469
+
470
+ # Select top documents to keep
471
+ docs_to_keep = []
472
+ for _ in range(min(max_documents, len(scored_docs))):
473
+ if scored_docs:
474
+ _, _, doc = heapq.heappop(scored_docs)
475
+ docs_to_keep.append(doc)
476
+
477
+ # Only rebuild if we're actually pruning some documents
478
+ if len(docs_to_keep) < len(all_docs):
479
+ print(f"Memory management: Keeping {len(docs_to_keep)} documents out of {len(all_docs)}")
480
+
481
+ # Create a new vector store with the same type as the current one
482
+ vector_store_type = type(self.vector_store)
483
+
484
+ # Different approaches based on vector store type
485
+ if hasattr(vector_store_type, "from_documents"):
486
+ # Most langchain vector stores support this method
487
+ new_vector_store = vector_store_type.from_documents(
488
+ docs_to_keep,
489
+ embedding=self.embedding_model
490
+ )
491
+ self.vector_store = new_vector_store
492
+ print(f"Vector store rebuilt with {len(docs_to_keep)} documents")
493
+
494
+ elif hasattr(vector_store_type, "from_texts"):
495
+ # For vector stores that use from_texts
496
+ texts = [doc.page_content for doc in docs_to_keep]
497
+ metadatas = [doc.metadata if hasattr(doc, "metadata") else {} for doc in docs_to_keep]
498
+
499
+ new_vector_store = vector_store_type.from_texts(
500
+ texts=texts,
501
+ embedding=self.embedding_model,
502
+ metadatas=metadatas
503
+ )
504
+ self.vector_store = new_vector_store
505
+ print(f"Vector store rebuilt with {len(docs_to_keep)} documents")
506
+
507
+ else:
508
+ print("Warning: Could not determine how to rebuild the vector store")
509
+ print(f"Vector store type: {vector_store_type.__name__}")
510
+
511
+ # Example usage
512
+ if __name__ == "__main__":
513
+ from langchain_huggingface import HuggingFaceEmbeddings
514
+ from langchain_chroma import Chroma
515
+ from langchain_groq import ChatGroq
516
+ from basic_tools import multiply, add, subtract, divide, wiki_search, web_search
517
+
518
+ # Initialize embeddings
519
+ embeddings = HuggingFaceEmbeddings(
520
+ model_name="sentence-transformers/all-mpnet-base-v2",
521
+ model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
522
+ )
523
+
524
+ # Initialize vector store
525
+ vector_store = Chroma(
526
+ embedding_function=embeddings,
527
+ collection_name="advanced_agent_memory"
528
+ )
529
+
530
+ # Initialize LLM
531
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
532
+
533
+ # Define tools
534
+ tools = [multiply, add, subtract, divide, wiki_search, web_search]
535
+
536
+ # Create agent
537
+ agent = AdvancedToolAgent(
538
+ embedding_model=embeddings,
539
+ vector_store=vector_store,
540
+ llm=llm,
541
+ tools=tools
542
+ )
543
+
544
+ # Test the agent
545
+ response = agent("What is the population of France multiplied by 2?")
546
+ print(f"Response: {response}")
agent.py CHANGED
@@ -7,7 +7,8 @@ from langchain.vectorstores import VectorStore
7
  from langchain_core.language_models import BaseChatModel
8
  from langgraph.prebuilt import tools_condition
9
  from langgraph.prebuilt import ToolNode
10
- from langchain_community.vectorstores import FAISS
 
11
  from langchain_core.documents import Document
12
  from langchain_groq import ChatGroq
13
  from basic_tools import *
@@ -17,24 +18,16 @@ from datetime import datetime, timedelta
17
  from sentence_transformers import SentenceTransformer
18
  import torch
19
  import heapq
 
20
 
21
  os.environ['HF_HOME'] = os.path.join(
22
  os.path.expanduser('~'), '.cache', "huggingface")
23
 
24
- embeddings = HuggingFaceEmbeddings(
25
- model_name="sentence-transformers/all-mpnet-base-v2",
26
- # hugging_face_api_key=os.getenv("HF_TOKEN"),
27
- model_kwargs={"device": "gpu" if torch.cuda.is_available() else "cpu",
28
- "token": os.getenv("HF_TOKEN")},
29
- show_progress=True,
30
- )
31
- vector_store: FAISS = FAISS.from_texts(
32
- texts=[],
33
- embedding=embeddings)
34
 
35
 
36
  # load the system prompt from the file
37
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
38
  system_prompt = f.read()
39
 
40
 
@@ -42,19 +35,13 @@ with open("system_prompt.txt", "r", encoding="utf-8") as f:
42
  sys_msg = SystemMessage(content=system_prompt)
43
 
44
 
45
-
46
-
47
  class BasicAgent:
48
  tools: List[BaseTool] = [multiply,
49
- add,
50
- subtract,
51
- divide,
52
- modulus,
53
- wiki_search,
54
- web_search,
55
- arxiv_search,
56
- requests_get,
57
- requests_post
58
  ]
59
  def __init__(self, embeddings: HuggingFaceEmbeddings, vector_store: VectorStore, llm: BaseChatModel):
60
  self.embedding_model = embeddings
@@ -72,12 +59,12 @@ class BasicAgent:
72
  def __call__(self, question: str) -> str:
73
  print(f"Agent received question (first 50 chars): {question[:50]}...")
74
 
75
- # Search for similar content to enhance context
76
- similar_docs = self.vector_store.similarity_search(question, k=3)
77
 
78
  # Create enhanced context with relevant past information
79
  enhanced_context = question
80
- if similar_docs:
81
  context_additions = []
82
  for doc in similar_docs:
83
  # Extract relevant information from similar documents
@@ -85,15 +72,22 @@ class BasicAgent:
85
  if "Question:" in content and "Final answer:" in content:
86
  q = content.split("Question:")[1].split("Final answer:")[0].strip()
87
  a = content.split("Final answer:")[1].split("Timestamp:", 1)[0].strip()
 
 
 
 
 
 
 
88
  # Only add if it's not exactly the same question
89
  if not question.lower() == q.lower():
90
  context_additions.append(f"Related Q: {q}\nRelated A: {a}")
91
 
92
  if context_additions:
93
  enhanced_context = (
94
- "I'll answer your question, but first consider this relevant information:\n\n" +
95
- "\n\n".join(context_additions) +
96
- "\n\nNow answering your original question: " + question
97
  )
98
 
99
  # Process with the graph
@@ -189,7 +183,7 @@ class BasicAgent:
189
  tools_condition,
190
  {
191
  "tools": "tools",
192
- None: END
193
  }
194
  )
195
  builder.add_edge("tools", "context_enhanced_generation")
@@ -218,7 +212,7 @@ class BasicAgent:
218
  base_url="http://localhost:11432/v1", # default LM Studio endpoint
219
  api_key="not-used", # required by interface but ignored #type: ignore
220
  # model="mistral-nemo-instruct-2407",
221
- model="llama-3.1-8b-claude-3.7-sonnet-reasoning-distilled",
222
  temperature=0.2
223
  )
224
  elif provider == "openai":
 
7
  from langchain_core.language_models import BaseChatModel
8
  from langgraph.prebuilt import tools_condition
9
  from langgraph.prebuilt import ToolNode
10
+ # from langchain_community.vectorstores import Chroma
11
+
12
  from langchain_core.documents import Document
13
  from langchain_groq import ChatGroq
14
  from basic_tools import *
 
18
  from sentence_transformers import SentenceTransformer
19
  import torch
20
  import heapq
21
+ from utils import *
22
 
23
  os.environ['HF_HOME'] = os.path.join(
24
  os.path.expanduser('~'), '.cache', "huggingface")
25
 
26
+
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  # load the system prompt from the file
30
+ with open("./system_prompt.txt", "r", encoding="utf-8") as f:
31
  system_prompt = f.read()
32
 
33
 
 
35
  sys_msg = SystemMessage(content=system_prompt)
36
 
37
 
 
 
38
  class BasicAgent:
39
  tools: List[BaseTool] = [multiply,
40
+ multiply, add, subtract, divide, modulus,
41
+ wiki_search, web_search, arxiv_search,
42
+ python_repl, analyze_image,
43
+ date_filter, analyze_content,
44
+ step_by_step_reasoning, translate_text
 
 
 
 
45
  ]
46
  def __init__(self, embeddings: HuggingFaceEmbeddings, vector_store: VectorStore, llm: BaseChatModel):
47
  self.embedding_model = embeddings
 
59
  def __call__(self, question: str) -> str:
60
  print(f"Agent received question (first 50 chars): {question[:50]}...")
61
 
62
+ # Search for similar content to enhance context - LIMIT TO 1 DOCUMENT ONLY
63
+ similar_docs = self.vector_store.similarity_search(question, k=1) # Reduced from 3 to 1
64
 
65
  # Create enhanced context with relevant past information
66
  enhanced_context = question
67
+ if (similar_docs):
68
  context_additions = []
69
  for doc in similar_docs:
70
  # Extract relevant information from similar documents
 
72
  if "Question:" in content and "Final answer:" in content:
73
  q = content.split("Question:")[1].split("Final answer:")[0].strip()
74
  a = content.split("Final answer:")[1].split("Timestamp:", 1)[0].strip()
75
+
76
+ # Truncate long contexts
77
+ if len(q) > 200:
78
+ q = q[:200] + "..."
79
+ if len(a) > 300:
80
+ a = a[:300] + "..."
81
+
82
  # Only add if it's not exactly the same question
83
  if not question.lower() == q.lower():
84
  context_additions.append(f"Related Q: {q}\nRelated A: {a}")
85
 
86
  if context_additions:
87
  enhanced_context = (
88
+ "Consider this relevant information first:\n\n" +
89
+ "\n\n".join(context_additions[:1]) + # Only use the first context addition
90
+ "\n\nNow answering this question: " + question
91
  )
92
 
93
  # Process with the graph
 
183
  tools_condition,
184
  {
185
  "tools": "tools",
186
+ END: END # Using END as the key instead of None
187
  }
188
  )
189
  builder.add_edge("tools", "context_enhanced_generation")
 
212
  base_url="http://localhost:11432/v1", # default LM Studio endpoint
213
  api_key="not-used", # required by interface but ignored #type: ignore
214
  # model="mistral-nemo-instruct-2407",
215
+ model="meta-llama-3.1-8b-instruct",
216
  temperature=0.2
217
  )
218
  elif provider == "openai":
app.py CHANGED
@@ -3,7 +3,9 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from agent import BasicAgent, embeddings, vector_store
 
 
7
  from dotenv import load_dotenv
8
  import os
9
 
@@ -39,8 +41,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
39
  # 1. Instantiate Agent ( modify this part to create your agent)
40
  try:
41
  # llm = BasicAgent.get_llm("groq")
42
- llm = BasicAgent.get_llm("openai_local")
43
- agent = BasicAgent(embeddings, vector_store, llm)
 
44
  print("Agent instantiated successfully.")
45
  except Exception as e:
46
  print(f"Error instantiating agent: {e}")
@@ -74,19 +77,33 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
74
  results_log = []
75
  answers_payload = []
76
  print(f"Running agent on {len(questions_data)} questions...")
77
- for item in questions_data:
 
 
 
 
78
  task_id = item.get("task_id")
79
  question_text = item.get("question")
80
  if not task_id or question_text is None:
81
  print(f"Skipping item with missing task_id or question: {item}")
82
  continue
 
83
  try:
 
84
  submitted_answer = agent(question_text)
85
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
86
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
87
  except Exception as e:
88
- print(f"Error running agent on task {task_id}: {e}")
89
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
90
 
91
  if not answers_payload:
92
  print("Agent did not produce any answers to submit.")
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ # from agent import BasicAgent, embeddings, vector_store
7
+ from utils import embeddings, vector_store
8
+ from reasoning_agent import ReasoningAgent
9
  from dotenv import load_dotenv
10
  import os
11
 
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
  # llm = BasicAgent.get_llm("groq")
44
+ # llm = BasicAgent.get_llm("openai_local")
45
+ # agent = BasicAgent(embeddings, vector_store, llm)
46
+ agent = ReasoningAgent()
47
  print("Agent instantiated successfully.")
48
  except Exception as e:
49
  print(f"Error instantiating agent: {e}")
 
77
  results_log = []
78
  answers_payload = []
79
  print(f"Running agent on {len(questions_data)} questions...")
80
+
81
+ import time
82
+
83
+ # Process at most 5 questions at a time to avoid rate limits
84
+ for i, item in enumerate(questions_data):
85
  task_id = item.get("task_id")
86
  question_text = item.get("question")
87
  if not task_id or question_text is None:
88
  print(f"Skipping item with missing task_id or question: {item}")
89
  continue
90
+
91
  try:
92
+ print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
93
  submitted_answer = agent(question_text)
94
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
95
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
96
+
97
+ # Add delay between questions to avoid rate limiting (5 seconds)
98
+ if i < len(questions_data) - 1:
99
+ print(f"Waiting 5 seconds before next question...")
100
+ time.sleep(5)
101
+
102
  except Exception as e:
103
+ print(f"Error running agent on task {task_id}: {e}")
104
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
105
+ # Continue with the next question after a short delay even if there was an error
106
+ time.sleep(3)
107
 
108
  if not answers_payload:
109
  print("Agent did not produce any answers to submit.")
basic_tools.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  from dotenv import load_dotenv
3
  from langgraph.graph import START, StateGraph, MessagesState
@@ -80,17 +84,27 @@ def modulus(a: int, b: int) -> int:
80
 
81
  @tool
82
  def wiki_search(query: str) -> str:
83
- """Search Wikipedia for a query and return maximum 2 results.
84
 
85
  Args:
86
- query: The search query."""
87
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
88
- formatted_search_docs = "\n\n---\n\n".join(
89
- [
90
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
91
- for doc in search_docs
92
- ])
93
- return formatted_search_docs
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  @tool
@@ -160,5 +174,600 @@ requests_post = RequestsPostTool(requests_wrapper=requests_wrapper, allow_danger
160
  # response = toolkit.run(url, data=data, json=json, headers=headers)
161
  # return response.text
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
 
 
1
+ from youtube_transcript_api.formatters import TextFormatter
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ import requests
4
+ from typing import Dict, List, Optional, Any, Union
5
  import os
6
  from dotenv import load_dotenv
7
  from langgraph.graph import START, StateGraph, MessagesState
 
84
 
85
  @tool
86
  def wiki_search(query: str) -> str:
87
+ """Search Wikipedia for a query and return maximum 5 results.
88
 
89
  Args:
90
+ query: The search query. Be specific with search terms including full names, dates, and relevant keywords.
91
+ """
92
+ if not query or query.strip() == "":
93
+ return "Error: Please provide a valid search query with specific terms."
94
+
95
+ try:
96
+ search_docs = WikipediaLoader(query=query, load_max_docs=5).load()
97
+ if not search_docs:
98
+ return f"No Wikipedia results found for '{query}'. Consider refining your search terms."
99
+
100
+ formatted_search_docs = "\n\n---\n\n".join(
101
+ [
102
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
103
+ for doc in search_docs
104
+ ])
105
+ return formatted_search_docs
106
+ except Exception as e:
107
+ return f"Error searching Wikipedia: {str(e)}. Please try a different query."
108
 
109
 
110
  @tool
 
174
  # response = toolkit.run(url, data=data, json=json, headers=headers)
175
  # return response.text
176
 
177
+ @tool
178
+ def date_filter(content: str, start_year: int, end_year: int) -> str:
179
+ """Filter content based on date range and extract relevant information.
180
+
181
+ Args:
182
+ content: The text content to filter
183
+ start_year: Starting year (inclusive)
184
+ end_year: Ending year (inclusive)
185
+ """
186
+ if not content or not isinstance(content, str):
187
+ return "Error: No content provided for filtering."
188
+
189
+ try:
190
+ # Convert years to strings for matching
191
+ years = [str(year) for year in range(start_year, end_year + 1)]
192
+
193
+ # Split content into paragraphs
194
+ paragraphs = content.split("\n")
195
+
196
+ # Filter paragraphs containing any year in the range
197
+ filtered_paragraphs = []
198
+ for paragraph in paragraphs:
199
+ if any(f" {year}" in paragraph or f"({year})" in paragraph or f"[{year}]" in paragraph for year in years):
200
+ filtered_paragraphs.append(paragraph)
201
+
202
+ if not filtered_paragraphs:
203
+ return f"No content found specifically mentioning years between {start_year} and {end_year}."
204
+
205
+ return "\n\n".join(filtered_paragraphs)
206
+ except Exception as e:
207
+ return f"Error filtering by date range: {str(e)}"
208
+
209
+ import re
210
+
211
+ @tool
212
+ def count_items(content: str, pattern: str, context_words: int = 5) -> str:
213
+ """Count items matching a pattern in content and extract contextual information.
214
+
215
+ Args:
216
+ content: The text to analyze
217
+ pattern: The pattern to search for (e.g. "album", "publication")
218
+ context_words: Number of words to include for context around matches
219
+ """
220
+ if not content or not pattern:
221
+ return "Error: Both content and pattern must be provided."
222
+
223
+ try:
224
+ # Find all occurrences of the pattern
225
+ matches = re.finditer(r'(?i)\b\w*' + re.escape(pattern) + r'\w*\b', content)
226
+
227
+ # Extract context around matches
228
+ contexts = []
229
+ count = 0
230
+
231
+ for match in matches:
232
+ count += 1
233
+ start, end = match.span()
234
+
235
+ # Get text before and after the match
236
+ text_before = content[max(0, start-100):start]
237
+ text_after = content[end:min(len(content), end+100)]
238
+
239
+ # Create contextual excerpt
240
+ context = f"...{text_before}{match.group(0)}{text_after}..."
241
+ contexts.append(context)
242
+
243
+ if count == 0:
244
+ return f"No items matching '{pattern}' found in the content."
245
+
246
+ result = f"Found {count} occurrences of '{pattern}'. Contexts:\n\n"
247
+ result += "\n---\n".join(contexts[:10]) # Limit to first 10 for brevity
248
+
249
+ return result
250
+ except Exception as e:
251
+ return f"Error counting items: {str(e)}"
252
+
253
+ @tool
254
+ def translate_text(text: str, target_language: str) -> str:
255
+ """Translate text to the specified language using a simple translation API.
256
+
257
+ Args:
258
+ text: Text to translate
259
+ target_language: Target language (e.g., "Spanish", "French", "German")
260
+ """
261
+ if not text:
262
+ return "Error: No text provided for translation."
263
+
264
+ try:
265
+ # Using LibreTranslate API (open-source translation)
266
+ API_URL = "https://translate.argosopentech.com/translate"
267
+
268
+ # Map common language names to language codes
269
+ language_map = {
270
+ "english": "en",
271
+ "spanish": "es",
272
+ "french": "fr",
273
+ "german": "de",
274
+ "italian": "it",
275
+ "portuguese": "pt",
276
+ "russian": "ru",
277
+ "japanese": "ja",
278
+ "chinese": "zh",
279
+ "arabic": "ar",
280
+ "hindi": "hi",
281
+ "korean": "ko"
282
+ }
283
+
284
+ # Get language code
285
+ target_code = language_map.get(target_language.lower())
286
+ if not target_code:
287
+ return f"Error: Unsupported language '{target_language}'. Supported languages: {', '.join(language_map.keys())}."
288
+
289
+ # Prepare request
290
+ payload = {
291
+ "q": text[:500], # Limit text length to avoid API issues
292
+ "source": "auto",
293
+ "target": target_code
294
+ }
295
+
296
+ response = requests.post(API_URL, json=payload)
297
+ if response.status_code == 200:
298
+ translation = response.json().get("translatedText", "")
299
+ return f"Original: {text[:100]}{'...' if len(text) > 100 else ''}\n\nTranslation ({target_language}): {translation}"
300
+ else:
301
+ return f"Translation API error: {response.status_code} - {response.text}"
302
+ except Exception as e:
303
+ return f"Error translating text: {str(e)}"
304
+
305
+ @tool
306
+ def step_by_step_reasoning(problem: str, steps: int = 3) -> str:
307
+ """Break down a complex problem into steps for clearer reasoning.
308
+
309
+ Args:
310
+ problem: The problem statement or question to analyze
311
+ steps: Number of reasoning steps (default: 3)
312
+ """
313
+ if not problem:
314
+ return "Error: No problem provided for analysis."
315
+
316
+ try:
317
+ # Structure for breaking down any problem
318
+ result = f"Breaking down: {problem}\n\n"
319
+
320
+ # Generic reasoning steps that work for many problems
321
+ reasoning_steps = [
322
+ "Identify the key information and requirements in the problem",
323
+ "Determine what knowledge or method is needed to solve it",
324
+ "Apply relevant formulas, data, or logical steps",
325
+ "Verify the solution against the original requirements",
326
+ "Consider alternative approaches or edge cases"
327
+ ]
328
+
329
+ # Use only the requested number of steps
330
+ steps_to_use = min(steps, len(reasoning_steps))
331
+ for i in range(steps_to_use):
332
+ result += f"Step {i+1}: {reasoning_steps[i]}\n"
333
+ result += f"This step involves analyzing {problem} by "
334
+
335
+ if i == 0:
336
+ # First step focuses on understanding the problem
337
+ keywords = re.findall(r'\b\w{5,}\b', problem)
338
+ key_concepts = [word for word in keywords if len(word) > 4][:3]
339
+ if key_concepts:
340
+ result += f"identifying key concepts like {', '.join(key_concepts)}. "
341
+
342
+ # Identify question type
343
+ if "how many" in problem.lower():
344
+ result += "This is a counting or quantification problem. "
345
+ elif "when" in problem.lower():
346
+ result += "This is a timing or chronological problem. "
347
+ elif "where" in problem.lower():
348
+ result += "This is a location or spatial problem. "
349
+ elif "who" in problem.lower():
350
+ result += "This is a person or entity identification problem. "
351
+ elif "why" in problem.lower():
352
+ result += "This is a causation or reasoning problem. "
353
+
354
+ result += "We need to extract specific details from the problem statement.\n\n"
355
+
356
+ elif i == 1:
357
+ # Second step focuses on approach
358
+ if "between" in problem.lower() and re.search(r'\d{4}', problem):
359
+ result += "using date filtering to focus on the specific time period. "
360
+ result += "We need to identify relevant dates and associated events/items.\n\n"
361
+ elif any(word in problem.lower() for word in ["album", "song", "music", "artist", "band"]):
362
+ result += "examining discography information and music-related details. "
363
+ result += "We should focus on releases, titles, and years.\n\n"
364
+ elif any(word in problem.lower() for word in ["calculate", "compute", "sum", "average", "total"]):
365
+ result += "applying mathematical operations to derive a numeric result. "
366
+ result += "We need to identify the values and operations required.\n\n"
367
+ else:
368
+ result += "gathering relevant factual information and organizing it logically. "
369
+ result += "We should separate facts from assumptions.\n\n"
370
+
371
+ elif i == 2:
372
+ # Third step focuses on solution path
373
+ result += "determining the specific steps to reach a solution. "
374
+ result += "This may involve counting items, applying formulas, or comparing data.\n\n"
375
+
376
+ elif i == 3:
377
+ # Fourth step focuses on verification
378
+ result += "checking our answer against the original question requirements. "
379
+ result += "We should verify that we've fully addressed all parts of the question.\n\n"
380
+
381
+ else:
382
+ # Fifth step focuses on alternatives
383
+ result += "considering other approaches or edge cases we might have missed. "
384
+ result += "This ensures our answer is robust and comprehensive.\n\n"
385
+
386
+ result += "\nThis structured approach helps organize thinking and ensures a thorough analysis."
387
+ return result
388
+
389
+ except Exception as e:
390
+ return f"Error performing step-by-step reasoning: {str(e)}"
391
+
392
+ @tool
393
+ def analyze_content(content: str, analysis_type: str) -> str:
394
+ """Analyze content for specific information based on analysis type.
395
+
396
+ Args:
397
+ content: Text content to analyze
398
+ analysis_type: Type of analysis to perform ('dates', 'names', 'numbers', 'events')
399
+ """
400
+ if not content:
401
+ return "Error: No content provided for analysis."
402
+
403
+ analysis_type = analysis_type.lower()
404
+
405
+ try:
406
+ if analysis_type == 'dates':
407
+ # Extract dates in various formats
408
+ date_patterns = [
409
+ r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', # DD/MM/YYYY or MM/DD/YYYY
410
+ r'\b\d{1,2}\s(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s\d{2,4}\b', # DD Month YYYY
411
+ r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s\d{1,2}(?:st|nd|rd|th)?,\s\d{2,4}\b', # Month DD, YYYY
412
+ r'\b\d{4}\b' # YYYY (years)
413
+ ]
414
+ results = []
415
+ for pattern in date_patterns:
416
+ matches = re.findall(pattern, content, re.IGNORECASE)
417
+ results.extend(matches)
418
+
419
+ return f"Found {len(results)} date references:\n\n" + "\n".join(results)
420
+
421
+ elif analysis_type == 'names':
422
+ # Basic name extraction (this is simplified, real NER would be better)
423
+ name_pattern = r'\b[A-Z][a-z]+\s[A-Z][a-z]+\b'
424
+ names = re.findall(name_pattern, content)
425
+ return f"Found {len(names)} potential names:\n\n" + "\n".join(names)
426
+
427
+ elif analysis_type == 'numbers':
428
+ # Extract numbers and their context
429
+ number_pattern = r'\b\d+(?:,\d+)*(?:\.\d+)?\b'
430
+ numbers = re.findall(number_pattern, content)
431
+
432
+ # Get context for each number
433
+ contexts = []
434
+ for number in numbers:
435
+ index = content.find(number)
436
+ start = max(0, index - 50)
437
+ end = min(len(content), index + len(number) + 50)
438
+ context = content[start:end].replace('\n', ' ').strip()
439
+ contexts.append(f"{number}: \"{context}\"")
440
+
441
+ return f"Found {len(numbers)} numbers with context:\n\n" + "\n".join(contexts[:20]) # Limit to 20
442
+
443
+ elif analysis_type == 'events':
444
+ # Look for event indicators
445
+ event_patterns = [
446
+ r'\b(?:occurred|happened|took place|event|ceremony|concert|release|published|awarded|presented)\b',
447
+ r'\b(?:in|on|during|at)\s\d{4}\b'
448
+ ]
449
+ events = []
450
+ for pattern in event_patterns:
451
+ for match in re.finditer(pattern, content, re.IGNORECASE):
452
+ start = max(0, match.start() - 100)
453
+ end = min(len(content), match.end() + 100)
454
+ context = content[start:end].replace('\n', ' ').strip()
455
+ events.append(context)
456
+
457
+ return f"Found {len(events)} potential events:\n\n" + "\n\n".join(events[:15]) # Limit to 15
458
+
459
+ else:
460
+ return f"Error: Unsupported analysis type '{analysis_type}'. Use 'dates', 'names', 'numbers', or 'events'."
461
+
462
+ except Exception as e:
463
+ return f"Error during content analysis: {str(e)}"
464
+
465
+
466
+ @tool
467
+ def youtube_transcript(url: str, summarize: bool = True) -> str:
468
+ """Extract transcript from YouTube video and optionally summarize it.
469
+
470
+ Args:
471
+ url: YouTube video URL or video ID
472
+ summarize: Whether to summarize the transcript (default: True)
473
+ """
474
+ try:
475
+ # Extract video ID from URL
476
+ video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url)
477
+ if video_id_match:
478
+ video_id = video_id_match.group(1)
479
+ else:
480
+ # Try using the input directly as a video ID
481
+ if len(url) == 11:
482
+ video_id = url
483
+ else:
484
+ return "Error: Invalid YouTube URL or video ID. Please provide a valid YouTube URL."
485
+
486
+ # Get transcript
487
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
488
+ formatter = TextFormatter()
489
+ formatted_transcript = formatter.format_transcript(transcript)
490
+
491
+ # Get video metadata
492
+ response = requests.get(
493
+ f"https://www.youtube.com/oembed?url=http://www.youtube.com/watch?v={video_id}&format=json")
494
+ metadata = response.json()
495
+ title = metadata.get("title", "Unknown title")
496
+ author = metadata.get("author_name", "Unknown author")
497
+
498
+ if summarize and formatted_transcript:
499
+ # For long transcripts, break into chunks
500
+ max_chunk_length = 4000
501
+ if len(formatted_transcript) > max_chunk_length:
502
+ chunks = [formatted_transcript[i:i+max_chunk_length]
503
+ for i in range(0, len(formatted_transcript), max_chunk_length)]
504
+ summary = f"Video: \"{title}\" by {author}\n\nTranscript summary (extracted from {len(chunks)} segments):\n\n"
505
+
506
+ # Return first and last parts of transcript instead of full summary for long videos
507
+ summary += f"Beginning of transcript:\n{chunks[0][:500]}...\n\n"
508
+ summary += f"End of transcript:\n{chunks[-1][-500:]}"
509
+ return summary
510
+ else:
511
+ return f"Video: \"{title}\" by {author}\n\nFull transcript:\n\n{formatted_transcript}"
512
+ else:
513
+ return f"Video: \"{title}\" by {author}\n\nFull transcript:\n\n{formatted_transcript}"
514
+
515
+ except Exception as e:
516
+ return f"Error extracting YouTube transcript: {str(e)}"
517
+
518
+ import base64
519
+ from io import BytesIO
520
+ from PIL import Image
521
+ import json
522
+
523
+ @tool
524
+ def analyze_image(image_url: str, analysis_type: str = "caption") -> str:
525
+ """Analyze an image from a URL and provide captions, tags, or comprehensive analysis.
526
+
527
+ Args:
528
+ image_url: URL of the image to analyze
529
+ analysis_type: Type of analysis to perform (options: "caption", "tags", "objects", "comprehensive")
530
+ """
531
+ if not image_url:
532
+ return "Error: Please provide a valid image URL."
533
+
534
+ analysis_type = analysis_type.lower()
535
+ valid_types = ["caption", "tags", "objects", "comprehensive"]
536
+
537
+ if analysis_type not in valid_types:
538
+ return f"Error: analysis_type must be one of {', '.join(valid_types)}."
539
+
540
+ try:
541
+ # Download the image
542
+ response = requests.get(image_url, timeout=10)
543
+ response.raise_for_status()
544
+
545
+ # Process image based on analysis type
546
+ if analysis_type == "caption":
547
+ return caption_image(response.content)
548
+ elif analysis_type == "tags":
549
+ return tag_image(response.content)
550
+ elif analysis_type == "objects":
551
+ return detect_objects(response.content)
552
+ elif analysis_type == "comprehensive":
553
+ # Perform all analyses
554
+ caption_result = caption_image(response.content)
555
+ tags_result = tag_image(response.content)
556
+ objects_result = detect_objects(response.content)
557
+
558
+ return f"IMAGE ANALYSIS SUMMARY:\n\n{caption_result}\n\n{tags_result}\n\n{objects_result}"
559
+ # If none of the above conditions are met, return an error string
560
+ return "Error: Unknown analysis type or failed to process image."
561
+ except requests.exceptions.RequestException as e:
562
+ return f"Error downloading image: {str(e)}"
563
+ except Exception as e:
564
+ return f"Error analyzing image: {str(e)}"
565
+
566
+ def caption_image(image_content: bytes) -> str:
567
+ """Generate captions for an image using Hugging Face API."""
568
+ try:
569
+ # Check if we have HF API key in environment
570
+ hf_api_key = os.getenv("HUGGINGFACE_API_TOKEN")
571
+
572
+ if hf_api_key:
573
+ # Use Hugging Face API with auth
574
+ api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
575
+ headers = {"Authorization": f"Bearer {hf_api_key}"}
576
+
577
+ # Convert image to base64
578
+ image_b64 = base64.b64encode(image_content).decode("utf-8")
579
+ payload = {"inputs": {"image": image_b64}}
580
+
581
+ response = requests.post(api_url, headers=headers, json=payload)
582
+ if response.status_code == 200:
583
+ result = response.json()
584
+ if isinstance(result, list) and len(result) > 0:
585
+ return f"CAPTION: {result[0]['generated_text']}"
586
+ else:
587
+ return f"CAPTION: {result['generated_text'] if 'generated_text' in result else str(result)}"
588
+ else:
589
+ # Fallback to public API
590
+ return caption_image_public(image_content)
591
+ else:
592
+ # No API key, use public endpoint
593
+ return caption_image_public(image_content)
594
+
595
+ except Exception as e:
596
+ return f"Error generating caption: {str(e)}"
597
+
598
+ def caption_image_public(image_content: bytes) -> str:
599
+ """Caption image using a public API endpoint."""
600
+ try:
601
+ # Convert to PIL image for processing
602
+ image = Image.open(BytesIO(image_content))
603
+
604
+ # Resize if too large (to avoid timeouts)
605
+ max_size = 1024
606
+ if max(image.size) > max_size:
607
+ ratio = max_size / max(image.size)
608
+ new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
609
+ image = image.resize(new_size, Image.LANCZOS)
610
+
611
+ # Convert back to bytes
612
+ buffer = BytesIO()
613
+ image.save(buffer, format="JPEG")
614
+ image_bytes = buffer.getvalue()
615
+
616
+ # Call public API
617
+ api_url = "https://api.toonify.photos/caption" # Example public API
618
+ files = {"image": ("image.jpg", image_bytes, "image/jpeg")}
619
+
620
+ response = requests.post(api_url, files=files, timeout=15)
621
+ if response.status_code == 200:
622
+ result = response.json()
623
+ return f"CAPTION: {result.get('caption', 'No caption generated')}"
624
+ else:
625
+ return "CAPTION: Could not generate caption (API error)"
626
+ except Exception as e:
627
+ return f"CAPTION: Image appears to be a {detect_simple_content(image_content)}"
628
+
629
+ def tag_image(image_content: bytes) -> str:
630
+ """Generate tags for an image."""
631
+ try:
632
+ # Check if we have HF API key in environment
633
+ hf_api_key = os.getenv("HUGGINGFACE_API_TOKEN")
634
+
635
+ if hf_api_key:
636
+ # Use Hugging Face API for image tagging
637
+ api_url = "https://api-inference.huggingface.co/models/google/vit-base-patch16-224"
638
+ headers = {"Authorization": f"Bearer {hf_api_key}"}
639
+
640
+ # Send image as binary content
641
+ response = requests.post(api_url, headers=headers, data=image_content)
642
+ if response.status_code == 200:
643
+ tags = response.json()
644
+ # Format results
645
+ formatted_tags = "\n".join([f"- {tag['label']} ({tag['score']:.2%})" for tag in tags[:10]])
646
+ return f"TAGS:\n{formatted_tags}"
647
+ else:
648
+ # Fallback to basic detection
649
+ return f"TAGS:\n- {detect_simple_content(image_content)}"
650
+ else:
651
+ # No API key
652
+ return f"TAGS:\n- {detect_simple_content(image_content)}"
653
+ except Exception as e:
654
+ return f"Error generating tags: {str(e)}"
655
+
656
+ def detect_objects(image_content: bytes) -> str:
657
+ """Detect objects in an image."""
658
+ try:
659
+ # Check if we have HF API key in environment
660
+ hf_api_key = os.getenv("HUGGINGFACE_API_TOKEN")
661
+
662
+ if hf_api_key:
663
+ # Use Hugging Face API for object detection
664
+ api_url = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
665
+ headers = {"Authorization": f"Bearer {hf_api_key}"}
666
+
667
+ # Send image as binary content
668
+ response = requests.post(api_url, headers=headers, data=image_content)
669
+ if response.status_code == 200:
670
+ objects = response.json()
671
+
672
+ # Count objects by label
673
+ object_counts = {}
674
+ for obj in objects:
675
+ label = obj["label"]
676
+ if label in object_counts:
677
+ object_counts[label] += 1
678
+ else:
679
+ object_counts[label] = 1
680
+
681
+ # Format results
682
+ formatted_objects = "\n".join([f"- {count}× {label}" for label, count in object_counts.items()])
683
+ return f"OBJECTS DETECTED:\n{formatted_objects}"
684
+ else:
685
+ return "OBJECTS: Could not detect objects (API error)"
686
+ else:
687
+ return "OBJECTS: API key required for object detection"
688
+ except Exception as e:
689
+ return f"Error detecting objects: {str(e)}"
690
+
691
+ def detect_simple_content(image_content: bytes) -> str:
692
+ """Simple function to detect basic image type when APIs are not available."""
693
+ try:
694
+ image = Image.open(BytesIO(image_content))
695
+ width, height = image.size
696
+ aspect = width / height
697
+
698
+ # Very simple heuristics
699
+ if aspect > 2:
700
+ return "panorama or banner image"
701
+ elif aspect < 0.5:
702
+ return "tall or portrait image"
703
+ elif width < 300 or height < 300:
704
+ return "small thumbnail or icon"
705
+ else:
706
+ return "photograph or general image"
707
+ except:
708
+ return "image (could not analyze format)"
709
+
710
+ import contextlib
711
+ from io import StringIO
712
+
713
+ @tool
714
+ def python_repl(code: str) -> str:
715
+ """Execute Python code and return the result.
716
+
717
+ Args:
718
+ code: Python code to execute
719
+ """
720
+ if not code or not isinstance(code, str):
721
+ return "Error: Please provide valid Python code as a string."
722
+
723
+ try:
724
+ # Create a secure dict of globals with limited builtins
725
+ restricted_globals = {
726
+ "__builtins__": {
727
+ k: __builtins__[k] for k in [
728
+ 'abs', 'all', 'any', 'bool', 'chr', 'dict', 'dir', 'divmod',
729
+ 'enumerate', 'filter', 'float', 'format', 'frozenset', 'hash',
730
+ 'hex', 'int', 'isinstance', 'len', 'list', 'map', 'max',
731
+ 'min', 'oct', 'ord', 'pow', 'print', 'range', 'repr',
732
+ 'round', 'set', 'slice', 'sorted', 'str', 'sum', 'tuple', 'type', 'zip'
733
+ ] if k in __builtins__
734
+ }
735
+ }
736
+
737
+ # Add common math functions
738
+ import math
739
+ for name in ['sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sqrt',
740
+ 'log', 'log10', 'exp', 'pi', 'e', 'ceil', 'floor', 'degrees', 'radians']:
741
+ if hasattr(math, name):
742
+ restricted_globals[name] = getattr(math, name)
743
+
744
+ # Local namespace for variables
745
+ local_vars = {}
746
+
747
+ # Capture stdout
748
+ stdout_capture = StringIO()
749
+
750
+ # Execute the code
751
+ with contextlib.redirect_stdout(stdout_capture):
752
+ try:
753
+ # Try to evaluate as an expression first
754
+ result = eval(code, restricted_globals, local_vars)
755
+ stdout_content = stdout_capture.getvalue().strip()
756
+
757
+ if stdout_content:
758
+ return f"{stdout_content}\nResult: {result}"
759
+ return f"Result: {result}"
760
+ except SyntaxError:
761
+ # Not an expression, try executing as statements
762
+ exec(code, restricted_globals, local_vars)
763
+ stdout_content = stdout_capture.getvalue().strip()
764
+
765
+ if stdout_content:
766
+ return stdout_content
767
+ return "Code executed successfully with no output."
768
+
769
+ except Exception as e:
770
+ return f"Error executing code: {type(e).__name__}: {str(e)}"
771
+
772
 
773
 
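As a quick usage sketch, the python_repl tool defined above can be exercised directly through the standard LangChain tool interface (the import path below is an assumption; adjust it to wherever the tool ends up living in this repo):

from advanced_tool_agent import python_repl  # hypothetical import path

# An expression is evaluated with eval() and the value is echoed back
print(python_repl.invoke({"code": "sum(range(10)) ** 2"}))  # -> "Result: 2025"

# Statements are executed with exec() and the captured stdout is returned
print(python_repl.invoke({"code": "for i in range(3): print(i * i)"}))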
chain_of_thought.py ADDED
@@ -0,0 +1,34 @@
1
+ # NOTE: illustrative pseudo-API; langgraph does not actually export Tool, Memory,
+ # Agent, or the tool classes imported below, so this script is a conceptual sketch.
+ from langgraph import Tool, Memory, Agent
2
+ from langgraph.tools import WebSearchTool, CalculatorTool
3
+
4
+ # Define tools the agent can use
5
+ tools = [
6
+ WebSearchTool(name="web_search",
7
+ description="Useful for searching the web"),
8
+ CalculatorTool(name="calculator",
9
+ description="Useful for arithmetic calculations")
10
+ ]
11
+
12
+ # Set up simple memory (e.g., conversation history)
13
+ memory = Memory(max_tokens=500)
14
+
15
+ # Create the agent with reasoning and action capabilities
16
+ agent = Agent(
17
+ name="SimpleReasoningAgent",
18
+ tools=tools,
19
+ memory=memory,
20
+ reasoning_chain="explicit", # use explicit chain-of-thought
21
+ action_threshold=0.7 # confidence threshold to trigger actions
22
+ )
23
+
24
+ if __name__ == "__main__":
25
+ print("Welcome to SimpleReasoningAgent! Type 'exit' to quit.")
26
+ while True:
27
+ user_input = input("\nUser: ")
28
+ if user_input.lower() in ("exit", "quit"):
29
+ print("Goodbye!")
30
+ break
31
+
32
+ # Agent processes input
33
+ response = agent.run(user_input)
34
+ print(f"Agent: {response}\n")
react_agent.py ADDED
@@ -0,0 +1,57 @@
1
+ from basic_tools import *
2
+
3
+ from langgraph.prebuilt import create_react_agent
4
+ from utils import *
5
+ from langchain_core.messages import SystemMessage, HumanMessage
6
+
7
+
8
+ # Initial System message
9
+ system_message = SystemMessage(content="You are a helpful assistant. Use the available tools whenever they help, and return a complete, well-formed answer.")
10
+
11
+
12
+ def main(search_query: str = "What is the capital of France?") -> None:
13
+ # Initialize the LLM (served by a local LM Studio instance; see get_llm in utils.py)
14
+ llm = get_llm(provider="openai_local")
15
+ if llm:
16
+ web_search_tools = [multiply,
17
+ add, subtract, divide, modulus,
18
+ wiki_search, web_search, arxiv_search,
19
+ python_repl, analyze_image,
20
+ date_filter, analyze_content,
21
+ step_by_step_reasoning, translate_text
22
+ ]
23
+ # Create a langgraph react agent with the LLM and tools.
24
+ web_search_agent = create_react_agent(
25
+ name="Web Search Agent",
26
+ # The system message is already supplied in the input payload below;
+ # bind() would only forward an unsupported kwarg to the model API call.
+ model=llm,
27
+ tools=web_search_tools,
28
+ response_format={
29
+ "title": "SearchResults",
30
+ "description": "Structured JSON object with search results",
31
+ "type": "object",
32
+ "properties": {
33
+ "results": {
34
+ "type": "array",
35
+ "items": {"type": "string"}
36
+ }
37
+ },
38
+ "required": ["results"]
39
+ }
40
+ )
41
+
42
+ # Provide a complete conversation history containing both a system and an initial user message.
43
+ # This gives the agent a valid first user message. Note that the payload must be a dict with a "messages" key, not a bare list of messages.
44
+ # input_payload = {
45
+ # "messages": [
46
+ # {"role": "system", "content": system_message.content},
47
+ # {"role": "user", "content": f"{search_query}"}
48
+ # ]
49
+ # }
50
+ input_payload = {"messages": [
51
+ system_message, HumanMessage(content=f"{search_query}")]}
52
+ results = web_search_agent.invoke(input_payload)
53
+ print(results)
54
+
55
+
56
+ if __name__ == "__main__":
57
+ main("can you find out what is the best place to visit in France")
reasoning_agent.py ADDED
@@ -0,0 +1,340 @@
1
+ """
2
+ Simple Reasoning and Action Agent using LangGraph and LangChain
3
+
4
+ This agent follows a standard reasoning pattern:
5
+ 1. Think - Analyze the input and determine an approach
6
+ 2. Select - Choose appropriate tools from available options
7
+ 3. Act - Use the selected tools
8
+ 4. Observe - Review results
9
+ 5. Conclude - Generate final response
10
+ """
11
+
12
+ import os
13
+ from typing import Dict, List, Annotated, TypedDict, Union, Tuple, Any
14
+
15
+ from langchain_core.tools import BaseTool
16
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
17
+ from langchain_core.prompts import ChatPromptTemplate
21
+
22
+ from langgraph.graph import StateGraph, END
23
+ from langgraph.prebuilt import ToolNode
24
+
25
+
26
+ from basic_tools import *
27
+ from utils import *
28
+
29
+ def get_available_tools():
30
+ tools = [multiply,
31
+ add, subtract, divide, modulus,
32
+ wiki_search, web_search, arxiv_search,
33
+ python_repl, analyze_image,
34
+ date_filter, analyze_content,
35
+ step_by_step_reasoning, translate_text
36
+ ]
37
+ return tools
38
+
39
+
40
+ # Define the agent state
41
+ class AgentState(TypedDict):
42
+ """State for the reasoning and action agent."""
43
+ # A plain list means each node's return value replaces the history; annotating
+ # with add_messages would instead append ToolNode output to the conversation.
+ messages: List[Union[AIMessage, HumanMessage, SystemMessage, ToolMessage]]
44
+ # We'll store intermediate steps of reasoning here
45
+ reasoning: List[str]
46
+ # Keep track of selected tools
47
+ selected_tools: List[str]
48
+ # Store tool results
49
+ tool_results: Dict[str, Any]
50
+
51
+
52
+ # Shared LLM used by every node in the graph
53
+ model = get_llm(provider="openai")
54
+
55
+ # System prompts
56
+ AGENT_SYSTEM_PROMPT = """You are a helpful reasoning and action agent.
57
+ Your job is to:
58
+ 1. Carefully analyze the user's request
59
+ 2. Think through the problem step by step
60
+ 3. Select appropriate tools from your toolkit
61
+ 4. Use those tools to address the request
62
+ 5. Provide a clear, complete response
63
+
64
+ Available tools:
65
+ {tool_descriptions}
66
+
67
+ When you need to use a tool, select the most appropriate one based on your reasoning.
68
+ Always show your reasoning process clearly.
69
+ """
70
+
71
+
72
+ # ============= Node Functions =============
73
+
74
+ def think(state: AgentState) -> AgentState:
75
+ """Think through the problem and analyze the user request."""
76
+
77
+ # Extract the user's most recent message
78
+ user_message = state["messages"][-1]
79
+ if not isinstance(user_message, HumanMessage):
80
+ # If the last message isn't from the user, find the most recent one
81
+ for msg in reversed(state["messages"]):
82
+ if isinstance(msg, HumanMessage):
83
+ user_message = msg
84
+ break
85
+
86
+ # Create a prompt for thinking
87
+ think_prompt = ChatPromptTemplate.from_messages([
88
+ SystemMessage(
89
+ content="You are analyzing a user request. Think step by step about what the user is asking for and what approach would be best."),
90
+ ("user", "{input}")
91
+ ])
92
+
93
+ # Generate thinking output
94
+ think_response = model.invoke(
95
+ think_prompt.format_messages(input=user_message.content)
96
+ )
97
+
98
+ # Update state with reasoning
99
+ reasoning = think_response.content
100
+ state["reasoning"] = state.get("reasoning", []) + [reasoning]
101
+
102
+ return state
103
+
104
+
105
+ def select_tools(state: AgentState) -> AgentState:
106
+ """Select appropriate tools based on the reasoning."""
107
+
108
+ # Get available tools
109
+ tools = get_available_tools()
110
+ tool_descriptions = "\n".join(
111
+ [f"- {tool.name}: {tool.description}" for tool in tools])
112
+
113
+ # Create a prompt for tool selection
114
+ select_prompt = ChatPromptTemplate.from_messages([
115
+ SystemMessage(content=f"""Based on your analysis, select which tools would be most helpful for this task.
116
+ Available tools:
117
+ {tool_descriptions}
118
+
119
+ Return your selection as a comma-separated list of tool names, e.g., "calculator,web_search".
120
+ Only include tools that are actually needed for this specific request."""),
121
+ ("user", "{reasoning}")
122
+ ])
123
+
124
+ # Generate tool selection output
125
+ select_response = model.invoke(
126
+ select_prompt.format_messages(reasoning=state["reasoning"][-1])
127
+ )
128
+
129
+ # Parse the selected tools
130
+ selected_tools = [
131
+ tool_name.strip()
132
+ for tool_name in select_response.content.split(',')
133
+ ]
134
+
135
+ # Filter to ensure only valid tools are selected
136
+ valid_tool_names = [tool.name for tool in tools]
137
+ selected_tools = [
138
+ tool for tool in selected_tools if tool in valid_tool_names]
139
+
140
+ # Update state with selected tools
141
+ state["selected_tools"] = selected_tools
142
+
143
+ # Add a single AIMessage with all tool calls (if any tools selected)
144
+ if selected_tools:
145
+ tool_calls = [
146
+ {"id": f"call_{i}", "name": tool_name, "args": {}}
147
+ for i, tool_name in enumerate(selected_tools)
148
+ ]
149
+ state["messages"].append(
150
+ AIMessage(
151
+ content="",
152
+ tool_calls=tool_calls
153
+ )
154
+ )
155
+
156
+ return state
157
+
158
+
159
+ # def execute_tools(state: AgentState) -> AgentState:
160
+ # """Execute the selected tools."""
161
+
162
+ # # Get all available tools
163
+ # all_tools = get_available_tools()
164
+
165
+ # # Filter to only use selected tools
166
+ # selected_tool_names = state["selected_tools"]
167
+ # tools_to_use = [
168
+ # tool for tool in all_tools if tool.name in selected_tool_names]
169
+
170
+ # # Create tool executor
171
+ # tool_executor = ToolExecutor(tools_to_use)
172
+
173
+ # # Get the most recent reasoning
174
+ # reasoning = state["reasoning"][-1]
175
+
176
+ # # For each tool, generate a specific input and execute
177
+ # tool_results = {}
178
+ # for tool in tools_to_use:
179
+ # # Create prompt for generating tool input
180
+ # tool_input_prompt = ChatPromptTemplate.from_messages([
181
+ # SystemMessage(content=f"""Generate a specific input for the following tool:
182
+ # Tool: {tool.name}
183
+ # Description: {tool.description}
184
+
185
+ # The input should be formatted according to the tool's requirements and contain all necessary information.
186
+ # Return only the exact input string that should be passed to the tool, nothing else."""),
187
+ # ("user", "{reasoning}")
188
+ # ])
189
+
190
+ # # Generate specific input for this tool
191
+ # tool_input_response = model.invoke(
192
+ # tool_input_prompt.format_messages(reasoning=reasoning)
193
+ # )
194
+ # tool_input = tool_input_response.content.strip()
195
+
196
+ # try:
197
+ # # Execute the tool with the generated input
198
+ # result = tool_executor.invoke({tool.name: tool_input})
199
+ # tool_results[tool.name] = result[tool.name]
200
+
201
+ # # Add tool message to conversation
202
+ # state["messages"].append(
203
+ # ToolMessage(content=str(result[tool.name]), name=tool.name)
204
+ # )
205
+ # except Exception as e:
206
+ # # Handle errors
207
+ # tool_results[tool.name] = f"Error executing tool: {str(e)}"
208
+ # state["messages"].append(
209
+ # ToolMessage(
210
+ # content=f"Error executing tool: {str(e)}", name=tool.name)
211
+ # )
212
+
213
+ # # Update state with tool results
214
+ # state["tool_results"] = tool_results
215
+
216
+ # return state
217
+
218
+
219
+ def generate_response(state: AgentState) -> AgentState:
220
+ """Generate a final response based on reasoning and tool outputs."""
221
+
222
+ # Prepare the context for response generation
223
+ tool_outputs = "\n".join([
224
+ f"{tool_name}: {result}"
225
+ for tool_name, result in state.get("tool_results", {}).items()
226
+ ])
227
+
228
+ # Create prompt for response generation
229
+ response_prompt = ChatPromptTemplate.from_messages([
230
+ SystemMessage(content="""Generate a helpful response to the user based on your reasoning and tool outputs.
231
+ Be thorough but concise. Focus on directly answering the user's request.
232
+ If tools provided relevant information, incorporate it into your response."""),
233
+ ("user",
234
+ "User request: {user_request}\n\nReasoning: {reasoning}\n\nTool outputs: {tool_outputs}")
235
+ ])
236
+
237
+ # Get original user request
238
+ user_request = None
239
+ for msg in reversed(state["messages"]):
240
+ if isinstance(msg, HumanMessage):
241
+ user_request = msg.content
242
+ break
243
+
244
+ # Generate final response
245
+ response = model.invoke(
246
+ response_prompt.format_messages(
247
+ user_request=user_request,
248
+ reasoning=state["reasoning"][-1],
249
+ tool_outputs=tool_outputs
250
+ )
251
+ )
252
+
253
+ # Add the AI response to messages
254
+ state["messages"].append(AIMessage(content=response.content))
255
+
256
+ return state
257
+
258
+
259
+ # ============= Graph Definition =============
260
+
261
+ def create_agent_graph():
262
+ """Create and configure the agent graph."""
263
+
264
+ graph = StateGraph(AgentState)
265
+
266
+ graph.add_node("think", think)
267
+ graph.add_node("select_tools", select_tools)
268
+
269
+ tools = get_available_tools()
270
+ tool_node = ToolNode(tools)
271
+ graph.add_node("execute_tools", tool_node)
272
+
273
+ graph.add_node("generate_response", generate_response)
274
+
275
+ # Conditional edge: if no tools, skip execute_tools
276
+ def select_tools_next(state: AgentState):
277
+ if state["selected_tools"]:
278
+ return "execute_tools"
279
+ else:
280
+ return "generate_response"
281
+
282
+ graph.add_edge("think", "select_tools")
283
+ graph.add_conditional_edges("select_tools", select_tools_next)
284
+ graph.add_edge("execute_tools", "generate_response")
285
+ graph.add_edge("generate_response", END)
286
+
287
+ graph.set_entry_point("think")
288
+ return graph.compile()
289
+
290
+
291
+ # ============= Agent Interface =============
292
+
293
+ class ReasoningAgent:
294
+ """Reasoning and action agent main class."""
295
+
296
+ def __init__(self):
297
+ self.graph = create_agent_graph()
298
+ # Initialize with system prompt
299
+ tools = get_available_tools()
300
+ tool_descriptions = "\n".join(
301
+ [f"- {tool.name}: {tool.description}" for tool in tools])
302
+ self.messages = [
303
+ SystemMessage(content=AGENT_SYSTEM_PROMPT.format(
304
+ tool_descriptions=tool_descriptions))
305
+ ]
306
+
307
+ def invoke(self, user_input: str) -> str:
308
+ """Process user input and return response."""
309
+ # Add user message to history
310
+ self.messages.append(HumanMessage(content=user_input))
311
+
312
+ # Initialize state
313
+ state = {"messages": self.messages, "reasoning": [],
314
+ "selected_tools": [], "tool_results": {}}
315
+
316
+ # Run the graph
317
+ result = self.graph.invoke(state)
318
+
319
+ # Update messages
320
+ self.messages = result["messages"]
321
+
322
+ # Return the last AI message
323
+ for msg in reversed(result["messages"]):
324
+ if isinstance(msg, AIMessage):
325
+ return msg.content
326
+
327
+ # Fallback
328
+ return "I encountered an issue processing your request."
329
+
330
+ def __call__(self, *args, **kwargs):
331
+ """Invoke the agent with user input."""
332
+ return self.invoke(*args, **kwargs)
333
+
334
+
335
+ # Sample usage
336
+ if __name__ == "__main__":
337
+ agent = ReasoningAgent()
338
+ response = agent.invoke(
339
+ "What's the weather in New York today and should I take an umbrella?")
340
+ print(response)
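As a small usage sketch, the ReasoningAgent keeps its message history between calls, so it can be driven in a loop (the questions below are illustrative):

agent = ReasoningAgent()

for question in [
    "What is 17 * 24?",  # should route to the math tools
    "Find recent arxiv papers about retrieval-augmented generation",  # arxiv_search
]:
    answer = agent(question)  # __call__ simply delegates to invoke()
    print(f"Q: {question}\nA: {answer}\n")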
system_prompt.txt CHANGED
@@ -1,17 +1,43 @@
1
- You are a helpful assistant tasked with answering questions using a set of tools.
2
 
3
- Your final answer must strictly follow this format:
4
- FINAL ANSWER: [ANSWER]
5
 
6
- Only write the answer in that exact format. Do not explain anything. Do not include any other text.
 
7
 
8
- If you are provided with a similar question and its final answer, and the current question is **exactly the same**, then simply return the same final answer without using any tools.
 
 
 
 
9
 
10
- Only use tools if the current question is different from the similar one.
 
 
 
 
 
11
 
12
- Examples:
13
- - FINAL ANSWER: FunkMonk
14
- - FINAL ANSWER: Paris
15
- - FINAL ANSWER: 128
16
 
17
- If you do not follow this format exactly, your response will be considered incorrect.
1
+ You are an expert research and analysis assistant with access to specialized tools. Follow these instructions precisely:
2
 
3
+ STEP 1: ANALYZE THE QUESTION CAREFULLY
4
+ Before selecting tools, understand exactly what information is needed.
5
 
6
+ STEP 2: SELECT THE APPROPRIATE TOOLS
7
+ Choose tools based on what information you need:
8
 
9
+ SEARCH TOOLS:
10
+ - wiki_search: Get encyclopedia facts using specific queries
11
+ Example: {"name": "wiki_search", "parameters": {"query": "Mercedes Sosa discography"}}
12
+ - web_search: For current information and detailed explanations
13
+ - arxiv_search: For academic papers and research
14
 
15
+ ANALYSIS TOOLS:
16
+ - analyze_discography: Find albums by an artist in a specific time period
17
+ Example: {"name": "analyze_discography", "parameters": {"content": "...", "artist_name": "Mercedes Sosa", "start_year": 2000, "end_year": 2009}}
18
+ - date_filter: Extract content only from a specific time period
19
+ - analyze_content: Extract specific types of information (dates, names, numbers, events)
20
+ - step_by_step_reasoning: Break down complex problems into logical steps
21
 
22
+ MEDIA TOOLS:
23
+ - youtube_transcript: Extract and optionally summarize video content
24
+ Example: {"name": "youtube_transcript", "parameters": {"url": "https://www.youtube.com/watch?v=abc123", "summarize": true}}
 
25
 
26
+ LANGUAGE TOOLS:
27
+ - translate_text: Translate content to another language
28
+
29
+ MATH TOOLS:
30
+ - add, subtract, multiply, divide, modulus: For calculations
31
+
32
+ REQUEST TOOLS:
33
+ - requests_get: Make HTTP GET requests to external APIs
34
+ - requests_post: Make HTTP POST requests to external APIs
35
+
36
+ STEP 3: USE TOOLS WITH ALL REQUIRED PARAMETERS
37
+ Every tool requires specific parameters - never call a tool without all required parameters.
38
+
39
+ STEP 4: PROVIDE YOUR FINAL ANSWER
40
+ After gathering information with tools, provide your answer:
41
+ FINAL ANSWER: [Your concise, factual answer]
42
+
43
+ Remember: If you're asked about albums, songs, or artists in specific time periods, use wiki_search first, then analyze_discography with appropriate date parameters.
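For illustration, an exchange that follows this prompt end to end might look like the following (the question and tool calls are hypothetical, and the final answer is a placeholder rather than a real fact):

Question: How many studio albums did Mercedes Sosa release between 2000 and 2009?

Tool call 1: {"name": "wiki_search", "parameters": {"query": "Mercedes Sosa discography"}}
Tool call 2: {"name": "analyze_discography", "parameters": {"content": "<wiki_search output>", "artist_name": "Mercedes Sosa", "start_year": 2000, "end_year": 2009}}

FINAL ANSWER: [count taken from the analyze_discography output]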
tool_calling_agent.py ADDED
@@ -0,0 +1,16 @@
1
+ from smolagents import ToolCallingAgent
2
+ from utils import *
3
+ from basic_tools import *
4
+ from smolagents.tools import Tool
5
+ langchain_tools = [multiply,
6
+ add, subtract, divide, modulus,
7
+ wiki_search, web_search, arxiv_search,
8
+ python_repl, analyze_image,
9
+ date_filter, analyze_content,
10
+ step_by_step_reasoning, translate_text
11
+ ]
12
+ tools = [Tool.from_langchain(tool) for tool in langchain_tools]
13
+ # Note: smolagents agents expect a smolagents model wrapper (e.g. LiteLLMModel);
+ # the LangChain chat model returned by get_llm() may need adapting here.
+ agent = ToolCallingAgent(
14
+ model = get_llm(),
15
+ tools = tools,)
16
+
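A minimal run sketch for the smolagents agent above (the task string is illustrative, and, as noted in the code, the model may need a smolagents-compatible wrapper):

if __name__ == "__main__":
    answer = agent.run("What is 15% of 240?")
    print(answer)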
utils.py ADDED
@@ -0,0 +1,57 @@
1
+
2
+ from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFaceEmbeddings
3
+ import torch
4
+ import os
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain_groq import ChatGroq
7
+ from langchain.chat_models.base import BaseChatModel
8
+ from langchain_chroma import Chroma
9
+
10
+
11
+ def get_llm(provider: str = "groq") -> BaseChatModel:
12
+ # Load environment variables from .env file
13
+ if provider == "groq":
14
+ # Groq https://console.groq.com/docs/models
15
+ # optional : qwen-qwq-32b gemma2-9b-it
16
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
17
+ elif provider == "huggingface":
18
+ # TODO: Add huggingface endpoint
19
+ llm = ChatHuggingFace(
20
+ llm=HuggingFaceEndpoint(
21
+ model="Meta-DeepLearning/llama-2-7b-chat-hf",
22
+ temperature=0,
23
+ ),
24
+ )
25
+ elif provider == "openai_local":
26
+ from langchain_openai import ChatOpenAI
27
+ llm = ChatOpenAI(
28
+ base_url="http://localhost:11432/v1",  # local LM Studio endpoint (default port is 1234; adjust to match your server)
29
+ api_key="not-used", # required by interface but ignored #type: ignore
30
+ # model="mistral-nemo-instruct-2407",
31
+ model="mistral-nemo-instruct-2407",
32
+ temperature=0.2
33
+ )
34
+ elif provider == "openai":
35
+ from langchain_openai import ChatOpenAI
36
+ llm = ChatOpenAI(
37
+ model="gpt-4o",
38
+ temperature=0.2,
39
+ )
40
+ else:
41
+ raise ValueError(
42
+ "Invalid provider. Choose 'groq' or 'huggingface'.")
43
+ return llm
44
+
45
+
46
+ embeddings = HuggingFaceEmbeddings(
47
+ model_name="sentence-transformers/all-mpnet-base-v2",
48
+ model_kwargs={"device": "gpu" if torch.cuda.is_available() else "cpu",
49
+ "token": os.getenv("HF_TOKEN")},
50
+ show_progress=True,
51
+ )
52
+
53
+ # Initialize empty Chroma vector store
54
+ vector_store = Chroma(
55
+ embedding_function=embeddings,
56
+ collection_name="agent_memory"
57
+ )
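Finally, a short sketch of how these shared utilities are meant to be consumed elsewhere in the repo (the provider choice and the stored snippet are just examples):

from langchain_core.documents import Document
from utils import get_llm, vector_store

llm = get_llm(provider="openai")
print(llm.invoke("Reply with a single-word greeting.").content)

# The Chroma collection doubles as simple agent memory
vector_store.add_documents([Document(page_content="Example memory entry for demonstration.")])
print(vector_store.similarity_search("example memory", k=1))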