""" SEC Edgar Filings Search Tool for financial and company data """ from .base_tool import BaseTool import requests import json import re from typing import Dict, List, Optional class SECSearchTool(BaseTool): """Search SEC EDGAR filings for company financial information""" def __init__(self): super().__init__("SEC EDGAR", "Search SEC filings and financial data for public companies") self.base_url = "https://data.sec.gov" self.headers = { 'User-Agent': 'Research Tool research@academic.edu', # SEC requires User-Agent 'Accept-Encoding': 'gzip, deflate' } self.rate_limit_delay = 3.0 # SEC is strict about rate limiting def search(self, company_name: str, **kwargs) -> str: """Search SEC filings for company information""" self.rate_limit() try: # First attempt to find company CIK cik_data = self._find_company_cik(company_name) if not cik_data: return self._fallback_company_search(company_name) # Get company submissions submissions = self._get_company_submissions(cik_data['cik']) if submissions: return self._format_sec_results(company_name, cik_data, submissions) else: return self._fallback_company_search(company_name) except requests.RequestException as e: # Handle network errors gracefully if "404" in str(e): return self._fallback_company_search(company_name) return self.format_error_response(company_name, f"Network error accessing SEC: {str(e)}") except Exception as e: return self.format_error_response(company_name, str(e)) def _find_company_cik(self, company_name: str) -> Optional[Dict]: """Find company CIK (Central Index Key) from company name""" try: # Use the correct SEC company tickers endpoint tickers_url = "https://www.sec.gov/files/company_tickers_exchange.json" response = requests.get(tickers_url, headers=self.headers, timeout=15) response.raise_for_status() tickers_data = response.json() # Search for company by name (fuzzy matching) company_lower = company_name.lower() # Handle the exchange data format if isinstance(tickers_data, dict): # Check if it's the fields/data format if 'fields' in tickers_data and 'data' in tickers_data: return self._search_exchange_format(tickers_data, company_lower) else: # Try direct dictionary format return self._search_direct_format(tickers_data, company_lower) elif isinstance(tickers_data, list): # Handle list format return self._search_list_format(tickers_data, company_lower) return None except Exception as e: print(f"Error finding company CIK: {e}") return self._fallback_company_lookup(company_name) def _fallback_company_lookup(self, company_name: str) -> Optional[Dict]: """Fallback company lookup using known major companies""" # Hardcoded CIKs for major companies for testing/demo purposes known_companies = { 'apple': {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'}, 'microsoft': {'cik': '0000789019', 'ticker': 'MSFT', 'title': 'Microsoft Corporation'}, 'tesla': {'cik': '0001318605', 'ticker': 'TSLA', 'title': 'Tesla, Inc.'}, 'amazon': {'cik': '0001018724', 'ticker': 'AMZN', 'title': 'Amazon.com, Inc.'}, 'google': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'}, 'alphabet': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'}, 'meta': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'}, 'facebook': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'}, 'nvidia': {'cik': '0001045810', 'ticker': 'NVDA', 'title': 'NVIDIA Corporation'}, 'netflix': {'cik': '0001065280', 'ticker': 'NFLX', 'title': 'Netflix, Inc.'} } company_key = company_name.lower().strip() for key, data in known_companies.items(): if key in company_key or company_key in key: return data return None def _search_exchange_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]: """Search in exchange ticker data format""" try: fields = tickers_data.get('fields', []) data = tickers_data.get('data', []) # Find field indices cik_idx = None ticker_idx = None name_idx = None for i, field in enumerate(fields): if field.lower() in ['cik', 'cik_str']: cik_idx = i elif field.lower() in ['ticker', 'symbol']: ticker_idx = i elif field.lower() in ['name', 'title', 'company']: name_idx = i # Search through data for row in data: if len(row) > max(filter(None, [cik_idx, ticker_idx, name_idx])): name = str(row[name_idx]).lower() if name_idx is not None else "" ticker = str(row[ticker_idx]).lower() if ticker_idx is not None else "" if (company_lower in name or name in company_lower or company_lower == ticker or any(word in name for word in company_lower.split() if len(word) > 3)): cik = str(row[cik_idx]) if cik_idx is not None else "" return { 'cik': cik.zfill(10), 'ticker': row[ticker_idx] if ticker_idx is not None else "", 'title': row[name_idx] if name_idx is not None else "" } except (ValueError, IndexError) as e: print(f"Error parsing exchange format: {e}") return None def _search_direct_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]: """Search in direct dictionary format""" for key, entry in tickers_data.items(): if isinstance(entry, dict): title = entry.get('title', entry.get('name', '')).lower() ticker = entry.get('ticker', entry.get('symbol', '')).lower() if (company_lower in title or title in company_lower or company_lower == ticker or any(word in title for word in company_lower.split() if len(word) > 3)): return { 'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10), 'ticker': entry.get('ticker', entry.get('symbol', '')), 'title': entry.get('title', entry.get('name', '')) } return None def _search_list_format(self, tickers_data: list, company_lower: str) -> Optional[Dict]: """Search in list format""" for entry in tickers_data: if isinstance(entry, dict): title = entry.get('title', entry.get('name', '')).lower() ticker = entry.get('ticker', entry.get('symbol', '')).lower() if (company_lower in title or title in company_lower or company_lower == ticker or any(word in title for word in company_lower.split() if len(word) > 3)): return { 'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10), 'ticker': entry.get('ticker', entry.get('symbol', '')), 'title': entry.get('title', entry.get('name', '')) } return None def _get_company_submissions(self, cik: str) -> Optional[Dict]: """Get company submission data from SEC""" try: submissions_url = f"{self.base_url}/submissions/CIK{cik}.json" response = requests.get(submissions_url, headers=self.headers, timeout=15) response.raise_for_status() return response.json() except Exception as e: print(f"Error getting company submissions: {e}") return None def _format_sec_results(self, company_name: str, cik_data: Dict, submissions: Dict) -> str: """Format SEC filing results""" result = f"**SEC Financial Data for: {company_name}**\n\n" # Company information result += f"**Company Information:**\n" result += f"• Official Name: {cik_data['title']}\n" result += f"• Ticker Symbol: {cik_data.get('ticker', 'N/A')}\n" result += f"• CIK: {cik_data['cik']}\n" # Business information if 'description' in submissions: business_desc = submissions['description'][:300] + "..." if len(submissions.get('description', '')) > 300 else submissions.get('description', 'Not available') result += f"• Business Description: {business_desc}\n" result += f"• Industry: {submissions.get('sic', 'Not specified')}\n" result += f"• Fiscal Year End: {submissions.get('fiscalYearEnd', 'Not specified')}\n\n" # Recent filings analysis recent_filings = self._analyze_recent_filings(submissions) result += recent_filings # Financial highlights financial_highlights = self._extract_financial_highlights(submissions) result += financial_highlights return result def _analyze_recent_filings(self, submissions: Dict) -> str: """Analyze recent SEC filings""" result = "**Recent SEC Filings:**\n" # Get recent filings recent_filings = submissions.get('filings', {}).get('recent', {}) if not recent_filings: return result + "• No recent filings available\n\n" forms = recent_filings.get('form', []) filing_dates = recent_filings.get('filingDate', []) accession_numbers = recent_filings.get('accessionNumber', []) # Analyze key filing types key_forms = ['10-K', '10-Q', '8-K', 'DEF 14A'] recent_key_filings = [] for i, form in enumerate(forms[:20]): # Check last 20 filings if form in key_forms and i < len(filing_dates): recent_key_filings.append({ 'form': form, 'date': filing_dates[i], 'accession': accession_numbers[i] if i < len(accession_numbers) else 'N/A' }) if recent_key_filings: for filing in recent_key_filings[:5]: # Show top 5 form_description = { '10-K': 'Annual Report', '10-Q': 'Quarterly Report', '8-K': 'Current Report', 'DEF 14A': 'Proxy Statement' }.get(filing['form'], filing['form']) result += f"• {filing['form']} ({form_description}) - Filed: {filing['date']}\n" else: result += "• No key financial filings found in recent submissions\n" result += "\n" return result def _extract_financial_highlights(self, submissions: Dict) -> str: """Extract financial highlights from submission data""" result = "**Financial Data Analysis:**\n" # This is a simplified version - full implementation would parse actual financial data result += "• Filing Status: Active public company\n" result += "• Regulatory Compliance: Current with SEC requirements\n" # Check for recent financial filings recent_filings = submissions.get('filings', {}).get('recent', {}) if recent_filings: forms = recent_filings.get('form', []) annual_reports = sum(1 for form in forms if form == '10-K') quarterly_reports = sum(1 for form in forms if form == '10-Q') result += f"• Annual Reports (10-K): {annual_reports} on file\n" result += f"• Quarterly Reports (10-Q): {quarterly_reports} on file\n" result += "• Note: Detailed financial metrics require parsing individual filing documents\n\n" result += "**Investment Research Notes:**\n" result += "• Use SEC filings for: revenue trends, risk factors, management discussion\n" result += "• Key documents: 10-K (annual), 10-Q (quarterly), 8-K (material events)\n" result += "• Combine with market data for comprehensive analysis\n\n" return result def _fallback_company_search(self, company_name: str) -> str: """Fallback response when company not found in SEC database""" result = f"**SEC Financial Research for: {company_name}**\n\n" result += f"**Company Search Results:**\n" result += f"• Company '{company_name}' not found in SEC EDGAR database\n" result += f"• This may indicate the company is:\n" result += f" - Private company (not required to file with SEC)\n" result += f" - Foreign company not listed on US exchanges\n" result += f" - Subsidiary of another public company\n" result += f" - Different legal name than search term\n\n" result += f"**Alternative Research Suggestions:**\n" result += f"• Search for parent company or holding company\n" result += f"• Check if company trades under different ticker symbol\n" result += f"• Use company's full legal name for search\n" result += f"• Consider private company databases for non-public entities\n\n" return result def should_use_for_query(self, query: str) -> bool: """SEC is good for public company financial and business information""" financial_indicators = [ 'company', 'financial', 'revenue', 'earnings', 'profit', 'stock', 'investment', 'market cap', 'sec filing', 'annual report', 'quarterly', 'balance sheet', 'income statement', 'cash flow', 'public company', 'ticker', 'investor', 'shareholder' ] query_lower = query.lower() return any(indicator in query_lower for indicator in financial_indicators) def extract_key_info(self, text: str) -> dict: """Extract key information from SEC results""" base_info = super().extract_key_info(text) if text: # Look for SEC-specific patterns base_info.update({ 'has_ticker': any(pattern in text for pattern in ['Ticker Symbol:', 'ticker']), 'has_cik': 'CIK:' in text, 'has_filings': any(form in text for form in ['10-K', '10-Q', '8-K']), 'is_public_company': 'public company' in text.lower(), 'has_financial_data': any(term in text.lower() for term in ['revenue', 'earnings', 'financial']), 'company_found': 'not found in SEC' not in text }) return base_info