File size: 15,950 Bytes
ce0bf87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
"""
SEC Edgar Filings Search Tool for financial and company data
"""
from .base_tool import BaseTool
import requests
import json
import re
from typing import Dict, List, Optional


class SECSearchTool(BaseTool):
    """Search SEC EDGAR filings for company financial information"""
    
    def __init__(self):
        """Set up the SEC EDGAR tool: base-class registration, endpoint and headers."""
        # The two strings are handed to the base class initializer
        # (presumably the tool's display name and description — confirm in BaseTool).
        tool_name = "SEC EDGAR"
        tool_description = "Search SEC filings and financial data for public companies"
        super().__init__(tool_name, tool_description)

        # Root of the JSON API used for per-company submission data.
        self.base_url = "https://data.sec.gov"

        # SEC requires a User-Agent on every request.
        # NOTE(review): the contact part of the User-Agent looks like a redacted
        # placeholder — confirm a real contact address is configured.
        request_headers = {}
        request_headers['User-Agent'] = 'Research Tool [email protected]'
        request_headers['Accept-Encoding'] = 'gzip, deflate'
        self.headers = request_headers

        # SEC is strict about rate limiting; set after the base initializer so
        # this value is the one in effect (presumably read by rate_limit()).
        self.rate_limit_delay = 3.0
    
    def search(self, company_name: str, **kwargs) -> str:
        """Search SEC filings for company information"""
        self.rate_limit()
        
        try:
            # First attempt to find company CIK
            cik_data = self._find_company_cik(company_name)
            
            if not cik_data:
                return self._fallback_company_search(company_name)
            
            # Get company submissions
            submissions = self._get_company_submissions(cik_data['cik'])
            
            if submissions:
                return self._format_sec_results(company_name, cik_data, submissions)
            else:
                return self._fallback_company_search(company_name)
                
        except requests.RequestException as e:
            # Handle network errors gracefully
            if "404" in str(e):
                return self._fallback_company_search(company_name)
            return self.format_error_response(company_name, f"Network error accessing SEC: {str(e)}")
        except Exception as e:
            return self.format_error_response(company_name, str(e))
    
    def _find_company_cik(self, company_name: str) -> Optional[Dict]:
        """Resolve a company name to its CIK (Central Index Key) record.

        Downloads the SEC ticker list and dispatches to the search helper that
        matches the shape of the decoded JSON.  Any exception raised along the
        way (download, decoding or searching) is printed and answered from the
        built-in table of well-known companies instead.

        Args:
            company_name: Company name (or ticker) to look up.

        Returns:
            A dict with ``cik``, ``ticker`` and ``title`` keys, or ``None``
            when no company matches.
        """
        try:
            reply = requests.get(
                "https://www.sec.gov/files/company_tickers_exchange.json",
                headers=self.headers,
                timeout=15,
            )
            reply.raise_for_status()
            payload = reply.json()

            # Matching is case-insensitive: helpers receive the lower-cased query.
            needle = company_name.lower()

            if isinstance(payload, list):
                return self._search_list_format(payload, needle)
            if not isinstance(payload, dict):
                # Neither a mapping nor a list: nothing searchable.
                return None
            if 'fields' in payload and 'data' in payload:
                # Column-oriented layout: header list plus row lists.
                return self._search_exchange_format(payload, needle)
            # Otherwise treat it as a mapping of per-company entries.
            return self._search_direct_format(payload, needle)

        except Exception as e:
            print(f"Error finding company CIK: {e}")
            return self._fallback_company_lookup(company_name)
    
    def _fallback_company_lookup(self, company_name: str) -> Optional[Dict]:
        """Fallback company lookup using known major companies"""
        # Hardcoded CIKs for major companies for testing/demo purposes
        known_companies = {
            'apple': {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'},
            'microsoft': {'cik': '0000789019', 'ticker': 'MSFT', 'title': 'Microsoft Corporation'},
            'tesla': {'cik': '0001318605', 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
            'amazon': {'cik': '0001018724', 'ticker': 'AMZN', 'title': 'Amazon.com, Inc.'},
            'google': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
            'alphabet': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
            'meta': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
            'facebook': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
            'nvidia': {'cik': '0001045810', 'ticker': 'NVDA', 'title': 'NVIDIA Corporation'},
            'netflix': {'cik': '0001065280', 'ticker': 'NFLX', 'title': 'Netflix, Inc.'}
        }
        
        company_key = company_name.lower().strip()
        for key, data in known_companies.items():
            if key in company_key or company_key in key:
                return data
        
        return None
    
    def _search_exchange_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in exchange ticker data format"""
        try:
            fields = tickers_data.get('fields', [])
            data = tickers_data.get('data', [])
            
            # Find field indices
            cik_idx = None
            ticker_idx = None
            name_idx = None
            
            for i, field in enumerate(fields):
                if field.lower() in ['cik', 'cik_str']:
                    cik_idx = i
                elif field.lower() in ['ticker', 'symbol']:
                    ticker_idx = i
                elif field.lower() in ['name', 'title', 'company']:
                    name_idx = i
            
            # Search through data
            for row in data:
                if len(row) > max(filter(None, [cik_idx, ticker_idx, name_idx])):
                    name = str(row[name_idx]).lower() if name_idx is not None else ""
                    ticker = str(row[ticker_idx]).lower() if ticker_idx is not None else ""
                    
                    if (company_lower in name or 
                        name in company_lower or
                        company_lower == ticker or
                        any(word in name for word in company_lower.split() if len(word) > 3)):
                        
                        cik = str(row[cik_idx]) if cik_idx is not None else ""
                        return {
                            'cik': cik.zfill(10),
                            'ticker': row[ticker_idx] if ticker_idx is not None else "",
                            'title': row[name_idx] if name_idx is not None else ""
                        }
            
        except (ValueError, IndexError) as e:
            print(f"Error parsing exchange format: {e}")
        
        return None
    
    def _search_direct_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in direct dictionary format"""
        for key, entry in tickers_data.items():
            if isinstance(entry, dict):
                title = entry.get('title', entry.get('name', '')).lower()
                ticker = entry.get('ticker', entry.get('symbol', '')).lower()
                
                if (company_lower in title or 
                    title in company_lower or
                    company_lower == ticker or
                    any(word in title for word in company_lower.split() if len(word) > 3)):
                    
                    return {
                        'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10),
                        'ticker': entry.get('ticker', entry.get('symbol', '')),
                        'title': entry.get('title', entry.get('name', ''))
                    }
        return None
    
    def _search_list_format(self, tickers_data: list, company_lower: str) -> Optional[Dict]:
        """Search in list format"""
        for entry in tickers_data:
            if isinstance(entry, dict):
                title = entry.get('title', entry.get('name', '')).lower()
                ticker = entry.get('ticker', entry.get('symbol', '')).lower()
                
                if (company_lower in title or 
                    title in company_lower or
                    company_lower == ticker or
                    any(word in title for word in company_lower.split() if len(word) > 3)):
                    
                    return {
                        'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10),
                        'ticker': entry.get('ticker', entry.get('symbol', '')),
                        'title': entry.get('title', entry.get('name', ''))
                    }
        return None
    
    def _get_company_submissions(self, cik: str) -> Optional[Dict]:
        """Download the submissions JSON document for one company.

        Args:
            cik: CIK string inserted verbatim into the ``CIK{cik}.json`` URL.

        Returns:
            The decoded JSON document, or ``None`` when the request or the
            decoding fails for any reason (the error is printed).
        """
        try:
            endpoint = f"{self.base_url}/submissions/CIK{cik}.json"
            reply = requests.get(endpoint, headers=self.headers, timeout=15)
            # Turn HTTP error statuses into exceptions so they take the
            # same "print and return None" path as transport failures.
            reply.raise_for_status()
            payload = reply.json()
            return payload
        except Exception as e:
            print(f"Error getting company submissions: {e}")
            return None
    
    def _format_sec_results(self, company_name: str, cik_data: Dict, submissions: Dict) -> str:
        """Format SEC filing results"""
        result = f"**SEC Financial Data for: {company_name}**\n\n"
        
        # Company information
        result += f"**Company Information:**\n"
        result += f"• Official Name: {cik_data['title']}\n"
        result += f"• Ticker Symbol: {cik_data.get('ticker', 'N/A')}\n"
        result += f"• CIK: {cik_data['cik']}\n"
        
        # Business information
        if 'description' in submissions:
            business_desc = submissions['description'][:300] + "..." if len(submissions.get('description', '')) > 300 else submissions.get('description', 'Not available')
            result += f"• Business Description: {business_desc}\n"
        
        result += f"• Industry: {submissions.get('sic', 'Not specified')}\n"
        result += f"• Fiscal Year End: {submissions.get('fiscalYearEnd', 'Not specified')}\n\n"
        
        # Recent filings analysis
        recent_filings = self._analyze_recent_filings(submissions)
        result += recent_filings
        
        # Financial highlights
        financial_highlights = self._extract_financial_highlights(submissions)
        result += financial_highlights
        
        return result
    
    def _analyze_recent_filings(self, submissions: Dict) -> str:
        """Analyze recent SEC filings"""
        result = "**Recent SEC Filings:**\n"
        
        # Get recent filings
        recent_filings = submissions.get('filings', {}).get('recent', {})
        
        if not recent_filings:
            return result + "• No recent filings available\n\n"
        
        forms = recent_filings.get('form', [])
        filing_dates = recent_filings.get('filingDate', [])
        accession_numbers = recent_filings.get('accessionNumber', [])
        
        # Analyze key filing types
        key_forms = ['10-K', '10-Q', '8-K', 'DEF 14A']
        recent_key_filings = []
        
        for i, form in enumerate(forms[:20]):  # Check last 20 filings
            if form in key_forms and i < len(filing_dates):
                recent_key_filings.append({
                    'form': form,
                    'date': filing_dates[i],
                    'accession': accession_numbers[i] if i < len(accession_numbers) else 'N/A'
                })
        
        if recent_key_filings:
            for filing in recent_key_filings[:5]:  # Show top 5
                form_description = {
                    '10-K': 'Annual Report',
                    '10-Q': 'Quarterly Report', 
                    '8-K': 'Current Report',
                    'DEF 14A': 'Proxy Statement'
                }.get(filing['form'], filing['form'])
                
                result += f"• {filing['form']} ({form_description}) - Filed: {filing['date']}\n"
        else:
            result += "• No key financial filings found in recent submissions\n"
        
        result += "\n"
        return result
    
    def _extract_financial_highlights(self, submissions: Dict) -> str:
        """Extract financial highlights from submission data"""
        result = "**Financial Data Analysis:**\n"
        
        # This is a simplified version - full implementation would parse actual financial data
        result += "• Filing Status: Active public company\n"
        result += "• Regulatory Compliance: Current with SEC requirements\n"
        
        # Check for recent financial filings
        recent_filings = submissions.get('filings', {}).get('recent', {})
        if recent_filings:
            forms = recent_filings.get('form', [])
            annual_reports = sum(1 for form in forms if form == '10-K')
            quarterly_reports = sum(1 for form in forms if form == '10-Q')
            
            result += f"• Annual Reports (10-K): {annual_reports} on file\n"
            result += f"• Quarterly Reports (10-Q): {quarterly_reports} on file\n"
        
        result += "• Note: Detailed financial metrics require parsing individual filing documents\n\n"
        
        result += "**Investment Research Notes:**\n"
        result += "• Use SEC filings for: revenue trends, risk factors, management discussion\n"
        result += "• Key documents: 10-K (annual), 10-Q (quarterly), 8-K (material events)\n"
        result += "• Combine with market data for comprehensive analysis\n\n"
        
        return result
    
    def _fallback_company_search(self, company_name: str) -> str:
        """Fallback response when company not found in SEC database"""
        result = f"**SEC Financial Research for: {company_name}**\n\n"
        result += f"**Company Search Results:**\n"
        result += f"• Company '{company_name}' not found in SEC EDGAR database\n"
        result += f"• This may indicate the company is:\n"
        result += f"  - Private company (not required to file with SEC)\n"
        result += f"  - Foreign company not listed on US exchanges\n"
        result += f"  - Subsidiary of another public company\n"
        result += f"  - Different legal name than search term\n\n"
        
        result += f"**Alternative Research Suggestions:**\n"
        result += f"• Search for parent company or holding company\n"
        result += f"• Check if company trades under different ticker symbol\n"
        result += f"• Use company's full legal name for search\n"
        result += f"• Consider private company databases for non-public entities\n\n"
        
        return result
    
    def should_use_for_query(self, query: str) -> bool:
        """Tell whether a query looks like a company/financial question.

        Args:
            query: Free-text user query.

        Returns:
            ``True`` when the lower-cased query contains at least one of the
            financial keywords below as a substring, otherwise ``False``.
        """
        keywords = (
            'company', 'financial', 'revenue', 'earnings', 'profit', 'stock',
            'investment', 'market cap', 'sec filing', 'annual report',
            'quarterly', 'balance sheet', 'income statement', 'cash flow',
            'public company', 'ticker', 'investor', 'shareholder',
        )

        lowered = query.lower()
        # Plain substring test: stop at the first keyword that occurs.
        for keyword in keywords:
            if keyword in lowered:
                return True
        return False
    
    def extract_key_info(self, text: str) -> dict:
        """Add SEC-specific flags to the base tool's key-info dictionary.

        Args:
            text: Report text, typically the output of ``search``.

        Returns:
            The dictionary returned by the base class, extended — when
            ``text`` is non-empty — with boolean flags derived from simple
            substring checks on the text.
        """
        info = super().extract_key_info(text)

        if not text:
            return info

        lowered = text.lower()

        # Look for SEC-specific patterns
        has_ticker = 'Ticker Symbol:' in text or 'ticker' in text
        has_filings = '10-K' in text or '10-Q' in text or '8-K' in text
        has_financial_data = (
            'revenue' in lowered or 'earnings' in lowered or 'financial' in lowered
        )

        info.update({
            'has_ticker': has_ticker,
            'has_cik': 'CIK:' in text,
            'has_filings': has_filings,
            'is_public_company': 'public company' in lowered,
            'has_financial_data': has_financial_data,
            # The "not found" report contains this exact phrase.
            'company_found': 'not found in SEC' not in text
        })

        return info