Abid Ali Awan committed on
Commit
3a614b5
·
1 Parent(s): d9e6f5f

Enhance portfolio extraction in financial agent: Update extraction logic to include total investment amount and improve handling of natural language inputs. Modify JSON response format and refine regex patterns for better accuracy in parsing holdings.

Browse files
Files changed (2) hide show
  1. agents/financial_agent.py +12 -7
  2. agents/tools.py +76 -76
agents/financial_agent.py CHANGED
@@ -169,20 +169,25 @@ Return only valid JSON, nothing else."""
169
 
170
  elif tool_name == "portfolio_analyzer":
171
  # Use OpenAI to extract portfolio information from natural language
172
- extraction_prompt = f"""Extract portfolio holdings from this message: "{message}"
173
 
174
- Convert the portfolio information to JSON format with holdings array.
175
  Each holding should have symbol and either shares or percentage.
176
 
177
  Return format:
178
- {{"holdings": [{{"symbol": "AAPL", "shares": 100}}, {{"symbol": "GOOGL", "percentage": 30}}]}}
179
 
180
  Examples:
181
- - "My portfolio: AAPL 100 shares, GOOGL 50 shares" -> {{"holdings": [{{"symbol": "AAPL", "shares": 100}}, {{"symbol": "GOOGL", "shares": 50}}]}}
182
- - "I have 40% AAPL, 30% MSFT, 30% TSLA" -> {{"holdings": [{{"symbol": "AAPL", "percentage": 40}}, {{"symbol": "MSFT", "percentage": 30}}, {{"symbol": "TSLA", "percentage": 30}}]}}
183
- - "Portfolio with Apple 200 shares and Microsoft 25%" -> {{"holdings": [{{"symbol": "AAPL", "shares": 200}}, {{"symbol": "MSFT", "percentage": 25}}]}}
184
 
185
- If no clear portfolio data is found, return: {{"holdings": []}}
 
 
 
 
 
186
 
187
  Return only valid JSON, nothing else."""
188
 
 
169
 
170
  elif tool_name == "portfolio_analyzer":
171
  # Use OpenAI to extract portfolio information from natural language
172
+ extraction_prompt = f"""Extract portfolio holdings and total investment from this message: "{message}"
173
 
174
+ Convert the portfolio information to JSON format with holdings array and total investment amount.
175
  Each holding should have symbol and either shares or percentage.
176
 
177
  Return format:
178
+ {{"holdings": [{{"symbol": "AAPL", "shares": 100}}, {{"symbol": "GOOGL", "percentage": 30}}], "total_investment": 100000}}
179
 
180
  Examples:
181
+ - "My portfolio: AAPL 100 shares, GOOGL 50 shares" -> {{"holdings": [{{"symbol": "AAPL", "shares": 100}}, {{"symbol": "GOOGL", "shares": 50}}], "total_investment": 0}}
182
+ - "I have 40% AAPL, 30% MSFT, 30% TSLA. I have invested total of 100K USD" -> {{"holdings": [{{"symbol": "AAPL", "percentage": 40}}, {{"symbol": "MSFT", "percentage": 30}}, {{"symbol": "TSLA", "percentage": 30}}], "total_investment": 100000}}
183
+ - "Portfolio with Apple 200 shares and Microsoft 25%, total investment $50,000" -> {{"holdings": [{{"symbol": "AAPL", "shares": 200}}, {{"symbol": "MSFT", "percentage": 25}}], "total_investment": 50000}}
184
 
185
+ Important:
186
+ - Extract total investment amount if mentioned (convert K=1000, M=1000000)
187
+ - If total investment not mentioned, set to 0
188
+ - Convert company names to stock symbols (Apple->AAPL, Microsoft->MSFT, Tesla->TSLA, etc.)
189
+
190
+ If no clear portfolio data is found, return: {{"holdings": [], "total_investment": 0}}
191
 
192
  Return only valid JSON, nothing else."""
193
 
agents/tools.py CHANGED
@@ -551,84 +551,84 @@ class FinancialTools:
551
  try:
552
  import re
553
 
554
- # Smart extraction using multiple approaches
555
  total_investment = 0
556
  holdings_info = []
557
-
558
- # First, try to extract investment amount using improved patterns
559
- def extract_investment_amount(text):
560
- patterns = [
561
- r"(?:invested|investment|total|have)\s*(?:of)?\s*(?:\$)?(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)\s*(?:USD|dollars?|\$)?",
562
- r"(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)\s*(?:USD|dollars?)",
563
- r"\$(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)",
564
- ]
565
-
566
- for pattern in patterns:
567
- match = re.search(pattern, text, re.IGNORECASE)
568
- if match:
569
- amount_str = match.group(1).replace(",", "")
570
- suffix = match.group(2).upper() if len(match.groups()) > 1 else ""
571
-
572
- multiplier = {"K": 1000, "M": 1000000, "B": 1000000000}.get(suffix, 1)
573
- return float(amount_str) * multiplier
574
- return 0
575
-
576
- total_investment = extract_investment_amount(input_str)
577
-
578
- # Extract holdings - percentages vs shares
579
- def extract_holdings(text):
580
- holdings = []
581
-
582
- # First try percentage patterns (with % symbol)
583
- percentage_patterns = [
584
- r"([A-Z]{2,5})\s*[:\s]*(\d+(?:\.\d+)?)%",
585
- r"([A-Z]{2,5}):\s*(\d+(?:\.\d+)?)%",
586
- r"([A-Z]{2,5})\s+(\d+(?:\.\d+)?)%",
587
- ]
588
-
589
- for pattern in percentage_patterns:
590
- matches = re.findall(pattern, text, re.IGNORECASE)
591
- if matches:
592
- for symbol, percentage in matches:
593
- holdings.append({
594
- "symbol": symbol.upper(),
595
- "percentage": float(percentage)
596
- })
597
- return holdings
598
-
599
- # If no percentages found, try shares patterns (without % symbol)
600
- shares_patterns = [
601
- r"([A-Z]{2,5})\s*[:\s]*(\d+(?:\.\d+)?)\s*(?!%)",
602
- r"([A-Z]{2,5}):\s*(\d+(?:\.\d+)?)\s*(?!%)",
603
- r"([A-Z]{2,5})\s+(\d+(?:\.\d+)?)\s*(?!%)",
604
- ]
605
-
606
- for pattern in shares_patterns:
607
- matches = re.findall(pattern, text, re.IGNORECASE)
608
- if matches:
609
- for symbol, shares in matches:
610
- holdings.append({
611
- "symbol": symbol.upper(),
612
- "shares": float(shares)
613
- })
614
- return holdings
615
-
616
- # If no percentage matches, try JSON format
617
- if not holdings:
618
- json_match = re.search(r"\{.*\}|\[.*\]", text, re.DOTALL)
619
- if json_match:
620
- try:
621
- data = json.loads(json_match.group(0))
622
- if isinstance(data, list):
623
- holdings = data
624
- elif isinstance(data, dict) and "holdings" in data:
625
- holdings = data["holdings"]
626
- except:
627
- pass
628
-
629
- return holdings
630
-
631
- holdings_info = extract_holdings(input_str)
632
 
633
  # If no valid holdings found, return early to avoid using this tool
634
  if not holdings_info:
 
551
  try:
552
  import re
553
 
554
+ # Try to parse as JSON first (from OpenAI extraction)
555
  total_investment = 0
556
  holdings_info = []
557
+
558
+ try:
559
+ # First try to parse as JSON
560
+ data = json.loads(input_str)
561
+ if isinstance(data, dict):
562
+ holdings_info = data.get("holdings", [])
563
+ total_investment = data.get("total_investment", 0)
564
+ except:
565
+ # If JSON parsing fails, extract from natural language
566
+ pass
567
+
568
+ # If no JSON data found, extract from natural language using regex
569
+ if not holdings_info:
570
+ # Extract investment amount using improved patterns
571
+ def extract_investment_amount(text):
572
+ patterns = [
573
+ r"(?:invested|investment|total|have)\s*(?:of)?\s*(?:\$)?(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)\s*(?:USD|dollars?|\$)?",
574
+ r"(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)\s*(?:USD|dollars?)",
575
+ r"\$(\d+(?:[,\d]*)?(?:\.\d+)?)\s*([KMB]?)",
576
+ ]
577
+
578
+ for pattern in patterns:
579
+ match = re.search(pattern, text, re.IGNORECASE)
580
+ if match:
581
+ amount_str = match.group(1).replace(",", "")
582
+ suffix = match.group(2).upper() if len(match.groups()) > 1 else ""
583
+
584
+ multiplier = {"K": 1000, "M": 1000000, "B": 1000000000}.get(suffix, 1)
585
+ return float(amount_str) * multiplier
586
+ return 0
587
+
588
+ if total_investment == 0:
589
+ total_investment = extract_investment_amount(input_str)
590
+
591
+ # Extract holdings using regex
592
+ def extract_holdings(text):
593
+ holdings = []
594
+
595
+ # First try percentage patterns (with % symbol)
596
+ percentage_patterns = [
597
+ r"([A-Z]{2,5})\s*[:\s]*(\d+(?:\.\d+)?)%",
598
+ r"([A-Z]{2,5}):\s*(\d+(?:\.\d+)?)%",
599
+ r"([A-Z]{2,5})\s+(\d+(?:\.\d+)?)%",
600
+ ]
601
+
602
+ for pattern in percentage_patterns:
603
+ matches = re.findall(pattern, text, re.IGNORECASE)
604
+ if matches:
605
+ for symbol, percentage in matches:
606
+ holdings.append({
607
+ "symbol": symbol.upper(),
608
+ "percentage": float(percentage)
609
+ })
610
+ return holdings
611
+
612
+ # If no percentages found, try shares patterns (without % symbol)
613
+ shares_patterns = [
614
+ r"([A-Z]{2,5})\s*[:\s]*(\d+(?:\.\d+)?)\s*(?!%)",
615
+ r"([A-Z]{2,5}):\s*(\d+(?:\.\d+)?)\s*(?!%)",
616
+ r"([A-Z]{2,5})\s+(\d+(?:\.\d+)?)\s*(?!%)",
617
+ ]
618
+
619
+ for pattern in shares_patterns:
620
+ matches = re.findall(pattern, text, re.IGNORECASE)
621
+ if matches:
622
+ for symbol, shares in matches:
623
+ holdings.append({
624
+ "symbol": symbol.upper(),
625
+ "shares": float(shares)
626
+ })
627
+ return holdings
628
+
629
+ return holdings
630
+
631
+ holdings_info = extract_holdings(input_str)
632
 
633
  # If no valid holdings found, return early to avoid using this tool
634
  if not holdings_info: