Spaces:

AnilNiraula
/

FinChat

Running

App Files Community

AnilNiraula committed on Jul 8

Commit

5d661b6

verified ·

1 Parent(s): 67a27e0

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -19

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import re
 import numpy as np
 import json
 # Set up logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -17,7 +18,7 @@ logger = logging.getLogger(__name__)
 device = torch.device("cpu")
 logger.info(f"Using device: {device}")
-# Load dataset at startup
 csv_path = "flat-ui__data-Sun Jul 06 2025.csv"
 try:
     df = pd.read_csv(csv_path)
@@ -30,6 +31,20 @@ except Exception as e:
     logger.error(f"Error loading dataset: {e}")
     df = None
 # Response cache with financial data entries
 response_cache = {
     "hi": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
@@ -162,6 +177,9 @@ response_cache = {
     "what is the average annual growth rate of the s&p 500 from 2011 to 2016?": (
         "The S&P 500’s average annual growth rate from 2011 to 2016 was approximately 12.7%, including dividends, driven by post-financial crisis recovery."
     ),
     "what will my return be in 10 years if i invest $5000 into s&p 500 right now?": (
         "Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding. "
         "This is based on the historical average return of 10–12% (1927–2025). Future returns vary and are not guaranteed. Consult a financial planner."
@@ -225,26 +243,24 @@ prompt_prefix = (
     "1. This uses the historical average return of 10–12% (1927–2025).\n"
     "2. Future returns vary and are not guaranteed.\n\n"
     "Example 3:\n"
-    "Q: What is the average return rate of the S&P 500 in the past 10 years?\n"
-    "A: The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends.\n"
-    "1. This period includes strong recovery years (e.g., 26.89% in 2021) and declines (e.g., -19.44% in 2022).\n"
     "2. Dividends contribute significantly to total returns.\n\n"
     "Q: "
 )
 prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
-# Substring matching for cache
 def get_closest_cache_key(message, cache_keys):
     message = message.lower().strip()
-    for key in cache_keys:
-        if key in message:
-            return key
-    return None
 # Parse period from user input
 def parse_period(query):
-    # Match specific year ranges (e.g., "2000 to 2008", "2011–2016")
-    match = re.search(r'(\d{4})\s*(?:to|-|–)\s*(\d{4})', query, re.IGNORECASE)
     if match:
         start_year, end_year = map(int, match.groups())
         return start_year, end_year, None
@@ -255,10 +271,10 @@ def parse_period(query):
         end_year = start_year + duration - 1
         return start_year, end_year, duration
     # Match general duration queries (e.g., "past 10-year", "3-year growth rate")
-    match = re.search(r'past\s*(\d+)-year|\b(\d+)-year.*growth\s*rate', query, re.IGNORECASE)
     if match:
         duration = int(match.group(1) or match.group(2))
-        max_year = df['Date'].dt.year.max() if df is not None else 2025
         start_year = max_year - duration + 1
         end_year = max_year
         return start_year, end_year, duration
@@ -266,16 +282,16 @@ def parse_period(query):
 # Calculate average growth rate
 def calculate_growth_rate(start_year, end_year, duration=None):
-    if df is None or start_year is None or end_year is None:
         return None, "Data not available or invalid period."
-    df_period = df[(df['Date'].dt.year >= start_year) & (df['Date'].dt.year <= end_year)]
     if df_period.empty:
         return None, f"No data available for {start_year} to {end_year}."
     avg_return = df_period['Return'].mean()
     if duration:
-        response = f"The S&P 500’s {duration}-year average annual growth rate from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
     else:
-        response = f"The S&P 500’s average annual growth rate from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
     return avg_return, response
 # Parse investment return query
@@ -289,7 +305,7 @@ def parse_investment_query(query):
 # Calculate future value
 def calculate_future_value(amount, years):
-    if df is None or amount is None or years is None:
         return None, "Data not available or invalid input."
     avg_annual_return = 10.0  # Historical S&P 500 average (1927–2025)
     future_value = amount * (1 + avg_annual_return / 100) ** years
@@ -385,7 +401,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
             gen_start_time = time.time()
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=50,
                 min_length=20,
                 do_sample=False,
                 repetition_penalty=2.0,

 import re
 import numpy as np
 import json
+import difflib
 # Set up logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 device = torch.device("cpu")
 logger.info(f"Using device: {device}")
+# Load dataset and precompute period data
 csv_path = "flat-ui__data-Sun Jul 06 2025.csv"
 try:
     df = pd.read_csv(csv_path)
     logger.error(f"Error loading dataset: {e}")
     df = None
+# Precompute yearly aggregates for faster lookups
+if df is not None:
+    df_yearly = df.groupby(df['Date'].dt.year).agg({
+        'SP500': 'mean',
+        'Return': 'mean',
+        'Real Return': 'mean',
+        'Dividend': 'mean',
+        'Earnings': 'mean',
+        'PE10': 'mean'
+    }).reset_index()
+    df_yearly = df_yearly.rename(columns={'Date': 'Year'})
+else:
+    df_yearly = None
 # Response cache with financial data entries
 response_cache = {
     "hi": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
     "what is the average annual growth rate of the s&p 500 from 2011 to 2016?": (
         "The S&P 500’s average annual growth rate from 2011 to 2016 was approximately 12.7%, including dividends, driven by post-financial crisis recovery."
     ),
+    "what was the average annual return of the s&p 500 between 2010 and 2020?": (
+        "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
+    ),
     "what will my return be in 10 years if i invest $5000 into s&p 500 right now?": (
         "Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding. "
         "This is based on the historical average return of 10–12% (1927–2025). Future returns vary and are not guaranteed. Consult a financial planner."
     "1. This uses the historical average return of 10–12% (1927–2025).\n"
     "2. Future returns vary and are not guaranteed.\n\n"
     "Example 3:\n"
+    "Q: What was the average annual return of the S&P 500 between 2010 and 2020?\n"
+    "A: The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends.\n"
+    "1. This period includes strong recovery post-financial crisis.\n"
     "2. Dividends contribute significantly to total returns.\n\n"
     "Q: "
 )
 prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
+# Fuzzy matching for cache (closest key via difflib, similarity cutoff 0.8)
 def get_closest_cache_key(message, cache_keys):
     message = message.lower().strip()
+    matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
+    return matches[0] if matches else None
 # Parse period from user input
 def parse_period(query):
+    # Match specific year ranges introduced by "between"/"from" (e.g., "between 2010 and 2020", "from 2000–2008")
+    match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
     if match:
         start_year, end_year = map(int, match.groups())
         return start_year, end_year, None
         end_year = start_year + duration - 1
         return start_year, end_year, duration
     # Match general duration queries (e.g., "past 10-year", "3-year growth rate")
+    match = re.search(r'past\s*(\d+)-year|\b(\d+)-year.*(?:return|growth\s*rate)', query, re.IGNORECASE)
     if match:
         duration = int(match.group(1) or match.group(2))
+        max_year = df_yearly['Year'].max() if df_yearly is not None else 2025
         start_year = max_year - duration + 1
         end_year = max_year
         return start_year, end_year, duration
 # Calculate average growth rate
 def calculate_growth_rate(start_year, end_year, duration=None):
+    if df_yearly is None or start_year is None or end_year is None:
         return None, "Data not available or invalid period."
+    df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
     if df_period.empty:
         return None, f"No data available for {start_year} to {end_year}."
     avg_return = df_period['Return'].mean()
     if duration:
+        response = f"The S&P 500’s {duration}-year average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
     else:
+        response = f"The S&P 500’s average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
     return avg_return, response
 # Parse investment return query
 # Calculate future value
 def calculate_future_value(amount, years):
+    if df_yearly is None or amount is None or years is None:
         return None, "Data not available or invalid input."
     avg_annual_return = 10.0  # Historical S&P 500 average (1927–2025)
     future_value = amount * (1 + avg_annual_return / 100) ** years
             gen_start_time = time.time()
             outputs = model.generate(
                 **inputs,
+                max_new_tokens=40,  # Reduced for faster inference
                 min_length=20,
                 do_sample=False,
                 repetition_penalty=2.0,