Spaces:

hopelessDev
/

Fin-senti

Running

App Files Files Community

hopelessDev committed 21 days ago

Commit

1cb61c4

verified ·

1 Parent(s): 35634ec

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -68

app.py CHANGED Viewed

@@ -7,14 +7,12 @@ import yfinance as yf
 import numpy as np
 from transformers import pipeline
 from cachetools import TTLCache, cached
-from datetime import datetime, timezone
 # -----------------------------
 # CONFIG
 # -----------------------------
 NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
 MAX_HEADLINES = 10  # fetch more for robustness
-DECAY_HALF_LIFE_HOURS = 12  # half-life for old news
 MODEL_A = "yiyanghkust/finbert-tone"
 MODEL_B = "ProsusAI/finbert"
@@ -36,64 +34,42 @@ LABEL_MAP = {
 # -----------------------------
 stock_cache = TTLCache(maxsize=100, ttl=600)
-# -----------------------------
-# Finance keywords filter
-# -----------------------------
-FINANCE_KEYWORDS = [
-    "stock", "share", "market", "profit", "loss", "earnings",
-    "investment", "IPO", "dividend", "trading", "NASDAQ", "NYSE"
-]
-def is_relevant_headline(headline: str) -> bool:
-    headline_lower = headline.lower()
-    return any(k.lower() in headline_lower for k in FINANCE_KEYWORDS)
 # -----------------------------
 # News fetchers
 # -----------------------------
-def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     if not NEWSAPI_KEY:
         return []
     url = "https://newsapi.org/v2/everything"
-    query = f'"{stock}" OR ${stock}'
     params = {
         "q": query,
         "language": "en",
-        "pageSize": limit*2,
         "sortBy": "publishedAt",
         "apiKey": NEWSAPI_KEY,
     }
     try:
         r = requests.get(url, params=params, timeout=6)
         r.raise_for_status()
-        articles = r.json().get("articles", [])
-        filtered = [
-            {"title": a.get("title"), "publishedAt": a.get("publishedAt")}
-            for a in articles if a.get("title") and is_relevant_headline(a.get("title"))
-        ]
-        return filtered[:limit]
     except Exception as e:
         print(f"[NewsAPI error] {e}")
         return []
-def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     try:
-        t = yf.Ticker(stock)
         news_items = getattr(t, "news", None) or []
-        filtered = [
-            {"title": n.get("title"), "publishedAt": n.get("providerPublishTime")}
-            for n in news_items if n.get("title") and is_relevant_headline(n.get("title"))
-        ]
-        return filtered[:limit]
     except Exception as e:
         print(f"[Yahoo Finance error] {e}")
         return []
-def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     headlines = fetch_news_newsapi(stock, limit)
-    if len(headlines) < 2:
-        headlines_yf = fetch_news_yfinance(stock, limit)
-        headlines = list({h['title']: h for h in (headlines + headlines_yf)}.values())[:limit]
     return headlines
 # -----------------------------
@@ -119,63 +95,56 @@ def headline_score_ensemble(headline: str) -> np.ndarray:
     b = sentiment_b(headline)[0]
     return (model_to_vector(a) + model_to_vector(b)) / 2.0
-def aggregate_headlines_vectors(vectors: List[np.ndarray], timestamps: List[float]) -> np.ndarray:
     if not vectors:
         return np.array([0.0,1.0,0.0])
-    # Apply decay weights based on timestamps
-    now = datetime.now(timezone.utc).timestamp()
-    weights = np.array([0.5 ** ((now - ts)/(DECAY_HALF_LIFE_HOURS*3600)) for ts in timestamps])
-    weighted_vecs = np.array(vectors) * weights[:, None]
-    mean_vec = weighted_vecs.sum(axis=0) / weights.sum()
-    mean_vec = np.clip(mean_vec, 0.0, None)
     total = mean_vec.sum()
     return mean_vec / total if total > 0 else np.array([0.0,1.0,0.0])
 def vector_to_score(vec: np.ndarray) -> float:
     neg, neu, pos = vec.tolist()
-    return round(max(0.0, min(1.0, pos + 0.5 * neu)), 2)
 # -----------------------------
 # FastAPI app
 # -----------------------------
-app = FastAPI(title="Financial Sentiment API with Decay")
 class StocksRequest(BaseModel):
     stocks: List[str]
 @cached(stock_cache)
 def analyze_single_stock(stock: str) -> float | str:
-    headlines_data = fetch_headlines(stock)
-    headlines_data = [h for h in headlines_data if h.get("title") and len(h["title"].strip()) > 10]
-    if not headlines_data:
         return "NO_DATA"
-    vectors = []
-    timestamps = []
-    for h in headlines_data:
-        vectors.append(headline_score_ensemble(h["title"]))
-        # convert publishedAt to timestamp
-        try:
-            ts = h.get("publishedAt")
-            if isinstance(ts, str):
-                ts = datetime.fromisoformat(ts.replace("Z","+00:00")).timestamp()
-            elif isinstance(ts, (int, float)):
-                ts = float(ts)
-            else:
-                ts = datetime.now(timezone.utc).timestamp()
-        except:
-            ts = datetime.now(timezone.utc).timestamp()
-        timestamps.append(ts)
-    agg = aggregate_headlines_vectors(vectors, timestamps)
-    return vector_to_score(agg)
 @app.get("/")
 def root():
-    return {"message": "Fin-senti API with Decay is running! Use POST /analyze"}
 @app.post("/analyze")
 def analyze(req: StocksRequest):

 import numpy as np
 from transformers import pipeline
 from cachetools import TTLCache, cached
 # -----------------------------
 # CONFIG
 # -----------------------------
 NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
 MAX_HEADLINES = 10  # fetch more for robustness
 MODEL_A = "yiyanghkust/finbert-tone"
 MODEL_B = "ProsusAI/finbert"
 # -----------------------------
 stock_cache = TTLCache(maxsize=100, ttl=600)
 # -----------------------------
 # News fetchers
 # -----------------------------
+def fetch_news_newsapi(query: str, limit: int = MAX_HEADLINES) -> List[str]:
     if not NEWSAPI_KEY:
         return []
     url = "https://newsapi.org/v2/everything"
     params = {
         "q": query,
         "language": "en",
+        "pageSize": limit,
         "sortBy": "publishedAt",
         "apiKey": NEWSAPI_KEY,
     }
     try:
         r = requests.get(url, params=params, timeout=6)
         r.raise_for_status()
+        articles = r.json().get("articles", [])[:limit]
+        return [a.get("title", "") for a in articles if a.get("title")]
     except Exception as e:
         print(f"[NewsAPI error] {e}")
         return []
+def fetch_news_yfinance(ticker: str, limit: int = MAX_HEADLINES) -> List[str]:
     try:
+        t = yf.Ticker(ticker)
         news_items = getattr(t, "news", None) or []
+        return [n.get("title") for n in news_items if n.get("title")][:limit]
     except Exception as e:
         print(f"[Yahoo Finance error] {e}")
         return []
+def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
     headlines = fetch_news_newsapi(stock, limit)
+    if not headlines:
+        headlines = fetch_news_yfinance(stock, limit)
     return headlines
 # -----------------------------
     b = sentiment_b(headline)[0]
     return (model_to_vector(a) + model_to_vector(b)) / 2.0
+def aggregate_headlines_vectors(vectors: List[np.ndarray]) -> np.ndarray:
     if not vectors:
         return np.array([0.0,1.0,0.0])
+    mean_vec = np.mean(vectors, axis=0)
     total = mean_vec.sum()
     return mean_vec / total if total > 0 else np.array([0.0,1.0,0.0])
 def vector_to_score(vec: np.ndarray) -> float:
     neg, neu, pos = vec.tolist()
+    return max(0.0, min(1.0, pos + 0.5 * neu))
+# -----------------------------
+# Decay utilities
+# -----------------------------
+def get_decay_factor(num_headlines: int, max_headlines: int = MAX_HEADLINES,
+                     min_decay: float = 0.6, max_decay: float = 0.95) -> float:
+    """
+    Dynamic decay: more headlines → higher decay → score can approach extremes.
+    """
+    ratio = min(num_headlines / max_headlines, 1.0)
+    return min_decay + ratio * (max_decay - min_decay)
 # -----------------------------
 # FastAPI app
 # -----------------------------
+app = FastAPI(title="Financial Sentiment API")
 class StocksRequest(BaseModel):
     stocks: List[str]
 @cached(stock_cache)
 def analyze_single_stock(stock: str) -> float | str:
+    headlines = fetch_headlines(stock)
+    headlines = [h for h in headlines if h and len(h.strip()) > 10]
+    if not headlines or len(headlines) < 2:
         return "NO_DATA"
+    vectors = [headline_score_ensemble(h) for h in headlines]
+    agg = aggregate_headlines_vectors(vectors)
+    raw_score = vector_to_score(agg)
+    # Apply dynamic decay
+    decay = get_decay_factor(len(headlines))
+    adjusted_score = 0.5 + decay * (raw_score - 0.5)
+    return round(adjusted_score, 2)
 @app.get("/")
 def root():
+    return {"message": "Fin-senti API is running! Use POST /analyze"}
 @app.post("/analyze")
 def analyze(req: StocksRequest):