Spaces:

hopelessDev
/

Fin-senti

Sleeping

App Files Files Community

hopelessDev committed on 24 days ago

Commit

73bfb6f

verified ·

1 Parent(s): a309b7a

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -34

app.py CHANGED Viewed

@@ -12,8 +12,7 @@ from cachetools import TTLCache, cached
 # CONFIG
 # -----------------------------
 NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
-MAX_HEADLINES = 10  # fetch more headlines for robustness
-DECAY = 0.5  # weight for oldest headline
 MODEL_A = "yiyanghkust/finbert-tone"
 MODEL_B = "ProsusAI/finbert"
@@ -35,44 +34,62 @@ LABEL_MAP = {
 # -----------------------------
 stock_cache = TTLCache(maxsize=100, ttl=600)
 # -----------------------------
 # News fetchers
 # -----------------------------
-def fetch_news_newsapi(query: str, limit: int = MAX_HEADLINES) -> List[str]:
     if not NEWSAPI_KEY:
         return []
     url = "https://newsapi.org/v2/everything"
     params = {
         "q": query,
         "language": "en",
-        "pageSize": limit,
         "sortBy": "publishedAt",
         "apiKey": NEWSAPI_KEY,
     }
     try:
         r = requests.get(url, params=params, timeout=6)
         r.raise_for_status()
-        articles = r.json().get("articles", [])[:limit]
-        return [a.get("title", "") for a in articles if a.get("title")]
     except Exception as e:
         print(f"[NewsAPI error] {e}")
         return []
-def fetch_news_yfinance(ticker: str, limit: int = MAX_HEADLINES) -> List[str]:
     try:
-        t = yf.Ticker(ticker)
         news_items = getattr(t, "news", None) or []
-        return [n.get("title") for n in news_items if n.get("title")][:limit]
     except Exception as e:
         print(f"[Yahoo Finance error] {e}")
         return []
 def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
     headlines = fetch_news_newsapi(stock, limit)
-    if not headlines:
-        headlines = fetch_news_yfinance(stock, limit)
-    # Deduplicate and filter short headlines
-    return [h for h in list(dict.fromkeys(headlines)) if h and len(h.strip()) > 5]
 # -----------------------------
 # Ensemble utilities
@@ -97,24 +114,12 @@ def headline_score_ensemble(headline: str) -> np.ndarray:
     b = sentiment_b(headline)[0]
     return (model_to_vector(a) + model_to_vector(b)) / 2.0
-def weighted_aggregate_headlines_vectors(headlines: List[str], decay: float = DECAY) -> np.ndarray:
-    vectors = [headline_score_ensemble(h) for h in headlines]
     if not vectors:
-        return np.array([0.0, 1.0, 0.0])  # neutral fallback
-    n = len(vectors)
-    weights = np.linspace(1.0, decay, n)  # linear decay from 1 -> decay
-    weighted_sum = np.zeros(3)
-    total_weight = 0.0
-    for vec, w in zip(vectors, weights):
-        weighted_sum += vec * w
-        total_weight += w
-    agg_vec = weighted_sum / total_weight
-    agg_vec = np.clip(agg_vec, 0.0, None)
-    total = agg_vec.sum()
-    return agg_vec / total if total > 0 else np.array([0.0, 1.0, 0.0])
 def vector_to_score(vec: np.ndarray) -> float:
     neg, neu, pos = vec.tolist()
@@ -131,9 +136,11 @@ class StocksRequest(BaseModel):
 @cached(stock_cache)
 def analyze_single_stock(stock: str) -> float | str:
     headlines = fetch_headlines(stock)
-    if not headlines or len(headlines) < 2:
-        return "LOW_DATA"
-    agg = weighted_aggregate_headlines_vectors(headlines, decay=DECAY)
     return vector_to_score(agg)
 @app.get("/")
@@ -141,7 +148,7 @@ def root():
     return {"message": "Fin-senti API is running! Use POST /analyze"}
 @app.post("/analyze")
-def analyze_stocks(req: StocksRequest):
     results = {}
     for stock in req.stocks:
         results[stock] = analyze_single_stock(stock)

 # CONFIG
 # -----------------------------
 NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
+MAX_HEADLINES = 10  # fetch more for robustness
 MODEL_A = "yiyanghkust/finbert-tone"
 MODEL_B = "ProsusAI/finbert"
 # -----------------------------
 stock_cache = TTLCache(maxsize=100, ttl=600)
+# -----------------------------
+# Finance keywords filter
+# -----------------------------
FINANCE_KEYWORDS = [
    "stock", "share", "market", "profit", "loss", "earnings",
    "investment", "IPO", "dividend", "trading", "NASDAQ", "NYSE",
]

# Lower-cased once at import so the per-headline check does not redo it.
# NOTE: derived from FINANCE_KEYWORDS at import time; later mutation of the
# list is not reflected here.
_FINANCE_KEYWORDS_LOWER = tuple(keyword.lower() for keyword in FINANCE_KEYWORDS)


def is_relevant_headline(headline: str) -> bool:
    """Return True when *headline* contains any finance keyword.

    Matching is a case-insensitive substring test against FINANCE_KEYWORDS,
    so "stocks" matches "stock". An empty or ``None`` headline is treated
    as not relevant instead of raising.
    """
    if not headline:
        return False
    headline_lower = headline.lower()
    return any(keyword in headline_lower for keyword in _FINANCE_KEYWORDS_LOWER)
 # -----------------------------
 # News fetchers
 # -----------------------------
def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch up to *limit* finance-relevant headlines for *stock* from NewsAPI.

    Returns an empty list when no API key is configured, when *stock* is
    blank, when *limit* is not positive, or when the request fails for any
    reason (the error is printed, never raised).
    """
    stock = (stock or "").strip()
    if not NEWSAPI_KEY or not stock or limit <= 0:
        return []
    url = "https://newsapi.org/v2/everything"
    # Match either the quoted name or the "$TICKER" cashtag form.
    query = f'"{stock}" OR ${stock}'
    params = {
        "q": query,
        "language": "en",
        # Over-fetch so the keyword filter still leaves enough headlines;
        # NewsAPI documents a maximum pageSize of 100.
        "pageSize": min(limit * 2, 100),
        "sortBy": "publishedAt",
        "apiKey": NEWSAPI_KEY,
    }
    try:
        r = requests.get(url, params=params, timeout=6)
        r.raise_for_status()
        # "articles" may be missing or null; treat both as no results.
        articles = r.json().get("articles") or []
        headlines = [title for a in articles if (title := a.get("title"))]
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        # Deliberately best-effort: callers fall back to another source.
        print(f"[NewsAPI error] {e}")
        return []
def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch up to *limit* finance-relevant headlines for *stock* via yfinance.

    Any failure is printed and an empty list is returned, so this is safe to
    use as a best-effort fallback source.
    """
    try:
        t = yf.Ticker(stock)
        news_items = getattr(t, "news", None) or []
        headlines = []
        for item in news_items:
            # NOTE(review): newer yfinance releases appear to nest the title
            # under item["content"]; accept both shapes. Confirm against the
            # installed yfinance version.
            title = item.get("title") or (item.get("content") or {}).get("title")
            if title:
                headlines.append(title)
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        print(f"[Yahoo Finance error] {e}")
        return []
 def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
     """Return up to *limit* unique headlines for *stock*.

     NewsAPI is queried first. If it yields fewer than two distinct
     headlines, Yahoo Finance headlines are appended. Duplicates are removed
     on both paths, keeping first-seen order.
     """
     # dict.fromkeys de-duplicates while preserving order.
     headlines = list(dict.fromkeys(fetch_news_newsapi(stock, limit)))
     if len(headlines) < 2:  # fallback if insufficient headlines
         headlines_yf = fetch_news_yfinance(stock, limit)
         headlines = list(dict.fromkeys(headlines + headlines_yf))
     return headlines[:limit]
 # -----------------------------
 # Ensemble utilities
     b = sentiment_b(headline)[0]
     return (model_to_vector(a) + model_to_vector(b)) / 2.0
def aggregate_headlines_vectors(vectors: List[np.ndarray]) -> np.ndarray:
    """Average per-headline score vectors and renormalize to sum to 1.

    Accepts a list of vectors or a 2-D array (one row per headline). Returns
    the fallback ``[0.0, 1.0, 0.0]`` when there are no vectors or when the
    averaged vector has a non-positive or non-finite sum.
    """
    fallback = np.array([0.0, 1.0, 0.0])
    stacked = np.asarray(vectors, dtype=float)
    # Size check instead of truthiness: `not ndarray` is ambiguous and raises.
    if stacked.size == 0:
        return fallback
    mean_vec = np.mean(stacked, axis=0)
    total = mean_vec.sum()
    if not np.isfinite(total) or total <= 0:
        return fallback
    return mean_vec / total
 def vector_to_score(vec: np.ndarray) -> float:
     neg, neu, pos = vec.tolist()
 @cached(stock_cache)
 def analyze_single_stock(stock: str) -> float | str:
     """Score the sentiment of *stock* from its recent headlines.

     Headlines whose stripped length is 10 characters or fewer are dropped.
     Returns the string "NO_DATA" when nothing usable remains; otherwise the
     result of vector_to_score applied to the aggregated ensemble vectors.
     Results are memoized in stock_cache via the decorator.
     """
     usable = [
         text
         for text in fetch_headlines(stock)
         if text and len(text.strip()) > 10
     ]
     if len(usable) == 0:
         return "NO_DATA"
     per_headline = [headline_score_ensemble(text) for text in usable]
     return vector_to_score(aggregate_headlines_vectors(per_headline))
 @app.get("/")
     return {"message": "Fin-senti API is running! Use POST /analyze"}
 @app.post("/analyze")
+def analyze(req: StocksRequest):
     results = {}
     for stock in req.stocks:
         results[stock] = analyze_single_stock(stock)