hopelessDev committed on
Commit
73bfb6f
·
verified ·
1 Parent(s): a309b7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -34
app.py CHANGED
@@ -12,8 +12,7 @@ from cachetools import TTLCache, cached
12
  # CONFIG
13
  # -----------------------------
14
  NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
15
- MAX_HEADLINES = 10 # fetch more headlines for robustness
16
- DECAY = 0.5 # weight for oldest headline
17
 
18
  MODEL_A = "yiyanghkust/finbert-tone"
19
  MODEL_B = "ProsusAI/finbert"
@@ -35,44 +34,62 @@ LABEL_MAP = {
35
  # -----------------------------
36
  stock_cache = TTLCache(maxsize=100, ttl=600)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # -----------------------------
39
  # News fetchers
40
  # -----------------------------
41
- def fetch_news_newsapi(query: str, limit: int = MAX_HEADLINES) -> List[str]:
42
  if not NEWSAPI_KEY:
43
  return []
44
  url = "https://newsapi.org/v2/everything"
 
45
  params = {
46
  "q": query,
47
  "language": "en",
48
- "pageSize": limit,
49
  "sortBy": "publishedAt",
50
  "apiKey": NEWSAPI_KEY,
51
  }
52
  try:
53
  r = requests.get(url, params=params, timeout=6)
54
  r.raise_for_status()
55
- articles = r.json().get("articles", [])[:limit]
56
- return [a.get("title", "") for a in articles if a.get("title")]
 
 
57
  except Exception as e:
58
  print(f"[NewsAPI error] {e}")
59
  return []
60
 
61
- def fetch_news_yfinance(ticker: str, limit: int = MAX_HEADLINES) -> List[str]:
62
  try:
63
- t = yf.Ticker(ticker)
64
  news_items = getattr(t, "news", None) or []
65
- return [n.get("title") for n in news_items if n.get("title")][:limit]
 
 
66
  except Exception as e:
67
  print(f"[Yahoo Finance error] {e}")
68
  return []
69
 
70
  def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
71
  headlines = fetch_news_newsapi(stock, limit)
72
- if not headlines:
73
- headlines = fetch_news_yfinance(stock, limit)
74
- # Deduplicate and filter short headlines
75
- return [h for h in list(dict.fromkeys(headlines)) if h and len(h.strip()) > 5]
 
76
 
77
  # -----------------------------
78
  # Ensemble utilities
@@ -97,24 +114,12 @@ def headline_score_ensemble(headline: str) -> np.ndarray:
97
  b = sentiment_b(headline)[0]
98
  return (model_to_vector(a) + model_to_vector(b)) / 2.0
99
 
100
- def weighted_aggregate_headlines_vectors(headlines: List[str], decay: float = DECAY) -> np.ndarray:
101
- vectors = [headline_score_ensemble(h) for h in headlines]
102
  if not vectors:
103
- return np.array([0.0, 1.0, 0.0]) # neutral fallback
104
-
105
- n = len(vectors)
106
- weights = np.linspace(1.0, decay, n) # linear decay from 1 -> decay
107
-
108
- weighted_sum = np.zeros(3)
109
- total_weight = 0.0
110
- for vec, w in zip(vectors, weights):
111
- weighted_sum += vec * w
112
- total_weight += w
113
-
114
- agg_vec = weighted_sum / total_weight
115
- agg_vec = np.clip(agg_vec, 0.0, None)
116
- total = agg_vec.sum()
117
- return agg_vec / total if total > 0 else np.array([0.0, 1.0, 0.0])
118
 
119
  def vector_to_score(vec: np.ndarray) -> float:
120
  neg, neu, pos = vec.tolist()
@@ -131,9 +136,11 @@ class StocksRequest(BaseModel):
131
  @cached(stock_cache)
132
  def analyze_single_stock(stock: str) -> float | str:
133
  headlines = fetch_headlines(stock)
134
- if not headlines or len(headlines) < 2:
135
- return "LOW_DATA"
136
- agg = weighted_aggregate_headlines_vectors(headlines, decay=DECAY)
 
 
137
  return vector_to_score(agg)
138
 
139
  @app.get("/")
@@ -141,7 +148,7 @@ def root():
141
  return {"message": "Fin-senti API is running! Use POST /analyze"}
142
 
143
  @app.post("/analyze")
144
- def analyze_stocks(req: StocksRequest):
145
  results = {}
146
  for stock in req.stocks:
147
  results[stock] = analyze_single_stock(stock)
 
12
  # CONFIG
13
  # -----------------------------
14
  NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
15
+ MAX_HEADLINES = 10 # fetch more for robustness
 
16
 
17
  MODEL_A = "yiyanghkust/finbert-tone"
18
  MODEL_B = "ProsusAI/finbert"
 
34
  # -----------------------------
35
  stock_cache = TTLCache(maxsize=100, ttl=600)
36
 
37
+ # -----------------------------
38
+ # Finance keywords filter
39
+ # -----------------------------
FINANCE_KEYWORDS = [
    "stock", "share", "market", "profit", "loss", "earnings",
    "investment", "IPO", "dividend", "trading", "NASDAQ", "NYSE",
]

# Lower-cased once at import time. Kept as a tuple because str.startswith
# accepts a tuple of alternative prefixes.
_FINANCE_KEYWORD_PREFIXES = tuple(k.lower() for k in FINANCE_KEYWORDS)


def is_relevant_headline(headline: str) -> bool:
    """Return True when some word in *headline* starts with a finance keyword.

    Matching is case-insensitive and anchored at the start of each word, so
    inflections such as "stocks", "shares" or "shareholders" still match,
    while unrelated words that merely contain a keyword ("blossom" contains
    "loss", "tripod" contains "ipo", "supermarket" contains "market") do not.

    Args:
        headline: Headline text to test. Empty or ``None`` is not relevant.

    Returns:
        True if at least one word begins with an entry of FINANCE_KEYWORDS.
    """
    if not headline:
        return False
    # Turn punctuation into spaces so tokens like "$stock," or "IPO:" are
    # split into clean words before the prefix test.
    normalized = "".join(
        ch if ch.isalnum() else " " for ch in headline.lower()
    )
    return any(
        word.startswith(_FINANCE_KEYWORD_PREFIXES)
        for word in normalized.split()
    )
48
+
49
  # -----------------------------
50
  # News fetchers
51
  # -----------------------------
def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch recent finance-related headlines for *stock* from NewsAPI.

    Queries the ``/v2/everything`` endpoint, newest first, and keeps only
    the titles accepted by ``is_relevant_headline``.

    Args:
        stock: Search term; used both as a quoted phrase and as a
            ``$``-prefixed cashtag in the query.
        limit: Maximum number of headlines to return.

    Returns:
        Up to ``limit`` headline strings. An empty list when no API key is
        configured, when ``limit`` is not positive, or when the request or
        response parsing fails.
    """
    if not NEWSAPI_KEY or limit <= 0:
        return []
    url = "https://newsapi.org/v2/everything"
    query = f'"{stock}" OR ${stock}'
    params = {
        "q": query,
        "language": "en",
        # Over-fetch so enough headlines survive the relevance filter, but
        # stay within NewsAPI's documented maximum page size of 100.
        "pageSize": min(limit * 2, 100),
        "sortBy": "publishedAt",
        "apiKey": NEWSAPI_KEY,
    }
    try:
        r = requests.get(url, params=params, timeout=6)
        r.raise_for_status()
        # "articles" may be missing or null in the payload; treat both as empty.
        articles = r.json().get("articles") or []
        headlines = [a.get("title") for a in articles if a.get("title")]
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        # Deliberately best-effort: any failure degrades to "no headlines"
        # so the caller can fall back to another source.
        print(f"[NewsAPI error] {e}")
        return []
74
 
def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch finance-related headlines for *stock* from Yahoo Finance.

    Reads the ``news`` attribute of ``yf.Ticker(stock)`` and keeps only the
    titles accepted by ``is_relevant_headline``.

    Args:
        stock: Ticker symbol passed to ``yf.Ticker``.
        limit: Maximum number of headlines to return.

    Returns:
        Up to ``limit`` headline strings. An empty list when ``limit`` is
        not positive, no news is available, or the lookup fails.
    """
    if limit <= 0:
        return []
    try:
        t = yf.Ticker(stock)
        news_items = getattr(t, "news", None) or []
        headlines = []
        for item in news_items:
            if not isinstance(item, dict):
                continue
            title = item.get("title")
            if not title:
                # NOTE(review): newer yfinance releases appear to nest the
                # headline under item["content"]["title"] -- confirm against
                # the installed version.
                content = item.get("content")
                if isinstance(content, dict):
                    title = content.get("title")
            if title:
                headlines.append(title)
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        # Deliberately best-effort: any failure degrades to "no headlines".
        print(f"[Yahoo Finance error] {e}")
        return []
85
 
def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Collect up to *limit* unique headlines for *stock*.

    NewsAPI is tried first. When it yields fewer than two headlines, the
    Yahoo Finance results are appended as a fallback. The combined list is
    always de-duplicated, so a headline that appears more than once is only
    scored once downstream.

    Args:
        stock: Search term / ticker forwarded to both fetchers.
        limit: Maximum number of headlines to return.

    Returns:
        Unique headlines in first-seen order, at most ``limit`` of them.
    """
    headlines = fetch_news_newsapi(stock, limit)
    if len(headlines) < 2:  # fallback if insufficient headlines
        headlines = headlines + fetch_news_yfinance(stock, limit)
    # dict.fromkeys drops repeats while preserving first-seen order.
    return list(dict.fromkeys(headlines))[:limit]
93
 
94
  # -----------------------------
95
  # Ensemble utilities
 
114
  b = sentiment_b(headline)[0]
115
  return (model_to_vector(a) + model_to_vector(b)) / 2.0
116
 
def aggregate_headlines_vectors(vectors: List[np.ndarray]) -> np.ndarray:
    """Average per-headline score vectors into one normalized vector.

    Args:
        vectors: One score vector per headline.

    Returns:
        The element-wise mean of ``vectors`` rescaled to sum to 1. When
        ``vectors`` is empty or the mean does not have a positive sum, the
        fallback vector ``[0.0, 1.0, 0.0]`` is returned instead.
    """
    fallback = np.array([0.0,1.0,0.0])
    if not vectors:
        return fallback

    averaged = np.mean(vectors, axis=0)
    mass = averaged.sum()
    if mass > 0:
        return averaged / mass
    return fallback
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  def vector_to_score(vec: np.ndarray) -> float:
125
  neg, neu, pos = vec.tolist()
 
@cached(stock_cache)
def analyze_single_stock(stock: str) -> float | str:
    """Score the current news sentiment for one stock.

    Headlines come from ``fetch_headlines``. Those that are empty or whose
    stripped length is 10 characters or fewer are discarded. Each remaining
    headline is scored with ``headline_score_ensemble``, the vectors are
    combined by ``aggregate_headlines_vectors``, and the result is converted
    with ``vector_to_score``. Results are memoized in ``stock_cache``.

    Args:
        stock: Identifier forwarded to ``fetch_headlines``.

    Returns:
        The score from ``vector_to_score``, or the string ``"NO_DATA"`` when
        no usable headline remains.
    """
    usable = []
    for text in fetch_headlines(stock):
        if text and len(text.strip()) > 10:
            usable.append(text)

    if len(usable) == 0:
        return "NO_DATA"

    scored = list(map(headline_score_ensemble, usable))
    return vector_to_score(aggregate_headlines_vectors(scored))
145
 
146
  @app.get("/")
 
148
  return {"message": "Fin-senti API is running! Use POST /analyze"}
149
 
150
  @app.post("/analyze")
151
+ def analyze(req: StocksRequest):
152
  results = {}
153
  for stock in req.stocks:
154
  results[stock] = analyze_single_stock(stock)