hopelessDev committed (verified)
Commit 35634ec · 1 Parent(s): 73bfb6f

Update app.py

Files changed (1):
  1. app.py +49 -19
app.py CHANGED
@@ -7,12 +7,14 @@ import yfinance as yf
 import numpy as np
 from transformers import pipeline
 from cachetools import TTLCache, cached
+from datetime import datetime, timezone

 # -----------------------------
 # CONFIG
 # -----------------------------
 NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
 MAX_HEADLINES = 10 # fetch more for robustness
+DECAY_HALF_LIFE_HOURS = 12 # half-life for old news

 MODEL_A = "yiyanghkust/finbert-tone"
 MODEL_B = "ProsusAI/finbert"
@@ -49,7 +51,7 @@ def is_relevant_headline(headline: str) -> bool:
 # -----------------------------
 # News fetchers
 # -----------------------------
-def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
+def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     if not NEWSAPI_KEY:
         return []
     url = "https://newsapi.org/v2/everything"
@@ -65,30 +67,33 @@ def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
         r = requests.get(url, params=params, timeout=6)
         r.raise_for_status()
         articles = r.json().get("articles", [])
-        headlines = [a.get("title") for a in articles if a.get("title")]
-        filtered = [h for h in headlines if is_relevant_headline(h)]
+        filtered = [
+            {"title": a.get("title"), "publishedAt": a.get("publishedAt")}
+            for a in articles if a.get("title") and is_relevant_headline(a.get("title"))
+        ]
         return filtered[:limit]
     except Exception as e:
         print(f"[NewsAPI error] {e}")
         return []

-def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
+def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     try:
         t = yf.Ticker(stock)
         news_items = getattr(t, "news", None) or []
-        headlines = [n.get("title") for n in news_items if n.get("title")]
-        filtered = [h for h in headlines if is_relevant_headline(h)]
+        filtered = [
+            {"title": n.get("title"), "publishedAt": n.get("providerPublishTime")}
+            for n in news_items if n.get("title") and is_relevant_headline(n.get("title"))
+        ]
         return filtered[:limit]
     except Exception as e:
         print(f"[Yahoo Finance error] {e}")
         return []

-def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
+def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[Dict]:
     headlines = fetch_news_newsapi(stock, limit)
-    if len(headlines) < 2: # fallback if insufficient headlines
+    if len(headlines) < 2:
         headlines_yf = fetch_news_yfinance(stock, limit)
-        # combine and remove duplicates
-        headlines = list(dict.fromkeys(headlines + headlines_yf))[:limit]
+        headlines = list({h['title']: h for h in (headlines + headlines_yf)}.values())[:limit]
     return headlines

 # -----------------------------
@@ -114,10 +119,17 @@ def headline_score_ensemble(headline: str) -> np.ndarray:
     b = sentiment_b(headline)[0]
     return (model_to_vector(a) + model_to_vector(b)) / 2.0

-def aggregate_headlines_vectors(vectors: List[np.ndarray]) -> np.ndarray:
+def aggregate_headlines_vectors(vectors: List[np.ndarray], timestamps: List[float]) -> np.ndarray:
     if not vectors:
         return np.array([0.0,1.0,0.0])
-    mean_vec = np.mean(vectors, axis=0)
+
+    # Apply decay weights based on timestamps
+    now = datetime.now(timezone.utc).timestamp()
+    weights = np.array([0.5 ** ((now - ts)/(DECAY_HALF_LIFE_HOURS*3600)) for ts in timestamps])
+    weighted_vecs = np.array(vectors) * weights[:, None]
+    mean_vec = weighted_vecs.sum(axis=0) / weights.sum()
+
+    mean_vec = np.clip(mean_vec, 0.0, None)
     total = mean_vec.sum()
     return mean_vec / total if total > 0 else np.array([0.0,1.0,0.0])

@@ -128,24 +140,42 @@ def vector_to_score(vec: np.ndarray) -> float:
 # -----------------------------
 # FastAPI app
 # -----------------------------
-app = FastAPI(title="Financial Sentiment API")
+app = FastAPI(title="Financial Sentiment API with Decay")

 class StocksRequest(BaseModel):
     stocks: List[str]

 @cached(stock_cache)
 def analyze_single_stock(stock: str) -> float | str:
-    headlines = fetch_headlines(stock)
-    headlines = [h for h in headlines if h and len(h.strip()) > 10]
-    if not headlines:
+    headlines_data = fetch_headlines(stock)
+    headlines_data = [h for h in headlines_data if h.get("title") and len(h["title"].strip()) > 10]
+
+    if not headlines_data:
         return "NO_DATA"
-    vectors = [headline_score_ensemble(h) for h in headlines]
-    agg = aggregate_headlines_vectors(vectors)
+
+    vectors = []
+    timestamps = []
+    for h in headlines_data:
+        vectors.append(headline_score_ensemble(h["title"]))
+        # convert publishedAt to timestamp
+        try:
+            ts = h.get("publishedAt")
+            if isinstance(ts, str):
+                ts = datetime.fromisoformat(ts.replace("Z","+00:00")).timestamp()
+            elif isinstance(ts, (int, float)):
+                ts = float(ts)
+            else:
+                ts = datetime.now(timezone.utc).timestamp()
+        except:
+            ts = datetime.now(timezone.utc).timestamp()
+        timestamps.append(ts)
+
+    agg = aggregate_headlines_vectors(vectors, timestamps)
     return vector_to_score(agg)

 @app.get("/")
 def root():
-    return {"message": "Fin-senti API is running! Use POST /analyze"}
+    return {"message": "Fin-senti API with Decay is running! Use POST /analyze"}

 @app.post("/analyze")
 def analyze(req: StocksRequest):
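
The new aggregate_headlines_vectors weights each headline by 0.5 ** (age / half-life) before averaging, so with DECAY_HALF_LIFE_HOURS = 12 a day-old headline contributes a quarter of the weight of a fresh one. A minimal standalone sketch of that weighting (the headline ages below are made up for illustration):

    # Sketch of the half-life weighting introduced in aggregate_headlines_vectors.
    import numpy as np

    DECAY_HALF_LIFE_HOURS = 12
    ages_hours = np.array([0, 6, 12, 24, 48])  # hypothetical headline ages
    weights = 0.5 ** (ages_hours / DECAY_HALF_LIFE_HOURS)
    print(dict(zip(ages_hours.tolist(), weights.round(3).tolist())))
    # {0: 1.0, 6: 0.707, 12: 0.5, 24: 0.25, 48: 0.062}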
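
The combined list in fetch_headlines is now de-duplicated by title via a dict comprehension, so when both sources return the same headline the later (yfinance) entry wins. A quick check of that behaviour with made-up items:

    # Title-keyed de-duplication as in fetch_headlines; sample items are hypothetical.
    newsapi_items = [{"title": "Acme beats estimates", "publishedAt": "2024-01-01T00:00:00Z"}]
    yfinance_items = [{"title": "Acme beats estimates", "publishedAt": 1704067200}]
    merged = list({h["title"]: h for h in (newsapi_items + yfinance_items)}.values())
    print(merged)  # one entry, keeping the epoch timestamp from the yfinance item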
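
For reference, a hedged client-side call against the unchanged POST /analyze route; the host, port, and tickers below are assumptions, only the {"stocks": [...]} body shape comes from the StocksRequest model:

    # Hypothetical client call; assumes the API is served locally on port 7860.
    import requests

    resp = requests.post(
        "http://localhost:7860/analyze",
        json={"stocks": ["AAPL", "TSLA"]},  # tickers are illustrative
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json())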