Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -12,8 +12,7 @@ from cachetools import TTLCache, cached
|
|
12 |
# CONFIG
|
13 |
# -----------------------------
|
14 |
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
|
15 |
-
MAX_HEADLINES = 10 # fetch more
|
16 |
-
DECAY = 0.5 # weight for oldest headline
|
17 |
|
18 |
MODEL_A = "yiyanghkust/finbert-tone"
|
19 |
MODEL_B = "ProsusAI/finbert"
|
@@ -35,44 +34,62 @@ LABEL_MAP = {
|
|
35 |
# -----------------------------
|
36 |
stock_cache = TTLCache(maxsize=100, ttl=600)
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# -----------------------------
|
39 |
# News fetchers
|
40 |
# -----------------------------
|
41 |
-
def fetch_news_newsapi(
|
42 |
if not NEWSAPI_KEY:
|
43 |
return []
|
44 |
url = "https://newsapi.org/v2/everything"
|
|
|
45 |
params = {
|
46 |
"q": query,
|
47 |
"language": "en",
|
48 |
-
"pageSize": limit,
|
49 |
"sortBy": "publishedAt",
|
50 |
"apiKey": NEWSAPI_KEY,
|
51 |
}
|
52 |
try:
|
53 |
r = requests.get(url, params=params, timeout=6)
|
54 |
r.raise_for_status()
|
55 |
-
articles = r.json().get("articles", [])
|
56 |
-
|
|
|
|
|
57 |
except Exception as e:
|
58 |
print(f"[NewsAPI error] {e}")
|
59 |
return []
|
60 |
|
61 |
-
def fetch_news_yfinance(
|
62 |
try:
|
63 |
-
t = yf.Ticker(
|
64 |
news_items = getattr(t, "news", None) or []
|
65 |
-
|
|
|
|
|
66 |
except Exception as e:
|
67 |
print(f"[Yahoo Finance error] {e}")
|
68 |
return []
|
69 |
|
70 |
def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
|
71 |
headlines = fetch_news_newsapi(stock, limit)
|
72 |
-
if
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
76 |
|
77 |
# -----------------------------
|
78 |
# Ensemble utilities
|
@@ -97,24 +114,12 @@ def headline_score_ensemble(headline: str) -> np.ndarray:
|
|
97 |
b = sentiment_b(headline)[0]
|
98 |
return (model_to_vector(a) + model_to_vector(b)) / 2.0
|
99 |
|
100 |
-
def
|
101 |
-
vectors = [headline_score_ensemble(h) for h in headlines]
|
102 |
if not vectors:
|
103 |
-
return np.array([0.0,
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
weighted_sum = np.zeros(3)
|
109 |
-
total_weight = 0.0
|
110 |
-
for vec, w in zip(vectors, weights):
|
111 |
-
weighted_sum += vec * w
|
112 |
-
total_weight += w
|
113 |
-
|
114 |
-
agg_vec = weighted_sum / total_weight
|
115 |
-
agg_vec = np.clip(agg_vec, 0.0, None)
|
116 |
-
total = agg_vec.sum()
|
117 |
-
return agg_vec / total if total > 0 else np.array([0.0, 1.0, 0.0])
|
118 |
|
119 |
def vector_to_score(vec: np.ndarray) -> float:
|
120 |
neg, neu, pos = vec.tolist()
|
@@ -131,9 +136,11 @@ class StocksRequest(BaseModel):
|
|
131 |
@cached(stock_cache)
|
132 |
def analyze_single_stock(stock: str) -> float | str:
|
133 |
headlines = fetch_headlines(stock)
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
137 |
return vector_to_score(agg)
|
138 |
|
139 |
@app.get("/")
|
@@ -141,7 +148,7 @@ def root():
|
|
141 |
return {"message": "Fin-senti API is running! Use POST /analyze"}
|
142 |
|
143 |
@app.post("/analyze")
|
144 |
-
def
|
145 |
results = {}
|
146 |
for stock in req.stocks:
|
147 |
results[stock] = analyze_single_stock(stock)
|
|
|
12 |
# CONFIG
|
13 |
# -----------------------------
|
14 |
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "").strip()
|
15 |
+
MAX_HEADLINES = 10 # fetch more for robustness
|
|
|
16 |
|
17 |
MODEL_A = "yiyanghkust/finbert-tone"
|
18 |
MODEL_B = "ProsusAI/finbert"
|
|
|
34 |
# -----------------------------
|
35 |
stock_cache = TTLCache(maxsize=100, ttl=600)
|
36 |
|
37 |
+
# -----------------------------
|
38 |
+
# Finance keywords filter
|
39 |
+
# -----------------------------
|
40 |
+
# Keywords whose presence marks a headline as finance-related.
FINANCE_KEYWORDS = [
    "stock", "share", "market", "profit", "loss", "earnings",
    "investment", "IPO", "dividend", "trading", "NASDAQ", "NYSE",
]


def is_relevant_headline(headline: str) -> bool:
    """Return True when *headline* mentions at least one finance keyword.

    Matching is a case-insensitive substring test against every entry of
    ``FINANCE_KEYWORDS``.

    Args:
        headline: Headline text to inspect.

    Returns:
        True if any keyword occurs in the headline; False otherwise,
        including when *headline* is empty or is not a string.
    """
    # Guard against missing / non-text titles so that one malformed item
    # yields False instead of raising AttributeError on ``.lower()``.
    if not isinstance(headline, str) or not headline:
        return False
    headline_lower = headline.lower()
    return any(k.lower() in headline_lower for k in FINANCE_KEYWORDS)
|
48 |
+
|
49 |
# -----------------------------
|
50 |
# News fetchers
|
51 |
# -----------------------------
|
52 |
+
def fetch_news_newsapi(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch recent finance-related headlines for *stock* from NewsAPI.

    Queries the ``/v2/everything`` endpoint for the quoted symbol or its
    ``$``-prefixed form, keeps article titles that pass
    ``is_relevant_headline``, and returns at most *limit* of them.

    Args:
        stock: Ticker symbol or company name to search for.
        limit: Maximum number of headlines to return.

    Returns:
        A list of headline strings. Empty when no API key is configured,
        when *stock* is blank, when *limit* is not positive, or when the
        request fails (the error is printed, not raised).
    """
    symbol = stock.strip()
    # Nothing useful can come back in these cases, so skip the network call.
    if not NEWSAPI_KEY or not symbol or limit <= 0:
        return []
    url = "https://newsapi.org/v2/everything"
    query = f'"{symbol}" OR ${symbol}'
    params = {
        "q": query,
        "language": "en",
        # Over-fetch so enough titles survive the keyword filter, but stay
        # within NewsAPI's documented maximum page size of 100.
        "pageSize": min(limit * 2, 100),
        "sortBy": "publishedAt",
        "apiKey": NEWSAPI_KEY,
    }
    try:
        r = requests.get(url, params=params, timeout=6)
        r.raise_for_status()
        # ``or []`` also covers an explicit ``"articles": null`` payload.
        articles = r.json().get("articles") or []
        headlines = [a.get("title") for a in articles if a.get("title")]
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        # Deliberately best-effort: the caller falls back to another source.
        print(f"[NewsAPI error] {e}")
        return []
|
74 |
|
75 |
+
def fetch_news_yfinance(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Fetch finance-related headlines for *stock* from Yahoo Finance.

    Reads the ``news`` attribute of ``yf.Ticker(stock)``, extracts each
    item's title, keeps those that pass ``is_relevant_headline`` and
    returns at most *limit* of them.

    Args:
        stock: Ticker symbol passed to ``yf.Ticker``.
        limit: Maximum number of headlines to return.

    Returns:
        A list of headline strings; empty when no news is available or
        when any error occurs (the error is printed, not raised).
    """
    try:
        t = yf.Ticker(stock)
        news_items = getattr(t, "news", None) or []
        headlines = []
        for n in news_items:
            # NOTE(review): newer yfinance releases appear to nest the
            # article fields under a "content" dict instead of exposing
            # "title" at the top level - accept both shapes; confirm
            # against the installed yfinance version.
            title = n.get("title") or (n.get("content") or {}).get("title")
            if title:
                headlines.append(title)
        filtered = [h for h in headlines if is_relevant_headline(h)]
        return filtered[:limit]
    except Exception as e:
        # Deliberately best-effort: a failed fallback yields no headlines.
        print(f"[Yahoo Finance error] {e}")
        return []
|
85 |
|
86 |
def fetch_headlines(stock: str, limit: int = MAX_HEADLINES) -> List[str]:
    """Collect up to *limit* headlines for *stock*.

    NewsAPI is tried first. When it yields fewer than two headlines,
    Yahoo Finance is queried as well and both result lists are merged,
    keeping first occurrences in order.
    """
    primary = fetch_news_newsapi(stock, limit)
    if len(primary) >= 2:
        return primary

    # Too few results: top up from the fallback source.
    fallback = fetch_news_yfinance(stock, limit)
    # dict.fromkeys drops exact duplicates while preserving order.
    merged = list(dict.fromkeys(primary + fallback))
    return merged[:limit]
|
93 |
|
94 |
# -----------------------------
|
95 |
# Ensemble utilities
|
|
|
114 |
b = sentiment_b(headline)[0]
|
115 |
return (model_to_vector(a) + model_to_vector(b)) / 2.0
|
116 |
|
117 |
+
def aggregate_headlines_vectors(vectors: List[np.ndarray]) -> np.ndarray:
    """Average per-headline sentiment vectors into one normalised vector.

    The element-wise mean of *vectors* is taken, negative components are
    clipped to zero, and the result is rescaled to sum to 1.

    Args:
        vectors: Sequence of equal-length numeric vectors, one per headline.

    Returns:
        The normalised mean vector, or the fallback ``[0.0, 1.0, 0.0]``
        when *vectors* is empty or the clipped mean has no positive,
        finite mass to normalise.
    """
    neutral = np.array([0.0, 1.0, 0.0])
    # len() rather than truthiness so an ndarray argument is not ambiguous.
    if len(vectors) == 0:
        return neutral
    mean_vec = np.mean(np.asarray(vectors, dtype=float), axis=0)
    # Negative components would survive the division below and yield an
    # invalid distribution, so drop them before normalising.
    mean_vec = np.clip(mean_vec, 0.0, None)
    total = mean_vec.sum()
    # NaN / inf totals cannot be normalised meaningfully; fall back.
    if not np.isfinite(total) or total <= 0:
        return neutral
    return mean_vec / total
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
def vector_to_score(vec: np.ndarray) -> float:
|
125 |
neg, neu, pos = vec.tolist()
|
|
|
136 |
@cached(stock_cache)
def analyze_single_stock(stock: str) -> float | str:
    """Compute an aggregate sentiment score for *stock*.

    Headlines are fetched, entries that are empty or have 10 or fewer
    characters after stripping are discarded, each remaining headline is
    scored with the model ensemble, and the scores are aggregated into a
    single value. Results are memoised through ``stock_cache``.

    Returns:
        The value produced by ``vector_to_score``, or the string
        ``"NO_DATA"`` when no usable headline remains.
    """
    usable = [
        title
        for title in fetch_headlines(stock)
        if title and len(title.strip()) > 10
    ]
    if not usable:
        return "NO_DATA"
    scored = [headline_score_ensemble(title) for title in usable]
    return vector_to_score(aggregate_headlines_vectors(scored))
|
145 |
|
146 |
@app.get("/")
|
|
|
148 |
return {"message": "Fin-senti API is running! Use POST /analyze"}
|
149 |
|
150 |
@app.post("/analyze")
|
151 |
+
def analyze(req: StocksRequest):
|
152 |
results = {}
|
153 |
for stock in req.stocks:
|
154 |
results[stock] = analyze_single_stock(stock)
|