Ali Kefia
committed on
Commit
·
f2f47ac
1
Parent(s):
4c31c97
linear -> poly
Browse files
- imgs/confusion_matrix.png +0 -0
- imgs/roc_curve.png +0 -0
- model/model.pickle +2 -2
- train.py +1 -1
- usage.py +5 -4
imgs/confusion_matrix.png
CHANGED
![]() |
![]() |
imgs/roc_curve.png
CHANGED
![]() |
![]() |
model/model.pickle
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2dc963399b56d847a6b5e93ab807d18d1dbc8b89285b5b151dc17e6608ef72d
|
3 |
+
size 3182918
|
train.py
CHANGED
@@ -54,7 +54,7 @@ def save_confusion_matrix(y: NDArray, pred: NDArray):
|
|
54 |
|
55 |
def main() -> None:
|
56 |
train_df = pl.read_parquet(DATA / "train.parquet")
|
57 |
-
clf = SVC(kernel="
|
58 |
clf.fit(
|
59 |
train_df.get_column("embeds").to_numpy(),
|
60 |
train_df.get_column("is_news").to_numpy(),
|
|
|
54 |
|
55 |
def main() -> None:
|
56 |
train_df = pl.read_parquet(DATA / "train.parquet")
|
57 |
+
clf = SVC(kernel="poly", probability=True)
|
58 |
clf.fit(
|
59 |
train_df.get_column("embeds").to_numpy(),
|
60 |
train_df.get_column("is_news").to_numpy(),
|
usage.py
CHANGED
@@ -18,11 +18,12 @@ def get_model():
|
|
18 |
|
19 |
|
20 |
def get_record():
|
21 |
-
df = pl.
|
|
|
|
|
22 |
return {
|
23 |
-
|
24 |
-
|
25 |
-
if col in ["text", "is_news"]
|
26 |
}
|
27 |
|
28 |
|
|
|
18 |
|
19 |
|
20 |
def get_record():
|
21 |
+
df = pl.read_csv(DATA / "eval.csv")
|
22 |
+
raw = {col: val for col, val in zip(df.columns, df.sample().row(0))}
|
23 |
+
text_fields = ["meta_title", "meta_description", "content"]
|
24 |
return {
|
25 |
+
"text": "\n\n".join(raw[k] for k in text_fields),
|
26 |
+
"is_news": raw["is_news_article"],
|
|
|
27 |
}
|
28 |
|
29 |
|