Ali Kefia committed on
Commit
f2f47ac
·
1 Parent(s): 4c31c97

linear -> poly

Browse files
imgs/confusion_matrix.png CHANGED
imgs/roc_curve.png CHANGED
model/model.pickle CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73bf71607b6b99d8576a79ec96cdf97e008134e7d348477f93b8cdcf057db19e
3
- size 3411728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2dc963399b56d847a6b5e93ab807d18d1dbc8b89285b5b151dc17e6608ef72d
3
+ size 3182918
train.py CHANGED
@@ -54,7 +54,7 @@ def save_confusion_matrix(y: NDArray, pred: NDArray):
54
 
55
  def main() -> None:
56
  train_df = pl.read_parquet(DATA / "train.parquet")
57
- clf = SVC(kernel="linear", probability=True)
58
  clf.fit(
59
  train_df.get_column("embeds").to_numpy(),
60
  train_df.get_column("is_news").to_numpy(),
 
54
 
55
  def main() -> None:
56
  train_df = pl.read_parquet(DATA / "train.parquet")
57
+ clf = SVC(kernel="poly", probability=True)
58
  clf.fit(
59
  train_df.get_column("embeds").to_numpy(),
60
  train_df.get_column("is_news").to_numpy(),
usage.py CHANGED
@@ -18,11 +18,12 @@ def get_model():
18
 
19
 
20
  def get_record():
21
- df = pl.read_parquet(DATA / "eval.parquet")
 
 
22
  return {
23
- col: val
24
- for col, val in zip(df.columns, df.sample().row(0))
25
- if col in ["text", "is_news"]
26
  }
27
 
28
 
 
18
 
19
 
20
  def get_record():
21
+ df = pl.read_csv(DATA / "eval.csv")
22
+ raw = {col: val for col, val in zip(df.columns, df.sample().row(0))}
23
+ text_fields = ["meta_title", "meta_description", "content"]
24
  return {
25
+ "text": "\n\n".join(raw[k] for k in text_fields),
26
+ "is_news": raw["is_news_article"],
 
27
  }
28
 
29