Spaces:
Runtime error
Runtime error
Model
Browse files- README.md +4 -0
- app/model/__init__.py +0 -0
- app/model/base.py +49 -0
- app/model/tfid_lr.py +35 -0
README.md
CHANGED
|
@@ -8,6 +8,10 @@ Sentiment Analysis
|
|
| 8 |
4. Run `just run --help` to see the available commands
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
### TODO
|
| 12 |
- [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
|
| 13 |
- [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
|
|
|
|
| 8 |
4. Run `just run --help` to see the available commands
|
| 9 |
|
| 10 |
|
| 11 |
+
### Required tools
|
| 12 |
+
- `just`
|
| 13 |
+
- `poetry`
|
| 14 |
+
|
| 15 |
### TODO
|
| 16 |
- [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
|
| 17 |
- [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
|
app/model/__init__.py
ADDED
|
File without changes
|
app/model/base.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
from typing import TYPE_CHECKING
|
| 5 |
+
|
| 6 |
+
import joblib
|
| 7 |
+
|
| 8 |
+
if TYPE_CHECKING:
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from sklearn.pipeline import Pipeline
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Model(ABC):
    """Base class for all sentiment analysis models.

    Subclasses provide the ``pipeline`` and ``description`` properties and
    the ``_predict`` hook. Persistence (``from_file`` / ``to_file``),
    ``predict`` and ``train`` are implemented here on top of those.
    """

    @property
    @abstractmethod
    def pipeline(self) -> Pipeline:
        """Pipeline used for the model."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """Description of the architecture."""
        ...

    @abstractmethod
    def _predict(self, text: str) -> int:
        """Predict the sentiment of the given text."""
        ...

    @staticmethod
    def from_file(path: Path) -> Model:
        """Load a model from the given file.

        Args:
            path: Location of a file previously written by ``to_file``.

        Returns:
            The deserialized model.

        Raises:
            TypeError: If the file does not contain a ``Model`` instance.
        """
        # SECURITY: joblib files are pickle-based, so loading one can execute
        # arbitrary code. Only load files that come from a trusted source.
        loaded = joblib.load(path)
        # Fail here with a clear message instead of handing the caller an
        # arbitrary object that only breaks later, on the first ``predict``.
        if not isinstance(loaded, Model):
            raise TypeError(
                f"Expected a Model instance in {path}, got {type(loaded).__name__}"
            )
        return loaded

    def to_file(self, path: Path) -> None:
        """Save the model to the given file.

        Args:
            path: Destination file; the whole model object is dumped with
                ``joblib.dump``.
        """
        joblib.dump(self, path)

    def predict(self, text: str) -> int:
        """Perform sentiment analysis on the given text.

        Args:
            text: The text to classify.

        Returns:
            The label produced by the subclass's ``_predict`` implementation.
        """
        return self._predict(text)

    def train(self, x: list[str], y: list[int]) -> None:
        """Train the model on the given data.

        Args:
            x: Training texts.
            y: Labels, one per entry of ``x``.
        """
        self.pipeline.fit(x, y)
|
app/model/tfid_lr.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
| 4 |
+
from sklearn.linear_model import LogisticRegression
|
| 5 |
+
from sklearn.pipeline import Pipeline
|
| 6 |
+
|
| 7 |
+
from .base import Model
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TfidfLR(Model):
    """Sentiment analysis model using TF-IDF and Logistic Regression."""

    def __init__(self, rng: int | None = None, cache: str | None = None) -> None:
        """Build the vectorize -> tfidf -> classifier pipeline.

        Args:
            rng: Value passed to ``LogisticRegression(random_state=...)``.
                Defaults to ``None``.
            cache: Value passed to ``Pipeline(memory=...)``. Defaults to
                ``None``.
        """
        super().__init__()
        # These attributes were previously read without ever being assigned,
        # so constructing the model raised AttributeError. Set them before the
        # pipeline below refers to them.
        self.rng = rng
        self.cache = cache
        self._pipeline = Pipeline(
            [
                (
                    "vectorize",
                    CountVectorizer(stop_words="english", ngram_range=(1, 2), max_features=10000),
                ),
                ("tfidf", TfidfTransformer()),
                ("clf", LogisticRegression(max_iter=1000, random_state=self.rng)),
            ],
            memory=self.cache,
        )

    @property
    def pipeline(self) -> Pipeline:
        """Pipeline built in ``__init__``."""
        return self._pipeline

    @property
    def description(self) -> str:
        """Description of the architecture."""
        return "TF-IDF with Logistic Regression"

    def _predict(self, text: str) -> int:
        """Predict the sentiment label for a single text.

        The pipeline works on batches, so the text is wrapped in a
        one-element list and the single result is unwrapped.
        """
        # Pipeline.predict returns a NumPy array; convert the scalar to a
        # built-in int so the declared return type holds.
        return int(self.pipeline.predict([text])[0])
|