File size: 1,183 Bytes
d4ef46b
 
85ac990
 
 
667fe9d
 
85ac990
183f8cd
 
85ac990
183f8cd
 
b0ade1a
183f8cd
85ac990
183f8cd
 
447f97e
85ac990
 
 
b0ade1a
85ac990
667fe9d
85ac990
 
667fe9d
e50b20c
183f8cd
e50b20c
e1645d7
183f8cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""Constants used by the application."""

from __future__ import annotations

import os
from pathlib import Path

# Working directories. Each root is read from the environment variable of the
# same name and otherwise falls back to the relative default given here.
CACHE_DIR = Path(os.environ.get("CACHE_DIR", ".cache"))
DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
MODEL_DIR = Path(os.environ.get("MODEL_DIR", "models"))

# The tokenizer cache always sits directly beneath the cache root.
TOKENIZER_CACHE_DIR = CACHE_DIR.joinpath("tokenizer")

# Create every directory at import time (missing parents included); a
# directory that already exists is left untouched.
for _directory in (CACHE_DIR, DATA_DIR, MODEL_DIR, TOKENIZER_CACHE_DIR):
    _directory.mkdir(parents=True, exist_ok=True)
del _directory  # keep the loop variable out of the module namespace

# Dataset and resource locations: every *_PATH is a file inside DATA_DIR and
# is paired with a *_URL constant for the same resource.

# Sentiment140 (Kaggle dataset page).
SENTIMENT140_URL = "https://www.kaggle.com/datasets/kazanova/sentiment140"
SENTIMENT140_PATH = DATA_DIR.joinpath("sentiment140.csv")

# Amazon reviews (Kaggle dataset page); the local file is a bz2 archive.
AMAZONREVIEWS_URL = "https://www.kaggle.com/datasets/bittlingmayer/amazonreviews"
AMAZONREVIEWS_PATH = DATA_DIR.joinpath("amazonreviews.txt.bz2")

# IMDB 50k movie reviews (Kaggle dataset page).
IMDB50K_URL = "https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews"
IMDB50K_PATH = DATA_DIR.joinpath("imdb50k.csv")

# Test dataset (raw-file link into the project's GitHub repository).
TEST_DATASET_URL = "https://github.com/Tymec/sentiment-analysis/blob/main/data/test.csv?raw=true"
TEST_DATASET_PATH = DATA_DIR.joinpath("test.csv")

# Slang map JSON (raw-file link into the project's GitHub repository).
SLANGMAP_URL = "https://github.com/Tymec/sentiment-analysis/blob/main/data/slang.json?raw=true"
SLANGMAP_PATH = DATA_DIR.joinpath("slang.json")