Tymec committed on
Commit
0ca5366
1 Parent(s): 63ffb6b

Slight optimizations

Browse files
Files changed (3) hide show
  1. app/data.py +7 -7
  2. app/model.py +1 -1
  3. justfile +2 -0
app/data.py CHANGED
@@ -93,19 +93,19 @@ def load_amazonreviews(merge: bool = True) -> tuple[list[str], list[int]]:
93
  raise FileNotFoundError(msg)
94
 
95
  # Load the datasets
 
96
  with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
97
- train_data = [line.decode("utf-8") for line in train_file]
98
 
99
- test_data = []
100
  if merge:
101
  with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
102
- test_data = [line.decode("utf-8") for line in test_file]
103
-
104
- # Merge the datasets
105
- data = train_data + test_data
106
 
107
  # Split the data into labels and text
108
- labels, texts = zip(*(line.split(" ", 1) for line in data))
 
 
 
109
 
110
  # Map sentiment values
111
  sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
 
93
  raise FileNotFoundError(msg)
94
 
95
  # Load the datasets
96
+ dataset = []
97
  with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
98
+ dataset.extend([line.decode("utf-8") for line in train_file])
99
 
 
100
  if merge:
101
  with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
102
+ dataset.extend([line.decode("utf-8") for line in test_file])
 
 
 
103
 
104
  # Split the data into labels and text
105
+ labels, texts = zip(*(line.split(" ", 1) for line in dataset)) # NOTE: Occasionally OOM
106
+
107
+ # Free up memory
108
+ del dataset
109
 
110
  # Map sentiment values
111
  sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
app/model.py CHANGED
@@ -121,7 +121,7 @@ def create_model(
121
  token_pattern=None,
122
  ),
123
  ),
124
- ("classifier", LogisticRegression(max_iter=1000, C=1.0, random_state=seed)),
125
  ],
126
  memory=Memory(CACHE_DIR, verbose=0),
127
  verbose=verbose,
 
121
  token_pattern=None,
122
  ),
123
  ),
124
+ ("classifier", LogisticRegression(max_iter=1000, random_state=seed)),
125
  ],
126
  memory=Memory(CACHE_DIR, verbose=0),
127
  verbose=verbose,
justfile CHANGED
@@ -13,9 +13,11 @@
13
  @install-dev:
14
  poetry self add poetry-plugin-export
15
  poetry install
 
16
 
17
  @requirements:
18
  poetry export -f requirements.txt --output requirements.txt --without dev
 
19
 
20
  [no-exit-message]
21
  @app *ARGS:
 
13
  @install-dev:
14
  poetry self add poetry-plugin-export
15
  poetry install
16
+ poetry run spacy download en_core_web_sm
17
 
18
  @requirements:
19
  poetry export -f requirements.txt --output requirements.txt --without dev
20
+ poetry export -f requirements.txt --output requirements-dev.txt
21
 
22
  [no-exit-message]
23
  @app *ARGS: