james-burton committed
Commit 3752cdf · verified · 1 Parent(s): abf2826

End of training

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. .gitkeep +0 -0
  3. 0.1-testing.ipynb +0 -0
  4. 0.10-rethinking_OM_splits.ipynb +770 -0
  5. 0.11-testing_bm_split_sizes.ipynb +644 -0
  6. 0.12-get_wandb_results.ipynb +0 -0
  7. 0.13-bm_dates_col.ipynb +0 -0
  8. 0.2-testing_image_scraping.ipynb +140 -0
  9. 0.3-testing_csv_join.ipynb +973 -0
  10. 0.4-testing_tif_images.ipynb +71 -0
  11. 0.5-testing_transparent_background.ipynb +321 -0
  12. 0.7Mahnaz-efficientnet.ipynb +492 -0
  13. 0.8-testing_segmented_data.ipynb +0 -0
  14. 0.9-testing_om_datasets.ipynb +459 -0
  15. 1.0-checking_dataset_size.ipynb +559 -0
  16. 1.1-exploring_OM_image_matching.ipynb +0 -0
  17. 2.0-assessing_OM_dataset.ipynb +1468 -0
  18. 3.0-efficientnet_example.ipynb +1062 -0
  19. 4.0-assessing_BM_dataset.ipynb +0 -0
  20. 5.0-assessing_date_prediction.ipynb +0 -0
  21. README.md +47 -0
  22. config.json +88 -0
  23. material_min3.csv +13 -0
  24. material_min3_max1.csv +13 -0
  25. material_min4.csv +13 -0
  26. material_min4_max1.csv +13 -0
  27. material_min5.csv +13 -0
  28. material_min5_max1.csv +13 -0
  29. material_min6.csv +13 -0
  30. material_min6_max1.csv +13 -0
  31. material_x_plus3Ds.csv +13 -0
  32. model.safetensors +3 -0
  33. object_name_min3.csv +1 -0
  34. object_name_min3_max1.csv +1 -0
  35. object_name_min4.csv +19 -0
  36. object_name_min4_max1.csv +19 -0
  37. object_name_min5.csv +1 -0
  38. object_name_min5_max1.csv +1 -0
  39. object_name_min6.csv +1 -0
  40. object_name_min6_max1.csv +1 -0
  41. preprocessor_config.json +29 -0
  42. results.pkl +3 -0
  43. results_local.pkl +3 -0
  44. training_args.bin +3 -0
  45. wandb/debug-cli.james.log +0 -0
  46. wandb/debug-internal.log +0 -0
  47. wandb/debug.log +0 -0
  48. wandb/run-20240214_112422-hfwsgqj3/files/config.yaml +0 -0
  49. wandb/run-20240214_112422-hfwsgqj3/files/output.log +33 -0
  50. wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt +202 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240214_112422-hfwsgqj3/run-hfwsgqj3.wandb filter=lfs diff=lfs merge=lfs -text
.gitkeep ADDED
File without changes
0.1-testing.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.10-rethinking_OM_splits.ipynb ADDED
@@ -0,0 +1,770 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import os\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
+ "obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\")\n",
+ "\n",
+ "\n",
+ "# Could eventually do something with these columns, but need cleaning first\n",
+ "obj2info.drop(\n",
+ " columns=[\"number_of_parts\", \"production.date.start\", \"production.date.end\", \"obj_num_old\"],\n",
+ " inplace=True,\n",
+ ")\n",
+ "\n",
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(x[\"root\"], x[\"file\"]), axis=1)\n",
+ "# file2obj.rename(columns={\"obj_num\": \"label\"}, inplace=True)\n",
+ "\n",
+ "join_df = file2obj[[\"obj_num\", \"file\", \"image\", \"root\"]].merge(\n",
+ " obj2info, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 data/raw/images/fulling_mill/1985\n",
+ "1 data/raw/images/fulling_mill/1985\n",
+ "2 data/raw/images/fulling_mill/1985\n",
+ "3 data/raw/images/fulling_mill/1985\n",
+ "4 data/raw/images/fulling_mill/1985\n",
+ " ... \n",
+ "37300 data/raw/images/egyptian/2014\n",
+ "37301 data/raw/images/egyptian/2014\n",
+ "37302 data/raw/images/egyptian/2014\n",
+ "37303 data/raw/images/egyptian/1963\n",
+ "37304 data/raw/images/egyptian/1963\n",
+ "Name: root, Length: 37305, dtype: object"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file2obj[\"root\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>eg3</td>\n",
+ " <td>squat shouldered jar, no rim</td>\n",
+ " <td>bowls</td>\n",
+ " <td>bowl</td>\n",
+ " <td>limestone</td>\n",
+ " <td>1st Dynasty</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>eg64</td>\n",
+ " <td>axe-head</td>\n",
+ " <td>axes: woodworking tools</td>\n",
+ " <td>axe-head</td>\n",
+ " <td>granite</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>eg71</td>\n",
+ " <td>the working end of a fish tail knife with pres...</td>\n",
+ " <td>knives</td>\n",
+ " <td>knife</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Naqada II</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>eg75</td>\n",
+ " <td>seated figure of priest holding unrolled papyr...</td>\n",
+ " <td>Human Figurine</td>\n",
+ " <td>imhotep figurine</td>\n",
+ " <td>bronze</td>\n",
+ " <td>Late Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>durom.1971.78</td>\n",
+ " <td>seated woman, inset eyes (lost), headdress had...</td>\n",
+ " <td>Human Figurine</td>\n",
+ " <td>Hathor figurine</td>\n",
+ " <td>bronze</td>\n",
+ " <td>Late Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12349</th>\n",
+ " <td>durma.2020.3.2562</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12350</th>\n",
+ " <td>durma.2020.3.2060</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12351</th>\n",
+ " <td>durma.2020.3.1446</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12352</th>\n",
+ " <td>durma.2020.3.2042</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12353</th>\n",
+ " <td>durma.2020.3.2072</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>11673 rows × 7 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " obj_num description \\\n",
+ "0 eg3 squat shouldered jar, no rim \n",
+ "1 eg64 axe-head \n",
+ "2 eg71 the working end of a fish tail knife with pres... \n",
+ "3 eg75 seated figure of priest holding unrolled papyr... \n",
+ "4 durom.1971.78 seated woman, inset eyes (lost), headdress had... \n",
+ "... ... ... \n",
+ "12349 durma.2020.3.2562 A silver Roman coin which is a part of the Pie... \n",
+ "12350 durma.2020.3.2060 A silver Roman coin which is a part of the Pie... \n",
+ "12351 durma.2020.3.1446 A silver Roman coin which is a part of the Pie... \n",
+ "12352 durma.2020.3.2042 A silver Roman coin which is a part of the Pie... \n",
+ "12353 durma.2020.3.2072 A silver Roman coin which is a part of the Pie... \n",
+ "\n",
+ " object_name other_name material \\\n",
+ "0 bowls bowl limestone \n",
+ "1 axes: woodworking tools axe-head granite \n",
+ "2 knives knife Flint/Chert \n",
+ "3 Human Figurine imhotep figurine bronze \n",
+ "4 Human Figurine Hathor figurine bronze \n",
+ "... ... ... ... \n",
+ "12349 coins NaN metal \n",
+ "12350 coins NaN metal \n",
+ "12351 coins NaN metal \n",
+ "12352 coins NaN metal \n",
+ "12353 coins NaN metal \n",
+ "\n",
+ " production.period production.place \n",
+ "0 1st Dynasty Egypt \n",
+ "1 NaN Egypt \n",
+ "2 Naqada II Egypt \n",
+ "3 Late Period Egypt \n",
+ "4 Late Period Egypt \n",
+ "... ... ... \n",
+ "12349 Roman Rome \n",
+ "12350 Roman NaN \n",
+ "12351 Roman Rome \n",
+ "12352 Roman Rome \n",
+ "12353 Roman Rome \n",
+ "\n",
+ "[11673 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obj2info.dropna(subset=[\"material\", \"description\"], inplace=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "label_col = \"material\"\n",
+ "\n",
+ "o2i_lim = obj2info.dropna(subset=[label_col, \"description\"], inplace=False)\n",
+ "\n",
+ "num_counts = o2i_lim[label_col].value_counts()\n",
+ "for lower_lim in [3]:\n",
+ " o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]\n",
+ "train, val_test = train_test_split(\n",
+ " o2i_lim, stratify=o2i_lim[label_col], test_size=0.4, random_state=42\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[label_col], test_size=0.8, random_state=42\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import Dataset, DatasetDict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = Dataset.from_pandas(join_df).to_pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>file</th>\n",
+ " <th>image</th>\n",
+ " <th>root</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>durma.1985.15.68</td>\n",
+ " <td>1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>2 fragments of a bowl with open fret work at t...</td>\n",
+ " <td>None</td>\n",
+ " <td>Rim Sherds</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Post-Medieval</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>1985.52.37.ff2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.f...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Reconstructed small vessel (many pieces with s...</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>durma.1985.81.4496</td>\n",
+ " <td>1985.81.4496 d2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.81.4496...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Fragment of a Samian beaker. Panell decoration...</td>\n",
+ " <td>vessels</td>\n",
+ " <td>pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>durma.1985.9.1</td>\n",
+ " <td>1985.9.1.1-d4.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.9.1.1-d...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>2 Fragmentary Saxon Cinerary Urns + 1 relative...</td>\n",
+ " <td>None</td>\n",
+ " <td>Cinerary Urns</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Saxon</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>1985.52.37.sf2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.s...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Reconstructed small vessel (many pieces with s...</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37300</th>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Brown,...</td>\n",
+ " <td>blades</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37301</th>\n",
+ " <td>durom.2014.1.71</td>\n",
+ " <td>2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Large,...</td>\n",
+ " <td>axes: woodworking tools</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37302</th>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Brown,...</td>\n",
+ " <td>blades</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37303</th>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>The woman is dressed in Qing dynasty style and...</td>\n",
+ " <td>figures</td>\n",
+ " <td>牙雕母婴像</td>\n",
+ " <td>ivory</td>\n",
+ " <td>late Qing dynasty</td>\n",
+ " <td>China</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37304</th>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>The woman is dressed in Qing dynasty style and...</td>\n",
+ " <td>figures</td>\n",
+ " <td>牙雕母婴像</td>\n",
+ " <td>ivory</td>\n",
+ " <td>late Qing dynasty</td>\n",
+ " <td>China</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>37305 rows × 10 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " obj_num file \\\n",
+ "0 durma.1985.15.68 1985.15.68.jpg \n",
+ "1 durma.1985.52.37 1985.52.37.ff2.jpg \n",
+ "2 durma.1985.81.4496 1985.81.4496 d2.jpg \n",
+ "3 durma.1985.9.1 1985.9.1.1-d4.jpg \n",
+ "4 durma.1985.52.37 1985.52.37.sf2.jpg \n",
+ "... ... ... \n",
+ "37300 durom.2014.1.2 2014.1.2 bb.jpg \n",
+ "37301 durom.2014.1.71 2014.1.71 ll.jpg \n",
+ "37302 durom.2014.1.2 2014.1.2 rr.jpg \n",
+ "37303 durom.1963.4 1963.4.jpg \n",
+ "37304 durom.1963.4 1963.4.2.jpg \n",
+ "\n",
+ " image \\\n",
+ "0 data/raw/images/fulling_mill/1985/1985.15.68.jpg \n",
+ "1 data/raw/images/fulling_mill/1985/1985.52.37.f... \n",
+ "2 data/raw/images/fulling_mill/1985/1985.81.4496... \n",
+ "3 data/raw/images/fulling_mill/1985/1985.9.1.1-d... \n",
+ "4 data/raw/images/fulling_mill/1985/1985.52.37.s... \n",
+ "... ... \n",
+ "37300 data/raw/images/egyptian/2014/2014.1.2 bb.jpg \n",
+ "37301 data/raw/images/egyptian/2014/2014.1.71 ll.jpg \n",
+ "37302 data/raw/images/egyptian/2014/2014.1.2 rr.jpg \n",
+ "37303 data/raw/images/egyptian/1963/1963.4.jpg \n",
+ "37304 data/raw/images/egyptian/1963/1963.4.2.jpg \n",
+ "\n",
+ " root \\\n",
+ "0 data/raw/images/fulling_mill/1985 \n",
+ "1 data/raw/images/fulling_mill/1985 \n",
+ "2 data/raw/images/fulling_mill/1985 \n",
+ "3 data/raw/images/fulling_mill/1985 \n",
+ "4 data/raw/images/fulling_mill/1985 \n",
+ "... ... \n",
+ "37300 data/raw/images/egyptian/2014 \n",
+ "37301 data/raw/images/egyptian/2014 \n",
+ "37302 data/raw/images/egyptian/2014 \n",
+ "37303 data/raw/images/egyptian/1963 \n",
+ "37304 data/raw/images/egyptian/1963 \n",
+ "\n",
+ " description \\\n",
+ "0 2 fragments of a bowl with open fret work at t... \n",
+ "1 Reconstructed small vessel (many pieces with s... \n",
+ "2 Fragment of a Samian beaker. Panell decoration... \n",
+ "3 2 Fragmentary Saxon Cinerary Urns + 1 relative... \n",
+ "4 Reconstructed small vessel (many pieces with s... \n",
+ "... ... \n",
+ "37300 One of a collection of 162 flint tools. Brown,... \n",
+ "37301 One of a collection of 162 flint tools. Large,... \n",
+ "37302 One of a collection of 162 flint tools. Brown,... \n",
+ "37303 The woman is dressed in Qing dynasty style and... \n",
+ "37304 The woman is dressed in Qing dynasty style and... \n",
+ "\n",
+ " object_name other_name material production.period \\\n",
+ "0 None Rim Sherds pottery Post-Medieval \n",
+ "1 pottery Pottery pottery Roman \n",
+ "2 vessels pottery pottery Roman \n",
+ "3 None Cinerary Urns pottery Saxon \n",
+ "4 pottery Pottery pottery Roman \n",
+ "... ... ... ... ... \n",
+ "37300 blades None Flint/Chert Neolithic Period \n",
+ "37301 axes: woodworking tools None Flint/Chert Neolithic Period \n",
+ "37302 blades None Flint/Chert Neolithic Period \n",
+ "37303 figures 牙雕母婴像 ivory late Qing dynasty \n",
+ "37304 figures 牙雕母婴像 ivory late Qing dynasty \n",
+ "\n",
+ " production.place \n",
+ "0 None \n",
+ "1 None \n",
+ "2 None \n",
+ "3 None \n",
+ "4 None \n",
+ "... ... \n",
+ "37300 Egypt \n",
+ "37301 Egypt \n",
+ "37302 Egypt \n",
+ "37303 China \n",
+ "37304 China \n",
+ "\n",
+ "[37305 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(6819, 7) (2370, 7) (2370, 7) (11559, 7)\n",
+ "(19246, 10) (6743, 10) (7078, 10) (37305, 10)\n"
+ ]
+ }
+ ],
+ "source": [
+ "index_col = \"obj_num\"\n",
+ "text_col = \"obj_num\"\n",
+ "label_col = \"material\"\n",
+ "lower_lim = 3\n",
+ "problem_type = \"image\"\n",
+ "\n",
+ "\n",
+ "o2i_lim = (\n",
+ " ds.drop_duplicates(subset=[index_col, label_col], inplace=False)\n",
+ " .dropna(subset=[text_col, label_col], inplace=False)\n",
+ " .drop(columns=[\"root\", \"file\", \"image\"], inplace=False)\n",
+ ")\n",
+ "\n",
+ "\n",
+ "num_counts = o2i_lim[label_col].value_counts()\n",
+ "o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]\n",
+ "\n",
+ "train, val_test = train_test_split(\n",
+ " o2i_lim, stratify=o2i_lim[label_col], test_size=0.41, random_state=42\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[label_col], test_size=0.5, random_state=42\n",
+ ")\n",
+ "print(train.shape, val.shape, test.shape, o2i_lim.shape)\n",
+ "\n",
+ "if problem_type == \"image\":\n",
+ " train = train.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " val = val.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " test = test.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " print(train.shape, val.shape, test.shape, ds.shape)\n",
+ "\n",
+ "# ds_dict = DatasetDict({\"train\": Dataset.from_pandas(train), \"val\": Dataset.from_pandas(val), \"test\": Dataset.from_pandas(test)})\n",
+ "# ds_dict\n",
+ "\n",
+ "# if problem_type == \"image\":\n",
+ "\n",
+ "# o2i_lim_ds = o2i_lim_ds.train_test_split(test_size=0.3, stratify_by_column=label_col, seed=42)\n",
+ "# o2i_lim_ds_valtest = o2i_lim_ds[\"test\"].train_test_split(test_size=0.5, stratify_by_column=label_col, seed=42)\n",
+ "# o2i_lim_ds = DatasetDict({\"train\": o2i_lim_ds[\"train\"], \"val\": o2i_lim_ds_valtest[\"train\"], \"test\": o2i_lim_ds_valtest[\"test\"]})\n",
+ "\n",
+ "# if problem_type == \"image\":\n",
+ "# file2obj = ds[[\"obj_num\", \"file\", \"image\", \"root\"]].drop_duplicates(subset=[\"obj_num\"], inplace=False)\n",
+ "# train = o2i_lim_ds[\"train\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# val = o2i_lim_ds[\"val\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# test = o2i_lim_ds[\"test\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# o2i_lim_ds = DatasetDict({\"train\": train, \"val\": val, \"test\": test})\n",
+ "# o2i_lim_ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [obj_num, description, object_name, other_name, material, production.period, production.place]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "o2i_lim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols_to_drop = [\"col1\", \"col2\", \"col3\"]\n",
+ "ds = ds.drop(cols_to_drop, axis=1, errors=\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_lim = ds_all.dropna(subset=[\"image\", args.label_col], inplace=False)\n",
+ "if \"3D\" in args.dataset:\n",
+ " ds_lim = ds_all[ds_all[\"original\"]]\n",
+ "\n",
+ "num_counts = ds_lim[args.label_col].value_counts()\n",
+ "ds_lim = ds_lim[ds_lim[args.label_col].isin(num_counts[num_counts > args.lower_lim].index)]\n",
+ "\n",
+ "train, val_test = train_test_split(\n",
+ " ds_lim,\n",
+ " stratify=ds_lim[args.label_col],\n",
+ " test_size=2 * args.testset_size,\n",
+ " random_state=42,\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[args.label_col], test_size=0.5, random_state=42\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
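
Note on the splitting cells above: rare labels are filtered out via value_counts before splitting, the stratified split is done on the deduplicated object table (obj_num), and the per-image rows are only merged back in afterwards, so multiple photographs of one object cannot leak across train/val/test. A minimal, self-contained sketch of that recipe (toy data; the column names and the 0.41/0.5 split fractions are taken from the notebook):

import pandas as pd
from sklearn.model_selection import train_test_split

# Toy object table standing in for o2i_lim (labels are illustrative only)
df = pd.DataFrame(
    {
        "obj_num": [f"obj{i}" for i in range(100)],
        "material": ["pottery"] * 60 + ["bronze"] * 30 + ["ivory"] * 10,
    }
)

label_col, lower_lim = "material", 3

# Drop labels rarer than lower_lim; stratification needs enough of each class
counts = df[label_col].value_counts()
df = df[df[label_col].isin(counts[counts > lower_lim].index)]

# Two chained stratified splits: ~59% train, then val_test halved into val/test
train, val_test = train_test_split(
    df, stratify=df[label_col], test_size=0.41, random_state=42
)
val, test = train_test_split(
    val_test, stratify=val_test[label_col], test_size=0.5, random_state=42
)
print(train.shape, val.shape, test.shape)

Splitting at the object level and merging images back afterwards is also why the notebook's second print shows unequal val/test image counts (6743 vs 7078) despite equal object counts (2370 each): objects carry differing numbers of photographs.
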
0.11-testing_bm_split_sizes.ipynb ADDED
@@ -0,0 +1,644 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "from sklearn.model_selection import train_test_split\n",
12
+ "from datasets import load_dataset\n",
13
+ "from artifact_classification.utils import ConfigLoader"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 3,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "name": "stdout",
23
+ "output_type": "stream",
24
+ "text": [
25
+ "Updating with:\n",
26
+ "{'config': 'testing', 'fast_dev_run': True, 'dataset': 'james-burton/BritishMuseum', 'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3', 'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'}\n",
27
+ "\n",
28
+ "\n",
29
+ "{'config': 'testing', 'fast_dev_run': True, 'do_train': True, 'do_predict': True, 'batch_size': 128, 'output_root': 'models/', 'num_epochs': 100, 'early_stopping_patience': 5, 'grad_accumulation_steps': 1, 'seed': 42, 'logging_steps': 10, 'lr_scheduler': 'linear', 'warmup_ratio': 0, 'weight_decay': 0, 'device': 'cuda', 'num_workers': 1, 'resume_from_checkpoint': False, 'predict_batch_size': 16, 'save_total_limit': 1, 'lr': 5e-05, 'pytorch2_0': True, 'max_length': 512, 'text_column': 'Description', 'fp16': True, 'testset_size': 0.1, 'dataset': 'james-burton/BritishMuseum', 'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3', 'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'}\n",
30
+ "\n"
31
+ ]
32
+ },
33
+ {
34
+ "data": {
35
+ "application/vnd.jupyter.widget-view+json": {
36
+ "model_id": "cca9a5e0c5f2487ea3ad65c183da9c90",
37
+ "version_major": 2,
38
+ "version_minor": 0
39
+ },
40
+ "text/plain": [
41
+ "Resolving data files: 0%| | 0/22 [00:00<?, ?it/s]"
42
+ ]
43
+ },
44
+ "metadata": {},
45
+ "output_type": "display_data"
46
+ },
47
+ {
48
+ "data": {
49
+ "application/vnd.jupyter.widget-view+json": {
50
+ "model_id": "431f0e5b4ec84c8693b5c8c18525f810",
51
+ "version_major": 2,
52
+ "version_minor": 0
53
+ },
54
+ "text/plain": [
55
+ "Resolving data files: 0%| | 0/22 [00:00<?, ?it/s]"
56
+ ]
57
+ },
58
+ "metadata": {},
59
+ "output_type": "display_data"
60
+ },
61
+ {
62
+ "data": {
63
+ "application/vnd.jupyter.widget-view+json": {
64
+ "model_id": "afda95f374c1487584af43d91ba321df",
65
+ "version_major": 2,
66
+ "version_minor": 0
67
+ },
68
+ "text/plain": [
69
+ "Loading dataset shards: 0%| | 0/21 [00:00<?, ?it/s]"
70
+ ]
71
+ },
72
+ "metadata": {},
73
+ "output_type": "display_data"
74
+ }
75
+ ],
76
+ "source": [
77
+ "config = \"testing\"\n",
78
+ "args = ConfigLoader(config, \"../configs/train_bm_configs.yaml\", \"../configs/train_bm_default.yaml\")\n",
79
+ "\n",
80
+ "############################## Load dataset ##############################\n",
81
+ "# Load dataset, filter out na inputs and labels and encode labels (as label column can change)\n",
82
+ "\n",
83
+ "\n",
84
+ "label_cols = [\"Object type\", \"Culture\", \"Materials\", \"Production place\"]\n",
85
+ "split_sizes = [0.1, 0.12, 0.13, 0.15, 0.2]\n",
86
+ "\n",
87
+ "ds_lim = load_dataset(args.dataset)[\"train\"].to_pandas()"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 8,
93
+ "metadata": {},
94
+ "outputs": [
95
+ {
96
+ "name": "stdout",
97
+ "output_type": "stream",
98
+ "text": [
99
+ "Split size 0.1 failed\n",
100
+ "Split size 0.12 failed\n",
101
+ "Label col Object type Split size 0.13 passed\n",
102
+ "Label col Object type Split size 0.15 passed\n",
103
+ "Label col Object type Split size 0.2 passed\n",
104
+ "Split size 0.1 failed\n",
105
+ "Split size 0.12 failed\n",
106
+ "Label col Culture Split size 0.13 passed\n",
107
+ "Label col Culture Split size 0.15 passed\n",
108
+ "Label col Culture Split size 0.2 passed\n",
109
+ "Split size 0.1 failed\n",
110
+ "Split size 0.12 failed\n",
111
+ "Label col Materials Split size 0.13 passed\n",
112
+ "Label col Materials Split size 0.15 passed\n",
113
+ "Label col Materials Split size 0.2 passed\n",
114
+ "Split size 0.1 failed\n",
115
+ "Split size 0.12 failed\n",
116
+ "Label col Production place Split size 0.13 passed\n",
117
+ "Label col Production place Split size 0.15 passed\n",
118
+ "Label col Production place Split size 0.2 passed\n"
119
+ ]
120
+ }
121
+ ],
122
+ "source": [
123
+ "def test_split_size(split_size, label_col, ds_lim):\n",
124
+ " try:\n",
125
+ " ds_lim.dropna(subset=[label_col])\n",
126
+ " num_counts = ds_lim[label_col].value_counts()\n",
127
+ " ds_lim = ds_lim[ds_lim[label_col].isin(num_counts[num_counts > args.lower_lim].index)]\n",
128
+ "\n",
129
+ " train, val_test = train_test_split(\n",
130
+ " ds_lim,\n",
131
+ " stratify=ds_lim[label_col],\n",
132
+ " test_size=2 * split_size,\n",
133
+ " random_state=42,\n",
134
+ " )\n",
135
+ " val, test = train_test_split(\n",
136
+ " val_test, stratify=val_test[label_col], test_size=0.5, random_state=42\n",
137
+ " )\n",
138
+ " print(f\"Label col {label_col} Split size {split_size} passed\")\n",
139
+ " except ValueError:\n",
140
+ " print(f\"Split size {split_size} failed\")\n",
141
+ "\n",
142
+ "\n",
143
+ "for label_col in label_cols:\n",
144
+ " for split_size in split_sizes:\n",
145
+ " test_split_size(split_size, label_col, ds_lim)"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": 1,
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": [
154
+ "import yaml\n",
155
+ "\n",
156
+ "with open(\"../configs/train_configs.yaml\", \"r\") as file:\n",
157
+ " configs = list(yaml.safe_load_all(file))"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 2,
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "data": {
167
+ "text/plain": [
168
+ "'om3-white_material_bm-pretrn om3-white_name_bm-pretrn om3-3Dwhite_material_bm-pretrn om3-3Dwhite_name_bm-pretrn om3-3Dwhite-1frame_material_bm-pretrn om3-3Dwhite-1frame_name_bm-pretrn om4-white_material_bm-pretrn om4-white_name_bm-pretrn om4-3Dwhite_material_bm-pretrn om4-3Dwhite_name_bm-pretrn om4-3Dwhite-1frame_material_bm-pretrn om4-3Dwhite-1frame_name_bm-pretrn om5-white_material_bm-pretrn om5-white_name_bm-pretrn om5-3Dwhite_material_bm-pretrn om5-3Dwhite_name_bm-pretrn om5-3Dwhite-1frame_material_bm-pretrn om5-3Dwhite-1frame_name_bm-pretrn om6-white_material_bm-pretrn om6-white_name_bm-pretrn om6-3Dwhite_material_bm-pretrn om6-3Dwhite_name_bm-pretrn om6-3Dwhite-1frame_material_bm-pretrn om6-3Dwhite-1frame_name_bm-pretrn'"
169
+ ]
170
+ },
171
+ "execution_count": 2,
172
+ "metadata": {},
173
+ "output_type": "execute_result"
174
+ }
175
+ ],
176
+ "source": [
177
+ "\" \".join(\n",
178
+ " [cfg[\"config\"] for cfg in configs if \"bm\" in cfg[\"config\"] and \"num\" not in cfg[\"config\"]]\n",
179
+ ")"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 18,
185
+ "metadata": {},
186
+ "outputs": [
187
+ {
188
+ "data": {
189
+ "text/plain": [
190
+ "[{'config': 'testing',\n",
191
+ " 'fast_dev_run': True,\n",
192
+ " 'dataset': 'james-burton/BritishMuseum',\n",
193
+ " 'wandb_proj_name': 'British Museum',\n",
194
+ " 'model_base': 'google/efficientnet-b3',\n",
195
+ " 'problem_type': 'image',\n",
196
+ " 'lower_lim': 5,\n",
197
+ " 'label_col': 'Object type'},\n",
198
+ " {'config': 'bm3_type',\n",
199
+ " 'dataset': 'james-burton/BritishMuseum',\n",
200
+ " 'wandb_proj_name': 'British Museum',\n",
201
+ " 'model_base': 'google/efficientnet-b3',\n",
202
+ " 'problem_type': 'image',\n",
203
+ " 'lower_lim': 3,\n",
204
+ " 'label_col': 'Object type',\n",
205
+ " 'testset_size': 0.205},\n",
206
+ " {'config': 'bm3_material',\n",
207
+ " 'dataset': 'james-burton/BritishMuseum',\n",
208
+ " 'wandb_proj_name': 'British Museum',\n",
209
+ " 'model_base': 'google/efficientnet-b3',\n",
210
+ " 'problem_type': 'image',\n",
211
+ " 'lower_lim': 3,\n",
212
+ " 'label_col': 'Materials',\n",
213
+ " 'testset_size': 0.205},\n",
214
+ " {'config': 'bm3_culture',\n",
215
+ " 'dataset': 'james-burton/BritishMuseum',\n",
216
+ " 'wandb_proj_name': 'British Museum',\n",
217
+ " 'model_base': 'google/efficientnet-b3',\n",
218
+ " 'problem_type': 'image',\n",
219
+ " 'lower_lim': 3,\n",
220
+ " 'label_col': 'Culture',\n",
221
+ " 'testset_size': 0.205},\n",
222
+ " {'config': 'bm3txt_material',\n",
223
+ " 'dataset': 'james-burton/BritishMuseum',\n",
224
+ " 'wandb_proj_name': 'British Museum',\n",
225
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
226
+ " 'problem_type': 'text',\n",
227
+ " 'lower_lim': 3,\n",
228
+ " 'label_col': 'Materials',\n",
229
+ " 'testset_size': 0.205},\n",
230
+ " {'config': 'bm3txt_culture',\n",
231
+ " 'dataset': 'james-burton/BritishMuseum',\n",
232
+ " 'wandb_proj_name': 'British Museum',\n",
233
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
234
+ " 'problem_type': 'text',\n",
235
+ " 'lower_lim': 3,\n",
236
+ " 'label_col': 'Culture',\n",
237
+ " 'testset_size': 0.205},\n",
238
+ " {'config': 'bm3-white_type',\n",
239
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
240
+ " 'wandb_proj_name': 'British Museum',\n",
241
+ " 'model_base': 'google/efficientnet-b3',\n",
242
+ " 'problem_type': 'image',\n",
243
+ " 'lower_lim': 3,\n",
244
+ " 'label_col': 'Object type',\n",
245
+ " 'testset_size': 0.205},\n",
246
+ " {'config': 'bm3-white_material',\n",
247
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
248
+ " 'wandb_proj_name': 'British Museum',\n",
249
+ " 'model_base': 'google/efficientnet-b3',\n",
250
+ " 'problem_type': 'image',\n",
251
+ " 'lower_lim': 3,\n",
252
+ " 'label_col': 'Materials',\n",
253
+ " 'testset_size': 0.205},\n",
254
+ " {'config': 'bm3-white_culture',\n",
255
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
256
+ " 'wandb_proj_name': 'British Museum',\n",
257
+ " 'model_base': 'google/efficientnet-b3',\n",
258
+ " 'problem_type': 'image',\n",
259
+ " 'lower_lim': 3,\n",
260
+ " 'label_col': 'Culture',\n",
261
+ " 'testset_size': 0.205},\n",
262
+ " {'config': 'bm3-3Dwhite_type',\n",
263
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
264
+ " 'wandb_proj_name': 'British Museum',\n",
265
+ " 'model_base': 'google/efficientnet-b3',\n",
266
+ " 'problem_type': 'image',\n",
267
+ " 'lower_lim': 3,\n",
268
+ " 'label_col': 'Object type',\n",
269
+ " 'testset_size': 0.205},\n",
270
+ " {'config': 'bm3-3Dwhite_material',\n",
271
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
272
+ " 'wandb_proj_name': 'British Museum',\n",
273
+ " 'model_base': 'google/efficientnet-b3',\n",
274
+ " 'problem_type': 'image',\n",
275
+ " 'lower_lim': 3,\n",
276
+ " 'label_col': 'Materials',\n",
277
+ " 'testset_size': 0.205},\n",
278
+ " {'config': 'bm3-3Dwhite_culture',\n",
279
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
280
+ " 'wandb_proj_name': 'British Museum',\n",
281
+ " 'model_base': 'google/efficientnet-b3',\n",
282
+ " 'problem_type': 'image',\n",
283
+ " 'lower_lim': 3,\n",
284
+ " 'label_col': 'Culture',\n",
285
+ " 'testset_size': 0.205},\n",
286
+ " {'config': 'bm3-3Dwhite-1frame_type',\n",
287
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
288
+ " 'wandb_proj_name': 'British Museum',\n",
289
+ " 'model_base': 'google/efficientnet-b3',\n",
290
+ " 'problem_type': 'image',\n",
291
+ " 'lower_lim': 3,\n",
292
+ " 'label_col': 'Object type',\n",
293
+ " 'testset_size': 0.205},\n",
294
+ " {'config': 'bm3-3Dwhite-1frame_material',\n",
295
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
296
+ " 'wandb_proj_name': 'British Museum',\n",
297
+ " 'model_base': 'google/efficientnet-b3',\n",
298
+ " 'problem_type': 'image',\n",
299
+ " 'lower_lim': 3,\n",
300
+ " 'label_col': 'Materials',\n",
301
+ " 'testset_size': 0.205},\n",
302
+ " {'config': 'bm3-3Dwhite-1frame_culture',\n",
303
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
304
+ " 'wandb_proj_name': 'British Museum',\n",
305
+ " 'model_base': 'google/efficientnet-b3',\n",
306
+ " 'problem_type': 'image',\n",
307
+ " 'lower_lim': 3,\n",
308
+ " 'label_col': 'Culture',\n",
309
+ " 'testset_size': 0.205},\n",
310
+ " {'config': 'bm4_type',\n",
311
+ " 'dataset': 'james-burton/BritishMuseum',\n",
312
+ " 'wandb_proj_name': 'British Museum',\n",
313
+ " 'model_base': 'google/efficientnet-b3',\n",
314
+ " 'problem_type': 'image',\n",
315
+ " 'lower_lim': 4,\n",
316
+ " 'label_col': 'Object type'},\n",
317
+ " {'config': 'bm4_material',\n",
318
+ " 'dataset': 'james-burton/BritishMuseum',\n",
319
+ " 'wandb_proj_name': 'British Museum',\n",
320
+ " 'model_base': 'google/efficientnet-b3',\n",
321
+ " 'problem_type': 'image',\n",
322
+ " 'lower_lim': 4,\n",
323
+ " 'label_col': 'Materials'},\n",
324
+ " {'config': 'bm4_culture',\n",
325
+ " 'dataset': 'james-burton/BritishMuseum',\n",
326
+ " 'wandb_proj_name': 'British Museum',\n",
327
+ " 'model_base': 'google/efficientnet-b3',\n",
328
+ " 'problem_type': 'image',\n",
329
+ " 'lower_lim': 4,\n",
330
+ " 'label_col': 'Culture'},\n",
331
+ " {'config': 'bm4txt_material',\n",
332
+ " 'dataset': 'james-burton/BritishMuseum',\n",
333
+ " 'wandb_proj_name': 'British Museum',\n",
334
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
335
+ " 'problem_type': 'text',\n",
336
+ " 'lower_lim': 4,\n",
337
+ " 'label_col': 'Materials'},\n",
338
+ " {'config': 'bm4txt_culture',\n",
339
+ " 'dataset': 'james-burton/BritishMuseum',\n",
340
+ " 'wandb_proj_name': 'British Museum',\n",
341
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
342
+ " 'problem_type': 'text',\n",
343
+ " 'lower_lim': 4,\n",
344
+ " 'label_col': 'Culture'},\n",
345
+ " {'config': 'bm4-white_type',\n",
346
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
347
+ " 'wandb_proj_name': 'British Museum',\n",
348
+ " 'model_base': 'google/efficientnet-b3',\n",
349
+ " 'problem_type': 'image',\n",
350
+ " 'lower_lim': 4,\n",
351
+ " 'label_col': 'Object type'},\n",
352
+ " {'config': 'bm4-white_material',\n",
353
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
354
+ " 'wandb_proj_name': 'British Museum',\n",
355
+ " 'model_base': 'google/efficientnet-b3',\n",
356
+ " 'problem_type': 'image',\n",
357
+ " 'lower_lim': 4,\n",
358
+ " 'label_col': 'Materials'},\n",
359
+ " {'config': 'bm4-white_culture',\n",
360
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
361
+ " 'wandb_proj_name': 'British Museum',\n",
362
+ " 'model_base': 'google/efficientnet-b3',\n",
363
+ " 'problem_type': 'image',\n",
364
+ " 'lower_lim': 4,\n",
365
+ " 'label_col': 'Culture'},\n",
366
+ " {'config': 'bm4-3Dwhite_type',\n",
367
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
368
+ " 'wandb_proj_name': 'British Museum',\n",
369
+ " 'model_base': 'google/efficientnet-b3',\n",
370
+ " 'problem_type': 'image',\n",
371
+ " 'lower_lim': 4,\n",
372
+ " 'label_col': 'Object type'},\n",
373
+ " {'config': 'bm4-3Dwhite_material',\n",
374
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
375
+ " 'wandb_proj_name': 'British Museum',\n",
376
+ " 'model_base': 'google/efficientnet-b3',\n",
377
+ " 'problem_type': 'image',\n",
378
+ " 'lower_lim': 4,\n",
379
+ " 'label_col': 'Materials'},\n",
380
+ " {'config': 'bm4-3Dwhite_culture',\n",
381
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
382
+ " 'wandb_proj_name': 'British Museum',\n",
383
+ " 'model_base': 'google/efficientnet-b3',\n",
384
+ " 'problem_type': 'image',\n",
385
+ " 'lower_lim': 4,\n",
386
+ " 'label_col': 'Culture'},\n",
387
+ " {'config': 'bm4-3Dwhite-1frame_type',\n",
388
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
389
+ " 'wandb_proj_name': 'British Museum',\n",
390
+ " 'model_base': 'google/efficientnet-b3',\n",
391
+ " 'problem_type': 'image',\n",
392
+ " 'lower_lim': 4,\n",
393
+ " 'label_col': 'Object type'},\n",
394
+ " {'config': 'bm4-3Dwhite-1frame_material',\n",
395
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
396
+ " 'wandb_proj_name': 'British Museum',\n",
397
+ " 'model_base': 'google/efficientnet-b3',\n",
398
+ " 'problem_type': 'image',\n",
399
+ " 'lower_lim': 4,\n",
400
+ " 'label_col': 'Materials'},\n",
401
+ " {'config': 'bm4-3Dwhite-1frame_culture',\n",
402
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
403
+ " 'wandb_proj_name': 'British Museum',\n",
404
+ " 'model_base': 'google/efficientnet-b3',\n",
405
+ " 'problem_type': 'image',\n",
406
+ " 'lower_lim': 4,\n",
407
+ " 'label_col': 'Culture'},\n",
408
+ " {'config': 'bm5_type',\n",
409
+ " 'dataset': 'james-burton/BritishMuseum',\n",
410
+ " 'wandb_proj_name': 'British Museum',\n",
411
+ " 'model_base': 'google/efficientnet-b3',\n",
412
+ " 'problem_type': 'image',\n",
413
+ " 'lower_lim': 5,\n",
414
+ " 'label_col': 'Object type'},\n",
415
+ " {'config': 'bm5_material',\n",
416
+ " 'dataset': 'james-burton/BritishMuseum',\n",
417
+ " 'wandb_proj_name': 'British Museum',\n",
418
+ " 'model_base': 'google/efficientnet-b3',\n",
419
+ " 'problem_type': 'image',\n",
420
+ " 'lower_lim': 5,\n",
421
+ " 'label_col': 'Materials'},\n",
422
+ " {'config': 'bm5_culture',\n",
423
+ " 'dataset': 'james-burton/BritishMuseum',\n",
424
+ " 'wandb_proj_name': 'British Museum',\n",
425
+ " 'model_base': 'google/efficientnet-b3',\n",
426
+ " 'problem_type': 'image',\n",
427
+ " 'lower_lim': 5,\n",
428
+ " 'label_col': 'Culture'},\n",
429
+ " {'config': 'bm5txt_material',\n",
430
+ " 'dataset': 'james-burton/BritishMuseum',\n",
431
+ " 'wandb_proj_name': 'British Museum',\n",
432
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
433
+ " 'problem_type': 'text',\n",
434
+ " 'lower_lim': 5,\n",
435
+ " 'label_col': 'Materials'},\n",
436
+ " {'config': 'bm5txt_culture',\n",
437
+ " 'dataset': 'james-burton/BritishMuseum',\n",
438
+ " 'wandb_proj_name': 'British Museum',\n",
439
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
440
+ " 'problem_type': 'text',\n",
441
+ " 'lower_lim': 5,\n",
442
+ " 'label_col': 'Culture'},\n",
443
+ " {'config': 'bm5-white_type',\n",
444
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
445
+ " 'wandb_proj_name': 'British Museum',\n",
446
+ " 'model_base': 'google/efficientnet-b3',\n",
447
+ " 'problem_type': 'image',\n",
448
+ " 'lower_lim': 5,\n",
449
+ " 'label_col': 'Object type'},\n",
450
+ " {'config': 'bm5-white_material',\n",
451
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
452
+ " 'wandb_proj_name': 'British Museum',\n",
453
+ " 'model_base': 'google/efficientnet-b3',\n",
454
+ " 'problem_type': 'image',\n",
455
+ " 'lower_lim': 5,\n",
456
+ " 'label_col': 'Materials'},\n",
457
+ " {'config': 'bm5-white_culture',\n",
458
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
459
+ " 'wandb_proj_name': 'British Museum',\n",
460
+ " 'model_base': 'google/efficientnet-b3',\n",
461
+ " 'problem_type': 'image',\n",
462
+ " 'lower_lim': 5,\n",
463
+ " 'label_col': 'Culture'},\n",
464
+ " {'config': 'bm5-3Dwhite_type',\n",
465
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
466
+ " 'wandb_proj_name': 'British Museum',\n",
467
+ " 'model_base': 'google/efficientnet-b3',\n",
468
+ " 'problem_type': 'image',\n",
469
+ " 'lower_lim': 5,\n",
470
+ " 'label_col': 'Object type'},\n",
471
+ " {'config': 'bm5-3Dwhite_material',\n",
472
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
473
+ " 'wandb_proj_name': 'British Museum',\n",
474
+ " 'model_base': 'google/efficientnet-b3',\n",
475
+ " 'problem_type': 'image',\n",
476
+ " 'lower_lim': 5,\n",
477
+ " 'label_col': 'Materials'},\n",
478
+ " {'config': 'bm5-3Dwhite_culture',\n",
479
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
480
+ " 'wandb_proj_name': 'British Museum',\n",
481
+ " 'model_base': 'google/efficientnet-b3',\n",
482
+ " 'problem_type': 'image',\n",
483
+ " 'lower_lim': 5,\n",
484
+ " 'label_col': 'Culture'},\n",
485
+ " {'config': 'bm5-3Dwhite-1frame_type',\n",
486
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
487
+ " 'wandb_proj_name': 'British Museum',\n",
488
+ " 'model_base': 'google/efficientnet-b3',\n",
489
+ " 'problem_type': 'image',\n",
490
+ " 'lower_lim': 5,\n",
491
+ " 'label_col': 'Object type'},\n",
492
+ " {'config': 'bm5-3Dwhite-1frame_material',\n",
493
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
494
+ " 'wandb_proj_name': 'British Museum',\n",
495
+ " 'model_base': 'google/efficientnet-b3',\n",
496
+ " 'problem_type': 'image',\n",
497
+ " 'lower_lim': 5,\n",
498
+ " 'label_col': 'Materials'},\n",
499
+ " {'config': 'bm5-3Dwhite-1frame_culture',\n",
500
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
501
+ " 'wandb_proj_name': 'British Museum',\n",
502
+ " 'model_base': 'google/efficientnet-b3',\n",
503
+ " 'problem_type': 'image',\n",
504
+ " 'lower_lim': 5,\n",
505
+ " 'label_col': 'Culture'},\n",
506
+ " {'config': 'bm6_type',\n",
507
+ " 'dataset': 'james-burton/BritishMuseum',\n",
508
+ " 'wandb_proj_name': 'British Museum',\n",
509
+ " 'model_base': 'google/efficientnet-b3',\n",
510
+ " 'problem_type': 'image',\n",
511
+ " 'lower_lim': 6,\n",
512
+ " 'label_col': 'Object type'},\n",
513
+ " {'config': 'bm6_material',\n",
514
+ " 'dataset': 'james-burton/BritishMuseum',\n",
515
+ " 'wandb_proj_name': 'British Museum',\n",
516
+ " 'model_base': 'google/efficientnet-b3',\n",
517
+ " 'problem_type': 'image',\n",
518
+ " 'lower_lim': 6,\n",
519
+ " 'label_col': 'Materials'},\n",
520
+ " {'config': 'bm6_culture',\n",
521
+ " 'dataset': 'james-burton/BritishMuseum',\n",
522
+ " 'wandb_proj_name': 'British Museum',\n",
523
+ " 'model_base': 'google/efficientnet-b3',\n",
524
+ " 'problem_type': 'image',\n",
525
+ " 'lower_lim': 6,\n",
526
+ " 'label_col': 'Culture'},\n",
527
+ " {'config': 'bm6txt_material',\n",
528
+ " 'dataset': 'james-burton/BritishMuseum',\n",
529
+ " 'wandb_proj_name': 'British Museum',\n",
530
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
531
+ " 'problem_type': 'text',\n",
532
+ " 'lower_lim': 6,\n",
533
+ " 'label_col': 'Materials'},\n",
534
+ " {'config': 'bm6txt_culture',\n",
535
+ " 'dataset': 'james-burton/BritishMuseum',\n",
536
+ " 'wandb_proj_name': 'British Museum',\n",
537
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
538
+ " 'problem_type': 'text',\n",
539
+ " 'lower_lim': 6,\n",
540
+ " 'label_col': 'Culture'},\n",
541
+ " {'config': 'bm6-white_type',\n",
542
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
543
+ " 'wandb_proj_name': 'British Museum',\n",
544
+ " 'model_base': 'google/efficientnet-b3',\n",
545
+ " 'problem_type': 'image',\n",
546
+ " 'lower_lim': 6,\n",
547
+ " 'label_col': 'Object type'},\n",
548
+ " {'config': 'bm6-white_material',\n",
549
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
550
+ " 'wandb_proj_name': 'British Museum',\n",
551
+ " 'model_base': 'google/efficientnet-b3',\n",
552
+ " 'problem_type': 'image',\n",
553
+ " 'lower_lim': 6,\n",
554
+ " 'label_col': 'Materials'},\n",
555
+ " {'config': 'bm6-white_culture',\n",
556
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
557
+ " 'wandb_proj_name': 'British Museum',\n",
558
+ " 'model_base': 'google/efficientnet-b3',\n",
559
+ " 'problem_type': 'image',\n",
560
+ " 'lower_lim': 6,\n",
561
+ " 'label_col': 'Culture'},\n",
562
+ " {'config': 'bm6-3Dwhite_type',\n",
563
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
564
+ " 'wandb_proj_name': 'British Museum',\n",
565
+ " 'model_base': 'google/efficientnet-b3',\n",
566
+ " 'problem_type': 'image',\n",
567
+ " 'lower_lim': 6,\n",
568
+ " 'label_col': 'Object type'},\n",
569
+ " {'config': 'bm6-3Dwhite_material',\n",
570
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
571
+ " 'wandb_proj_name': 'British Museum',\n",
572
+ " 'model_base': 'google/efficientnet-b3',\n",
573
+ " 'problem_type': 'image',\n",
574
+ " 'lower_lim': 6,\n",
575
+ " 'label_col': 'Materials'},\n",
576
+ " {'config': 'bm6-3Dwhite_culture',\n",
577
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
578
+ " 'wandb_proj_name': 'British Museum',\n",
579
+ " 'model_base': 'google/efficientnet-b3',\n",
580
+ " 'problem_type': 'image',\n",
581
+ " 'lower_lim': 6,\n",
582
+ " 'label_col': 'Culture'},\n",
583
+ " {'config': 'bm6-3Dwhite-1frame_type',\n",
584
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
585
+ " 'wandb_proj_name': 'British Museum',\n",
586
+ " 'model_base': 'google/efficientnet-b3',\n",
587
+ " 'problem_type': 'image',\n",
588
+ " 'lower_lim': 6,\n",
589
+ " 'label_col': 'Object type'},\n",
590
+ " {'config': 'bm6-3Dwhite-1frame_material',\n",
591
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
592
+ " 'wandb_proj_name': 'British Museum',\n",
593
+ " 'model_base': 'google/efficientnet-b3',\n",
594
+ " 'problem_type': 'image',\n",
595
+ " 'lower_lim': 6,\n",
596
+ " 'label_col': 'Materials'},\n",
597
+ " {'config': 'bm6-3Dwhite-1frame_culture',\n",
598
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
599
+ " 'wandb_proj_name': 'British Museum',\n",
600
+ " 'model_base': 'google/efficientnet-b3',\n",
601
+ " 'problem_type': 'image',\n",
602
+ " 'lower_lim': 6,\n",
603
+ " 'label_col': 'Culture'}]"
604
+ ]
605
+ },
606
+ "execution_count": 18,
607
+ "metadata": {},
608
+ "output_type": "execute_result"
609
+ }
610
+ ],
611
+ "source": [
612
+ "configs"
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "code",
617
+ "execution_count": null,
618
+ "metadata": {},
619
+ "outputs": [],
620
+ "source": []
621
+ }
622
+ ],
623
+ "metadata": {
624
+ "kernelspec": {
625
+ "display_name": "ArtifactClassification",
626
+ "language": "python",
627
+ "name": "python3"
628
+ },
629
+ "language_info": {
630
+ "codemirror_mode": {
631
+ "name": "ipython",
632
+ "version": 3
633
+ },
634
+ "file_extension": ".py",
635
+ "mimetype": "text/x-python",
636
+ "name": "python",
637
+ "nbconvert_exporter": "python",
638
+ "pygments_lexer": "ipython3",
639
+ "version": "3.10.12"
640
+ }
641
+ },
642
+ "nbformat": 4,
643
+ "nbformat_minor": 2
644
+ }
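
The `configs` output above enumerates every dataset and label-column pairing by hand. A minimal sketch of how the same list could be generated programmatically — `make_configs` is a hypothetical helper, not code from this commit; the field values mirror the output shown:

from itertools import product

# Illustrative sketch only: rebuilds the printed `configs` list from the
# dataset / label-column cross product shown in the notebook output above.
DATASETS = {
    "bm6-white": "james-burton/BritishMuseum-white",
    "bm6-3Dwhite": "james-burton/BritishMuseum-3Dwhite",
    "bm6-3Dwhite-1frame": "james-burton/BritishMuseum-3Dwhite-1frame",
}
LABEL_COLS = {"type": "Object type", "material": "Materials", "culture": "Culture"}

def make_configs(lower_lim: int = 6) -> list[dict]:
    return [
        {
            "config": f"{prefix}_{suffix}",
            "dataset": repo,
            "wandb_proj_name": "British Museum",
            "model_base": "google/efficientnet-b3",
            "problem_type": "image",
            "lower_lim": lower_lim,
            "label_col": label,
        }
        for (prefix, repo), (suffix, label) in product(DATASETS.items(), LABEL_COLS.items())
    ]

Generating the list this way keeps the nine configurations consistent if a dataset or label column is added later.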
0.12-get_wandb_results.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.13-bm_dates_col.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.2-testing_image_scraping.ipynb ADDED
@@ -0,0 +1,140 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1894,1101.507\n",
+ "https://media.britishmuseum.org/media/Repository/Documents/2014_10/6_14/c5015a41_782e_4eb7_badf_a3bc00f54f2c/preview_00426109_001.jpg\n",
+ "Image downloaded successfully!\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1903,1215.10\n",
+ "https://media.britishmuseum.org/media/Repository/Documents/2014_10/15_13/532668b9_0af1_4402_8e13_a3c500e1907c/preview_00944260_001.jpg\n",
+ "Image downloaded successfully!\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[9], line 27\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to download image. Status code:\u001b[39m\u001b[38;5;124m\"\u001b[39m, response\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# wait 20 seconds\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# response = requests.get(url, headers=headers)\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \n\u001b[1;32m 30\u001b[0m \u001b[38;5;66;03m# if response.status_code == 200:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# else:\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# print(\"Failed to download image. Status code:\", response.status_code)\u001b[39;00m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import pandas as pd\n",
+ "import time\n",
+ "\n",
+ "url = \"http://media.britishmuseum.org/media/Repository/Documents/2020_2/25_11/8772f2ea_b08f_46cf_8af2_ab6c00c10b84/preview_DSC_0760.jpg\"\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\"\n",
+ "}\n",
+ "\n",
+ "\n",
+ "df = pd.read_csv(\"../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv\")\n",
+ "df[\"Museum number\"] = df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
+ "\n",
+ "for index, row in df.iterrows():\n",
+ " print(row[\"Museum number\"])\n",
+ " url = row[\"Image\"]\n",
+ " print(url)\n",
+ " response = requests.get(url, verify=False)\n",
+ " if response.status_code == 200:\n",
+ " with open(f\"../data/raw/BM_images/{row['Museum number']}.jpg\", \"wb\") as f:\n",
+ " f.write(response.content)\n",
+ " print(\"Image downloaded successfully!\")\n",
+ " else:\n",
+ " print(\"Failed to download image. Status code:\", response.status_code)\n",
+ " # wait 20 seconds\n",
+ " time.sleep(20)\n",
+ "# response = requests.get(url, headers=headers)\n",
+ "\n",
+ "# if response.status_code == 200:\n",
+ "# with open(\"image.jpg\", \"wb\") as f:\n",
+ "# f.write(response.content)\n",
+ "# print(\"Image downloaded successfully!\")\n",
+ "# else:\n",
+ "# print(\"Failed to download image. Status code:\", response.status_code)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "460"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(df[\"Museum number\"] == \"null\").sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
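
The scraping loop in 0.2 was interrupted partway (see the KeyboardInterrupt above) and would restart from the top of the CSV. A hedged sketch of a resumable variant that skips files already on disk — `download_images` is a hypothetical helper, not code from this commit; paths and column names follow the notebook:

import time
from pathlib import Path

import pandas as pd
import requests

# Hypothetical resumable variant of the 0.2 loop: files that already exist are
# skipped, so an interrupted run can be restarted without re-downloading.
def download_images(csv_path: str, out_dir: str, delay: float = 20.0) -> None:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(csv_path)
    df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)
    df = df[df["Museum number"] != "null"]  # 460 rows had no usable number above
    for _, row in df.iterrows():
        target = out / f"{row['Museum number']}.jpg"
        if target.exists():
            continue  # already fetched in an earlier run
        resp = requests.get(row["Image"], verify=False, timeout=30)
        if resp.status_code == 200:
            target.write_bytes(resp.content)
        else:
            print("Failed:", row["Museum number"], resp.status_code)
        time.sleep(delay)  # stay polite to the media server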
0.3-testing_csv_join.ipynb ADDED
@@ -0,0 +1,973 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_751668/3571106454.py:2: DeprecationWarning: \n",
+ "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
+ "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
+ "but was not found to be installed on your system.\n",
+ "If this would cause problems for you,\n",
+ "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
+ " \n",
+ " import pandas as pd\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Europe\n",
+ "input_filepath = \"../data/raw\"\n",
+ "csv_files = os.listdir(f\"{input_filepath}/BM_csv_files/3000BC-AD500/\")\n",
+ "europe_csv_files = [file for file in csv_files if \"africa\" in file.lower()]\n",
+ "\n",
+ "if europe_csv_files:\n",
+ " # europe_csv_path = f\"{output_filepath}/BM_images/europe/\"\n",
+ " # if not os.path.exists(europe_csv_path):\n",
+ " # os.makedirs(europe_csv_path)\n",
+ "\n",
+ " europe_df = pd.DataFrame()\n",
+ " for csv_file in europe_csv_files:\n",
+ " csv_path = f\"{input_filepath}/BM_csv_files/3000BC-AD500/{csv_file}\"\n",
+ " df = pd.read_csv(csv_path)\n",
+ " europe_df = pd.concat([europe_df, df], ignore_index=True)\n",
+ "\n",
+ " # europe_df.drop_duplicates(inplace=True)\n",
+ " # europe_df.to_csv(f\"{europe_csv_path}/europe.csv\", index=False)\n",
+ " # print(\"Europe CSV file created successfully!\")\n",
+ "else:\n",
+ " print(\"No Europe CSV files found.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "region = \"africa\"\n",
+ "region_csv_files = [file for file in csv_files if region in file.lower()]\n",
+ "region_df = pd.concat(\n",
+ " [\n",
+ " pd.read_csv(f\"{input_filepath}/BM_csv_files/3000BC-AD500/{file}\")\n",
+ " for file in region_csv_files\n",
+ " ]\n",
+ ")\n",
+ "region_df[\"Museum number\"] = region_df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
+ "region_df.drop_duplicates(inplace=True)\n",
+ "region_df = region_df[region_df[\"Museum number\"] != \"null\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Image</th>\n",
+ " <th>Object type</th>\n",
+ " <th>Museum number</th>\n",
+ " <th>Title</th>\n",
+ " <th>Denomination</th>\n",
+ " <th>Escapement</th>\n",
+ " <th>Description</th>\n",
+ " <th>Producer name</th>\n",
+ " <th>School/style</th>\n",
+ " <th>State</th>\n",
+ " <th>...</th>\n",
+ " <th>Acq date</th>\n",
+ " <th>Acq notes (acq)</th>\n",
+ " <th>Acq notes (exc)</th>\n",
+ " <th>Dept</th>\n",
+ " <th>BM/Big number</th>\n",
+ " <th>Reg number</th>\n",
+ " <th>Add ids</th>\n",
+ " <th>Cat no</th>\n",
+ " <th>Banknote serial number</th>\n",
+ " <th>Joined objects</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acorn lekythos</td>\n",
+ " <td>No: 1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1888</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>No: 1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble corner palmetto with bird...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>No: 1886,0401.1215</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>Miscellaneous number: 1886,0401.44</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>adze; hoe</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Iron adze or hoe.</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886 by Petrie.</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 1886.XI.5 (Publication p...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>aegis; votive offering</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Solid-cast fragmentary Menat-counterweight of ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885. 1885: excavated by the Eg...</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 86.339 (Accession Number...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44921</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whetstone</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Whetstone. Well worn on both sides; dull beige.</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 2478 (Accession Number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44922</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whetstone</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Whetstone (?), made out of sandstone, in the s...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885. 1885: excavated by Willia...</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 86.185 (Accession Number...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44923</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whistle</td>\n",
+ " <td>No: EA22513</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>A terracotta whistle, still working, roughly i...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1885</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Egypt and Sudan</td>\n",
+ " <td>EA22513</td>\n",
+ " <td>1885,0101.361</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44924</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whistle</td>\n",
+ " <td>No: 1906,0301.7</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Hand-modelled terracotta whistle, still workin...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1906</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated May 1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1906,0301.7</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44925</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>null; plate</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Body of North Ionian Late Wild Goat Style pott...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 26.2.35 (Accession Number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>44926 rows × 47 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Image \\\n",
+ "0 https://media.britishmuseum.org/media/Reposito... \n",
+ "1 https://media.britishmuseum.org/media/Reposito... \n",
+ "2 https://media.britishmuseum.org/media/Reposito... \n",
+ "3 https://media.britishmuseum.org/media/Reposito... \n",
+ "4 https://media.britishmuseum.org/media/Reposito... \n",
+ "... ... \n",
+ "44921 https://media.britishmuseum.org/media/Reposito... \n",
+ "44922 https://media.britishmuseum.org/media/Reposito... \n",
+ "44923 https://media.britishmuseum.org/media/Reposito... \n",
+ "44924 https://media.britishmuseum.org/media/Reposito... \n",
+ "44925 https://media.britishmuseum.org/media/Reposito... \n",
+ "\n",
+ " Object type Museum number Title Denomination \\\n",
+ "0 acorn lekythos No: 1888,0601.716 NaN NaN \n",
+ "1 acroterion No: 1886,0401.45 NaN NaN \n",
+ "2 acroterion No: 1886,0401.1215 NaN NaN \n",
+ "3 adze; hoe No: null NaN NaN \n",
+ "4 aegis; votive offering No: null NaN NaN \n",
+ "... ... ... ... ... \n",
+ "44921 whetstone No: null NaN NaN \n",
+ "44922 whetstone No: null NaN NaN \n",
+ "44923 whistle No: EA22513 NaN NaN \n",
+ "44924 whistle No: 1906,0301.7 NaN NaN \n",
+ "44925 null; plate No: null NaN NaN \n",
+ "\n",
+ " Escapement Description \\\n",
+ "0 NaN Attic red-figured pottery acorn lekythos, rest... \n",
+ "1 NaN Fragment of a marble corner palmetto with bird... \n",
+ "2 NaN Fragment of a marble acroterion palmetto. Two ... \n",
+ "3 NaN Iron adze or hoe. \n",
+ "4 NaN Solid-cast fragmentary Menat-counterweight of ... \n",
+ "... ... ... \n",
+ "44921 NaN Whetstone. Well worn on both sides; dull beige. \n",
+ "44922 NaN Whetstone (?), made out of sandstone, in the s... \n",
+ "44923 NaN A terracotta whistle, still working, roughly i... \n",
+ "44924 NaN Hand-modelled terracotta whistle, still workin... \n",
+ "44925 NaN Body of North Ionian Late Wild Goat Style pott... \n",
+ "\n",
+ " Producer name School/style State ... Acq date Acq notes (acq) \\\n",
+ "0 NaN NaN NaN ... 1888 NaN \n",
+ "1 NaN NaN NaN ... NaN NaN \n",
+ "2 NaN NaN NaN ... 1886 NaN \n",
+ "3 NaN NaN NaN ... NaN NaN \n",
+ "4 NaN NaN NaN ... NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "44921 NaN NaN NaN ... NaN NaN \n",
+ "44922 NaN NaN NaN ... 1886 NaN \n",
+ "44923 NaN NaN NaN ... 1885 NaN \n",
+ "44924 NaN NaN NaN ... 1906 NaN \n",
+ "44925 NaN NaN NaN ... NaN NaN \n",
+ "\n",
+ " Acq notes (exc) Dept \\\n",
+ "0 Excavated 1885-1886. Greek and Roman \n",
+ "1 NaN Greek and Roman \n",
+ "2 NaN Greek and Roman \n",
+ "3 Excavated 1885-1886 by Petrie. External \n",
+ "4 Excavated 1884-1885. 1885: excavated by the Eg... External \n",
+ "... ... ... \n",
+ "44921 NaN External \n",
+ "44922 Excavated 1884-1885. 1885: excavated by Willia... External \n",
+ "44923 NaN Egypt and Sudan \n",
+ "44924 Excavated May 1886. Greek and Roman \n",
+ "44925 NaN External \n",
+ "\n",
+ " BM/Big number Reg number \\\n",
+ "0 NaN 1888,0601.716 \n",
+ "1 NaN 1886,0401.45 \n",
+ "2 NaN 1886,0401.1215 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "44921 NaN NaN \n",
+ "44922 NaN NaN \n",
+ "44923 EA22513 1885,0101.361 \n",
+ "44924 NaN 1906,0301.7 \n",
+ "44925 NaN NaN \n",
+ "\n",
+ " Add ids Cat no \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 Miscellaneous number: 1886,0401.44 NaN \n",
+ "3 Miscellaneous number: 1886.XI.5 (Publication p... NaN \n",
+ "4 Miscellaneous number: 86.339 (Accession Number... NaN \n",
+ "... ... ... \n",
+ "44921 Miscellaneous number: 2478 (Accession Number) NaN \n",
+ "44922 Miscellaneous number: 86.185 (Accession Number... NaN \n",
+ "44923 NaN NaN \n",
+ "44924 NaN NaN \n",
+ "44925 Miscellaneous number: 26.2.35 (Accession Number) NaN \n",
+ "\n",
+ " Banknote serial number Joined objects \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "44921 NaN NaN \n",
+ "44922 NaN NaN \n",
+ "44923 NaN NaN \n",
+ "44924 NaN NaN \n",
+ "44925 NaN NaN \n",
+ "\n",
+ "[44926 rows x 47 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "europe_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "region_df.drop_duplicates(inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Image</th>\n",
+ " <th>Object type</th>\n",
+ " <th>Museum number</th>\n",
+ " <th>Title</th>\n",
+ " <th>Denomination</th>\n",
+ " <th>Escapement</th>\n",
+ " <th>Description</th>\n",
+ " <th>Producer name</th>\n",
+ " <th>School/style</th>\n",
+ " <th>State</th>\n",
+ " <th>...</th>\n",
+ " <th>Acq date</th>\n",
+ " <th>Acq notes (acq)</th>\n",
+ " <th>Acq notes (exc)</th>\n",
+ " <th>Dept</th>\n",
+ " <th>BM/Big number</th>\n",
+ " <th>Reg number</th>\n",
+ " <th>Add ids</th>\n",
+ " <th>Cat no</th>\n",
+ " <th>Banknote serial number</th>\n",
+ " <th>Joined objects</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acorn lekythos</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1888</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble corner palmetto with bird...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>Miscellaneous number: 1886,0401.44</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>7</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>alabastron</td>\n",
+ " <td>1894,1101.213</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Core-formed glass alabastron.\\r\\nOpaque orange...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1894</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1894,1101.213</td>\n",
+ " <td>Miscellaneous number: DBH.0056 (Harden number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>8</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>alabastron</td>\n",
+ " <td>132114</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Large baggy alabastron of horizontal banded, t...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1857</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Middle East</td>\n",
+ " <td>132114</td>\n",
+ " <td>1857,1220.1</td>\n",
+ " <td>Miscellaneous number: 416 (paper label attache...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9301</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>vessel-fitting; lekane</td>\n",
+ " <td>1886,0401.1218</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Chian pottery plastic head, originally attache...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1218</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9302</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>vessel-fitting; lid</td>\n",
+ " <td>1886,0401.1429</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Plastic double head of Chian, probably black-f...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1429</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9304</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>volute krater</td>\n",
+ " <td>1924,1201.41</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Volute handle and rim sherd (consisting of 3 f...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1924,1201.41</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9310</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>volute krater</td>\n",
+ " <td>1924,1201.40</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Sherd (mended from two fragments) of Laconian ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1924,1201.40</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9317</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>wall-painting</td>\n",
+ " <td>1886,0401.67</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Stucco wall fragment, with marks of pointed to...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.67</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>19570 rows × 47 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Image \\\n",
+ "0 https://media.britishmuseum.org/media/Reposito... \n",
+ "1 https://media.britishmuseum.org/media/Reposito... \n",
+ "2 https://media.britishmuseum.org/media/Reposito... \n",
+ "7 https://media.britishmuseum.org/media/Reposito... \n",
+ "8 https://media.britishmuseum.org/media/Reposito... \n",
+ "... ... \n",
+ "9301 https://media.britishmuseum.org/media/Reposito... \n",
+ "9302 https://media.britishmuseum.org/media/Reposito... \n",
+ "9304 https://media.britishmuseum.org/media/Reposito... \n",
+ "9310 https://media.britishmuseum.org/media/Reposito... \n",
+ "9317 https://media.britishmuseum.org/media/Reposito... \n",
+ "\n",
+ " Object type Museum number Title Denomination Escapement \\\n",
+ "0 acorn lekythos 1888,0601.716 NaN NaN NaN \n",
+ "1 acroterion 1886,0401.45 NaN NaN NaN \n",
+ "2 acroterion 1886,0401.1215 NaN NaN NaN \n",
+ "7 alabastron 1894,1101.213 NaN NaN NaN \n",
+ "8 alabastron 132114 NaN NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "9301 vessel-fitting; lekane 1886,0401.1218 NaN NaN NaN \n",
+ "9302 vessel-fitting; lid 1886,0401.1429 NaN NaN NaN \n",
+ "9304 volute krater 1924,1201.41 NaN NaN NaN \n",
+ "9310 volute krater 1924,1201.40 NaN NaN NaN \n",
+ "9317 wall-painting 1886,0401.67 NaN NaN NaN \n",
+ "\n",
+ " Description Producer name \\\n",
+ "0 Attic red-figured pottery acorn lekythos, rest... NaN \n",
+ "1 Fragment of a marble corner palmetto with bird... NaN \n",
+ "2 Fragment of a marble acroterion palmetto. Two ... NaN \n",
+ "7 Core-formed glass alabastron.\\r\\nOpaque orange... NaN \n",
+ "8 Large baggy alabastron of horizontal banded, t... NaN \n",
+ "... ... ... \n",
+ "9301 Chian pottery plastic head, originally attache... NaN \n",
+ "9302 Plastic double head of Chian, probably black-f... NaN \n",
+ "9304 Volute handle and rim sherd (consisting of 3 f... NaN \n",
+ "9310 Sherd (mended from two fragments) of Laconian ... NaN \n",
+ "9317 Stucco wall fragment, with marks of pointed to... NaN \n",
+ "\n",
+ " School/style State ... Acq date Acq notes (acq) Acq notes (exc) \\\n",
+ "0 NaN NaN ... 1888 NaN Excavated 1885-1886. \n",
+ "1 NaN NaN ... NaN NaN NaN \n",
+ "2 NaN NaN ... 1886 NaN NaN \n",
+ "7 NaN NaN ... 1894 NaN NaN \n",
+ "8 NaN NaN ... 1857 NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "9301 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
+ "9302 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
+ "9304 NaN NaN ... NaN NaN NaN \n",
+ "9310 NaN NaN ... NaN NaN NaN \n",
+ "9317 NaN NaN ... 1886 NaN NaN \n",
+ "\n",
+ " Dept BM/Big number Reg number \\\n",
+ "0 Greek and Roman NaN 1888,0601.716 \n",
+ "1 Greek and Roman NaN 1886,0401.45 \n",
+ "2 Greek and Roman NaN 1886,0401.1215 \n",
+ "7 Greek and Roman NaN 1894,1101.213 \n",
+ "8 Middle East 132114 1857,1220.1 \n",
+ "... ... ... ... \n",
+ "9301 Greek and Roman NaN 1886,0401.1218 \n",
+ "9302 Greek and Roman NaN 1886,0401.1429 \n",
+ "9304 Greek and Roman NaN 1924,1201.41 \n",
+ "9310 Greek and Roman NaN 1924,1201.40 \n",
+ "9317 Greek and Roman NaN 1886,0401.67 \n",
+ "\n",
+ " Add ids Cat no \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 Miscellaneous number: 1886,0401.44 NaN \n",
+ "7 Miscellaneous number: DBH.0056 (Harden number) NaN \n",
+ "8 Miscellaneous number: 416 (paper label attache... NaN \n",
+ "... ... ... \n",
+ "9301 NaN NaN \n",
+ "9302 NaN NaN \n",
+ "9304 NaN NaN \n",
+ "9310 NaN NaN \n",
+ "9317 NaN NaN \n",
+ "\n",
+ " Banknote serial number Joined objects \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "7 NaN NaN \n",
+ "8 NaN NaN \n",
+ "... ... ... \n",
+ "9301 NaN NaN \n",
+ "9302 NaN NaN \n",
+ "9304 NaN NaN \n",
+ "9310 NaN NaN \n",
+ "9317 NaN NaN \n",
+ "\n",
+ "[19570 rows x 47 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "region_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
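
A small inconsistency worth noting in 0.3: the first cell builds `europe_df` under a `# Europe` comment but filters filenames on "africa", so the variable name does not match the data actually loaded. A hedged sketch of the later, parameterised approach wrapped into one function — `load_region` is hypothetical, not code from this commit; the cleaning steps mirror the notebook's third cell:

import os

import pandas as pd

# Hypothetical tidy-up of the per-region join in 0.3: one region parameter
# drives both the filename filter and the result, avoiding name mismatches.
def load_region(input_filepath: str, region: str) -> pd.DataFrame:
    csv_dir = f"{input_filepath}/BM_csv_files/3000BC-AD500"
    files = [f for f in os.listdir(csv_dir) if region.lower() in f.lower()]
    if not files:
        raise FileNotFoundError(f"No CSV files found for region {region!r}")
    df = pd.concat(
        (pd.read_csv(os.path.join(csv_dir, f)) for f in files), ignore_index=True
    )
    # Same cleaning as the notebook: strip the "No: " prefix, drop duplicates
    # and rows whose museum number is the literal string "null".
    df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)
    df = df.drop_duplicates()
    return df[df["Museum number"] != "null"]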
0.4-testing_tif_images.ipynb ADDED
@@ -0,0 +1,71 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from PIL import Image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "im = Image.open(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\")\n",
+ "name = str(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\").rstrip(\".tif\")\n",
+ "im.save(\"image\" + \".jpg\", \"JPEG\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
+ "Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
+ "eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
+ ]
+ }
+ ],
+ "source": [
+ "im.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
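
One pitfall in 0.4: `str.rstrip(".tif")` strips any trailing run of the characters `.`, `t`, `i` and `f`, not the literal suffix, so a stem ending in those letters would lose more than its extension (here the `name` variable is unused, so the bug stays latent). A suffix-safe sketch for batch conversion — `convert_tifs` is a hypothetical helper, not code from this commit:

from pathlib import Path

from PIL import Image

# Hypothetical batch version of the TIF-to-JPEG test in 0.4. Path.with_suffix
# replaces only the real extension, avoiding the rstrip(".tif") pitfall.
def convert_tifs(root: str) -> None:
    for tif_path in Path(root).rglob("*.tif"):
        jpg_path = tif_path.with_suffix(".jpg")
        if jpg_path.exists():
            continue  # already converted
        with Image.open(tif_path) as im:
            im.convert("RGB").save(jpg_path, "JPEG")  # JPEG has no alpha channel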
0.5-testing_transparent_background.ipynb ADDED
@@ -0,0 +1,321 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cv2\n",
+ "import numpy as np\n",
+ "\n",
+ "from PIL import Image\n",
+ "from transparent_background import Remover\n",
+ "import pandas as pd\n",
+ "from tqdm import tqdm\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Settings -> Mode=base-nightly, Device=cuda:0, Torchscript=disabled\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load model\n",
+ "# remover = Remover() # default setting\n",
+ "# remover = Remover(mode='fast', jit=True, device='cuda:0', ckpt='~/latest.pth', url=\"https://drive.google.com/file/d/13oBl5MTVcWER3YU4fSxW3ATlVfueFQPY/view?usp=share_link\", ckpt_name=\"ckpt_base.pth\")\n",
+ "remover = Remover(mode=\"base-nightly\") # nightly release checkpoint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Usage for image\n",
+ "img = Image.open(\"../data/raw/images/egyptian/1953/1953.1-tt.jpg\").convert(\"RGB\") # read image\n",
+ "\n",
+ "out = remover.process(img) # default setting - transparent background\n",
+ "# out = remover.process(img, type='rgba') # same as above\n",
+ "# out = remover.process(img, type='map') # object map only\n",
+ "# out = remover.process(img, type='green') # image matting - green screen\n",
+ "# out = remover.process(img, type='white') # change background with white color\n",
+ "# out = remover.process(img, type=[255, 0, 0]) # change background with color code [255, 0, 0]\n",
+ "# out = remover.process(img, type='blur') # blur background\n",
+ "# out = remover.process(img, type='overlay') # overlay object map onto the image\n",
+ "# out = remover.process(img, type='samples/background.jpg') # use another image as a background\n",
+ "\n",
+ "# out = remover.process(img, threshold=0.5) # use threshold parameter for hard prediction.\n",
+ "\n",
+ "out.save(\"output.png\") # save result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "img_df = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
+ "img_df[\"full_path\"] = img_df.apply(lambda row: os.path.join(row[\"root\"], row[\"file\"]), axis=1)\n",
+ "img_df[\"new_root\"] = img_df[\"root\"].apply(\n",
+ " lambda x: x.replace(\"data/raw/images/\", \"data/processed/OM_images_white/\")\n",
+ ")\n",
+ "img_df[\"new_full_path\"] = img_df.apply(lambda row: os.path.join(row[\"new_root\"], row[\"file\"]), axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>file</th>\n",
+ " <th>root</th>\n",
+ " <th>obj_num</th>\n",
+ " <th>full_path</th>\n",
+ " <th>new_root</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.15.68</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.15.68.jpg</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1985.52.37.ff2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.f...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>1985.81.4496 d2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.81.4496</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.81.4496...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>1985.9.1.1-d4.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.9.1</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.9.1.1-d...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>1985.52.37.sf2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.s...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39239</th>\n",
+ " <td>2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 bb.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39240</th>\n",
+ " <td>2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.71</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.71 ll.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39241</th>\n",
+ " <td>2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 rr.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39242</th>\n",
+ " <td>1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/1963</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39243</th>\n",
+ " <td>1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.2.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/1963</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>39244 rows × 5 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " file root \\\n",
+ "0 1985.15.68.jpg data/raw/images/fulling_mill/1985 \n",
+ "1 1985.52.37.ff2.jpg data/raw/images/fulling_mill/1985 \n",
+ "2 1985.81.4496 d2.jpg data/raw/images/fulling_mill/1985 \n",
+ "3 1985.9.1.1-d4.jpg data/raw/images/fulling_mill/1985 \n",
+ "4 1985.52.37.sf2.jpg data/raw/images/fulling_mill/1985 \n",
+ "... ... ... \n",
+ "39239 2014.1.2 bb.jpg data/raw/images/egyptian/2014 \n",
+ "39240 2014.1.71 ll.jpg data/raw/images/egyptian/2014 \n",
+ "39241 2014.1.2 rr.jpg data/raw/images/egyptian/2014 \n",
+ "39242 1963.4.jpg data/raw/images/egyptian/1963 \n",
+ "39243 1963.4.2.jpg data/raw/images/egyptian/1963 \n",
+ "\n",
+ " obj_num full_path \\\n",
+ "0 durma.1985.15.68 data/raw/images/fulling_mill/1985/1985.15.68.jpg \n",
+ "1 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.f... \n",
+ "2 durma.1985.81.4496 data/raw/images/fulling_mill/1985/1985.81.4496... \n",
+ "3 durma.1985.9.1 data/raw/images/fulling_mill/1985/1985.9.1.1-d... \n",
+ "4 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.s... \n",
+ "... ... ... \n",
+ "39239 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 bb.jpg \n",
+ "39240 durom.2014.1.71 data/raw/images/egyptian/2014/2014.1.71 ll.jpg \n",
+ "39241 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 rr.jpg \n",
+ "39242 durom.1963.4 data/raw/images/egyptian/1963/1963.4.jpg \n",
+ "39243 durom.1963.4 data/raw/images/egyptian/1963/1963.4.2.jpg \n",
+ "\n",
+ " new_root \n",
+ "0 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "1 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "2 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "3 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "4 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "... ... \n",
+ "39239 data/processed/OM_images_white/egyptian/2014 \n",
+ "39240 data/processed/OM_images_white/egyptian/2014 \n",
+ "39241 data/processed/OM_images_white/egyptian/2014 \n",
+ "39242 data/processed/OM_images_white/egyptian/1963 \n",
+ "39243 data/processed/OM_images_white/egyptian/1963 \n",
+ "\n",
+ "[39244 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "img_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%| | 84/39244 [00:06<52:59, 12.32it/s] \n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[26], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, row \u001b[38;5;129;01min\u001b[39;00m tqdm(img_df\u001b[38;5;241m.\u001b[39miterrows(), total\u001b[38;5;241m=\u001b[39mimg_df\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]):\n\u001b[1;32m 2\u001b[0m img \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mopen(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfull_path\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;66;03m# read image\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mremover\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwhite\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# change backround with white color\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# make sure the directory exists\u001b[39;00m\n\u001b[1;32m 5\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnew_root\u001b[39m\u001b[38;5;124m'\u001b[39m], exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/Remover.py:154\u001b[0m, in \u001b[0;36mRemover.process\u001b[0;34m(self, img, type, threshold)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;124;03m img (PIL.Image): input image as PIL.Image type\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 151\u001b[0m \n\u001b[1;32m 152\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 153\u001b[0m shape \u001b[38;5;241m=\u001b[39m img\u001b[38;5;241m.\u001b[39msize[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m--> 154\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 155\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 156\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/torchvision/transforms/transforms.py:95\u001b[0m, in \u001b[0;36mCompose.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, img):\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransforms:\n\u001b[0;32m---> 95\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/utils.py:105\u001b[0m, in \u001b[0;36mnormalize.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 103\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdiv\n\u001b[1;32m 104\u001b[0m img \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmean\n\u001b[0;32m--> 105\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstd\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "for index, row in tqdm(img_df.iterrows(), total=img_df.shape[0]):\n",
+ " img = Image.open('../' + row['full_path']).convert('RGB') # read image\n",
+ " out = remover.process(img, type='white') # change background with white color\n",
+ " # make sure the directory exists\n",
+ " os.makedirs('../' + row['new_root'], exist_ok=True)\n",
+ " out.save('../' + row['new_full_path']) # save result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
0.7Mahnaz-efficientnet.ipynb ADDED
@@ -0,0 +1,492 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Finetuning efficientNet"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "Lets try the model that is trending for image classification on HuggingFace: efficientnet_b2.ra_in1k"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "data": {
24
+ "application/vnd.jupyter.widget-view+json": {
25
+ "model_id": "0a87f155dc5d480c8b68caf0c69f69cd",
26
+ "version_major": 2,
27
+ "version_minor": 0
28
+ },
29
+ "text/plain": [
30
+ "Downloading readme: 0%| | 0.00/5.16k [00:00<?, ?B/s]"
31
+ ]
32
+ },
33
+ "metadata": {},
34
+ "output_type": "display_data"
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "8b705267629c4028bc48465ab583337b",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Downloading data: 0%| | 0.00/120M [00:00<?, ?B/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "data": {
52
+ "application/vnd.jupyter.widget-view+json": {
53
+ "model_id": "c825ef39efb24233acabac80abb960fa",
54
+ "version_major": 2,
55
+ "version_minor": 0
56
+ },
57
+ "text/plain": [
58
+ "Downloading data: 0%| | 0.00/23.9M [00:00<?, ?B/s]"
59
+ ]
60
+ },
61
+ "metadata": {},
62
+ "output_type": "display_data"
63
+ },
64
+ {
65
+ "data": {
66
+ "application/vnd.jupyter.widget-view+json": {
67
+ "model_id": "d41e4e130864414dba3e419eada3941d",
68
+ "version_major": 2,
69
+ "version_minor": 0
70
+ },
71
+ "text/plain": [
72
+ "Generating train split: 0%| | 0/50000 [00:00<?, ? examples/s]"
73
+ ]
74
+ },
75
+ "metadata": {},
76
+ "output_type": "display_data"
77
+ },
78
+ {
79
+ "data": {
80
+ "application/vnd.jupyter.widget-view+json": {
81
+ "model_id": "7e5cfe4b7b7244beb6a2b19d24fa9c63",
82
+ "version_major": 2,
83
+ "version_minor": 0
84
+ },
85
+ "text/plain": [
86
+ "Generating test split: 0%| | 0/10000 [00:00<?, ? examples/s]"
87
+ ]
88
+ },
89
+ "metadata": {},
90
+ "output_type": "display_data"
91
+ }
92
+ ],
93
+ "source": [
94
+ "from datasets import load_dataset\n",
95
+ "\n",
96
+ "cifar10dataset = load_dataset(\"cifar10\", split=\"train\")"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 2,
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "cifar10dataset = cifar10dataset.train_test_split(test_size=0.2)"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 3,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "labels = cifar10dataset[\"train\"].features[\"label\"].names\n",
115
+ "label2id, id2label = dict(), dict()\n",
116
+ "for i, label in enumerate(labels):\n",
117
+ " label2id[label] = str(i)\n",
118
+ " id2label[str(i)] = label"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 4,
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "from transformers import AutoImageProcessor\n",
128
+ "\n",
129
+ "# import timm\n",
130
+ "# model = timm.create_model(\"hf_hub:timm/efficientnet_b2.ra_in1k\", pretrained=True)\n",
131
+ "\n",
132
+ "checkpoint = \"google/efficientnet-b3\"\n",
133
+ "image_processor = AutoImageProcessor.from_pretrained(checkpoint)"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 5,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
143
+ "\n",
144
+ "normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
145
+ "size = (\n",
146
+ " image_processor.size[\"shortest_edge\"]\n",
147
+ " if \"shortest_edge\" in image_processor.size\n",
148
+ " else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
149
+ ")\n",
150
+ "_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 6,
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "def transforms(examples):\n",
160
+ " examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"img\"]]\n",
161
+ " del examples[\"img\"]\n",
162
+ " return examples"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 14,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "import pandas as pd\n",
172
+ "import os\n",
173
+ "\n",
174
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
175
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
176
+ "\n",
177
+ "# Group by 'obj_num' and count occurrences\n",
178
+ "obj_num_counts = file2obj[\"obj_num\"].value_counts()\n",
179
+ "\n",
180
+ "# Filter rows where 'obj_num' appears more than twice\n",
181
+ "file2obj_3 = file2obj[file2obj[\"obj_num\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": 15,
187
+ "metadata": {},
188
+ "outputs": [
189
+ {
190
+ "data": {
191
+ "application/vnd.jupyter.widget-view+json": {
192
+ "model_id": "352630377c4f42adad3b161fd95e7545",
193
+ "version_major": 2,
194
+ "version_minor": 0
195
+ },
196
+ "text/plain": [
197
+ "Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
198
+ ]
199
+ },
200
+ "metadata": {},
201
+ "output_type": "display_data"
202
+ }
203
+ ],
204
+ "source": [
205
+ "from datasets import Dataset, Image, DatasetDict\n",
206
+ "\n",
207
+ "\n",
208
+ "ds = Dataset.from_pandas(file2obj_3[[\"image\", \"obj_num\"]], preserve_index=False).cast_column(\n",
209
+ " \"image\", Image()\n",
210
+ ")\n",
211
+ "ds = ds.class_encode_column(\"obj_num\")\n",
212
+ "trainval_test = ds.train_test_split(stratify_by_column=\"obj_num\", test_size=0.16)\n",
213
+ "train_val = trainval_test[\"train\"].train_test_split(\n",
214
+ " stratify_by_column=\"obj_num\", test_size=16 / 84\n",
215
+ ")\n",
216
+ "ds = DatasetDict(\n",
217
+ " {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
218
+ ")"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 17,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "cifar10dataset = cifar10dataset.with_transform(transforms)\n",
228
+ "# cifar10dataset = ds.map(transforms)"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "from transformers import DefaultDataCollator\n",
238
+ "\n",
239
+ "data_collator = DefaultDataCollator()"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "import evaluate\n",
249
+ "\n",
250
+ "accuracy = evaluate.load(\"accuracy\")"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": [
259
+ "import numpy as np\n",
260
+ "\n",
261
+ "\n",
262
+ "def compute_metrics(eval_pred):\n",
263
+ " predictions, labels = eval_pred\n",
264
+ " predictions = np.argmax(predictions, axis=1)\n",
265
+ " return accuracy.compute(predictions=predictions, references=labels)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stderr",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
278
+ "- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([10, 1536]) in the model instantiated\n",
279
+ "- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated\n",
280
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
281
+ ]
282
+ }
283
+ ],
284
+ "source": [
285
+ "from transformers import AutoModelForImageClassification, TrainingArguments, Trainer\n",
286
+ "\n",
287
+ "model = AutoModelForImageClassification.from_pretrained(\n",
288
+ " checkpoint,\n",
289
+ " num_labels=len(labels),\n",
290
+ " ignore_mismatched_sizes=True,\n",
291
+ " id2label=id2label,\n",
292
+ " label2id=label2id,\n",
293
+ ")"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "import torch\n",
303
+ "\n",
304
+ "torch.cuda.empty_cache()"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {},
311
+ "outputs": [
312
+ {
313
+ "name": "stderr",
314
+ "output_type": "stream",
315
+ "text": [
316
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
317
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjameswburton18\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
318
+ ]
319
+ },
320
+ {
321
+ "data": {
322
+ "text/html": [
323
+ "Tracking run with wandb version 0.16.3"
324
+ ],
325
+ "text/plain": [
326
+ "<IPython.core.display.HTML object>"
327
+ ]
328
+ },
329
+ "metadata": {},
330
+ "output_type": "display_data"
331
+ },
332
+ {
333
+ "data": {
334
+ "text/html": [
335
+ "Run data is saved locally in <code>/home/james/CodingProjects/ArcPostDoc/ArtifactClassification/notebooks/wandb/run-20240214_115817-fyblqcba</code>"
336
+ ],
337
+ "text/plain": [
338
+ "<IPython.core.display.HTML object>"
339
+ ]
340
+ },
341
+ "metadata": {},
342
+ "output_type": "display_data"
343
+ },
344
+ {
345
+ "data": {
346
+ "text/html": [
347
+ "Syncing run <strong><a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">passionate-lovebird-214</a></strong> to <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
348
+ ],
349
+ "text/plain": [
350
+ "<IPython.core.display.HTML object>"
351
+ ]
352
+ },
353
+ "metadata": {},
354
+ "output_type": "display_data"
355
+ },
356
+ {
357
+ "data": {
358
+ "text/html": [
359
+ " View project at <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface</a>"
360
+ ],
361
+ "text/plain": [
362
+ "<IPython.core.display.HTML object>"
363
+ ]
364
+ },
365
+ "metadata": {},
366
+ "output_type": "display_data"
367
+ },
368
+ {
369
+ "data": {
370
+ "text/html": [
371
+ " View run at <a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba</a>"
372
+ ],
373
+ "text/plain": [
374
+ "<IPython.core.display.HTML object>"
375
+ ]
376
+ },
377
+ "metadata": {},
378
+ "output_type": "display_data"
379
+ },
380
+ {
381
+ "data": {
382
+ "application/vnd.jupyter.widget-view+json": {
383
+ "model_id": "a8012d9b2c7b47c5aa2533983016d3a2",
384
+ "version_major": 2,
385
+ "version_minor": 0
386
+ },
387
+ "text/plain": [
388
+ " 0%| | 0/3750 [00:00<?, ?it/s]"
389
+ ]
390
+ },
391
+ "metadata": {},
392
+ "output_type": "display_data"
393
+ },
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "{'loss': 2.3286, 'learning_rate': 1.3333333333333334e-06, 'epoch': 0.01}\n"
399
+ ]
400
+ },
401
+ {
402
+ "ename": "KeyboardInterrupt",
403
+ "evalue": "",
404
+ "output_type": "error",
405
+ "traceback": [
406
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
407
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
408
+ "Cell \u001b[0;32mIn[13], line 28\u001b[0m\n\u001b[1;32m 1\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 2\u001b[0m output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcifar10_efficientnet\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m remove_unused_columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m push_to_hub\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 18\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 19\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 20\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 25\u001b[0m compute_metrics\u001b[38;5;241m=\u001b[39mcompute_metrics,\n\u001b[1;32m 26\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
409
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1539\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1537\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
410
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1881\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1879\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss_step\n\u001b[0;32m-> 1881\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_flos \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1883\u001b[0m is_last_step_and_steps_less_than_grad_acc \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1884\u001b[0m steps_in_epoch \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;129;01mand\u001b[39;00m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m==\u001b[39m steps_in_epoch\n\u001b[1;32m 1885\u001b[0m )\n\u001b[1;32m 1887\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1888\u001b[0m total_batched_samples \u001b[38;5;241m%\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1889\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1893\u001b[0m \u001b[38;5;66;03m# the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered\u001b[39;00m\n\u001b[1;32m 1894\u001b[0m \u001b[38;5;66;03m# in accelerate. So, explicitly enable sync gradients to True in that case.\u001b[39;00m\n",
411
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:3543\u001b[0m, in \u001b[0;36mTrainer.floating_point_ops\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 3530\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3531\u001b[0m \u001b[38;5;124;03mFor models that inherit from [`PreTrainedModel`], uses that method to compute the number of floating point\u001b[39;00m\n\u001b[1;32m 3532\u001b[0m \u001b[38;5;124;03moperations for every backward + forward pass. If using another model, either implement such a method in the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 3540\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 3541\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3542\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloating_point_ops\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 3543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3544\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3545\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
412
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1154\u001b[0m, in \u001b[0;36mModuleUtilsMixin.floating_point_ops\u001b[0;34m(self, input_dict, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfloating_point_ops\u001b[39m(\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28mself\u001b[39m, input_dict: Dict[\u001b[38;5;28mstr\u001b[39m, Union[torch\u001b[38;5;241m.\u001b[39mTensor, Any]], exclude_embeddings: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 1132\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;124;03m Get number of (optionally, non-embeddings) floating-point operations for the forward and backward passes of a\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;124;03m batch with this transformer model. Default approximation neglects the quadratic dependency on the number of\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1151\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 1152\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_tokens(input_dict) \u001b[38;5;241m*\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_parameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexclude_embeddings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude_embeddings\u001b[49m\u001b[43m)\u001b[49m\n",
413
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1078\u001b[0m, in \u001b[0;36mModuleUtilsMixin.num_parameters\u001b[0;34m(self, only_trainable, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[0;32m-> 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(module_type, nn\u001b[38;5;241m.\u001b[39mEmbedding)\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
414
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1079\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[1;32m 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m-> 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEmbedding\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
415
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
416
+ ]
417
+ }
418
+ ],
419
+ "source": [
420
+ "training_args = TrainingArguments(\n",
421
+ " output_dir=\"cifar10_efficientnet\",\n",
422
+ " remove_unused_columns=False,\n",
423
+ " evaluation_strategy=\"epoch\",\n",
424
+ " save_strategy=\"epoch\",\n",
425
+ " learning_rate=5e-5,\n",
426
+ " per_device_train_batch_size=8, # memory error with 16\n",
427
+ " gradient_accumulation_steps=4,\n",
428
+ " per_device_eval_batch_size=8,\n",
429
+ " num_train_epochs=3,\n",
430
+ " warmup_ratio=0.1,\n",
431
+ " logging_steps=10,\n",
432
+ " load_best_model_at_end=True,\n",
433
+ " metric_for_best_model=\"accuracy\",\n",
434
+ " push_to_hub=False,\n",
435
+ ")\n",
436
+ "\n",
437
+ "trainer = Trainer(\n",
438
+ " model=model,\n",
439
+ " args=training_args,\n",
440
+ " data_collator=data_collator,\n",
441
+ " train_dataset=cifar10dataset[\"train\"],\n",
442
+ " eval_dataset=cifar10dataset[\"test\"],\n",
443
+ " tokenizer=image_processor,\n",
444
+ " compute_metrics=compute_metrics,\n",
445
+ ")\n",
446
+ "\n",
447
+ "trainer.train()"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "markdown",
452
+ "metadata": {},
453
+ "source": [
454
+ "### Evaluation"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": null,
460
+ "metadata": {},
461
+ "outputs": [],
462
+ "source": [
463
+ "results = trainer.evaluate()\n",
464
+ "print(results)\n",
465
+ "\n",
466
+ "test_results = trainer.predict("
467
+ ]
468
+ }
469
+ ],
470
+ "metadata": {
471
+ "kernelspec": {
472
+ "display_name": "venv_bloom-classifier",
473
+ "language": "python",
474
+ "name": "python3"
475
+ },
476
+ "language_info": {
477
+ "codemirror_mode": {
478
+ "name": "ipython",
479
+ "version": 3
480
+ },
481
+ "file_extension": ".py",
482
+ "mimetype": "text/x-python",
483
+ "name": "python",
484
+ "nbconvert_exporter": "python",
485
+ "pygments_lexer": "ipython3",
486
+ "version": "3.10.12"
487
+ },
488
+ "orig_nbformat": 4
489
+ },
490
+ "nbformat": 4,
491
+ "nbformat_minor": 2
492
+ }
0.8-testing_segmented_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.9-testing_om_datasets.ipynb ADDED
@@ -0,0 +1,459 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import logging\n",
10
+ "import os\n",
11
+ "from pathlib import Path\n",
12
+ "import click\n",
13
+ "from dotenv import find_dotenv, load_dotenv\n",
14
+ "\n",
15
+ "from datasets import load_dataset, ClassLabel\n",
16
+ "import numpy as np\n",
17
+ "import wandb\n",
18
+ "import yaml\n",
19
+ "from transformers.trainer_callback import EarlyStoppingCallback\n",
20
+ "from artifact_classification.utils import ConfigLoader\n",
21
+ "from torchvision.transforms import (\n",
22
+ " Compose,\n",
23
+ " Normalize,\n",
24
+ " ToTensor,\n",
25
+ " CenterCrop,\n",
26
+ " Resize,\n",
27
+ ")\n",
28
+ "from transformers import (\n",
29
+ " AutoImageProcessor,\n",
30
+ " AutoModelForImageClassification,\n",
31
+ " TrainingArguments,\n",
32
+ " Trainer,\n",
33
+ " DefaultDataCollator,\n",
34
+ " AutoModelForSequenceClassification,\n",
35
+ " DataCollatorWithPadding,\n",
36
+ " AutoTokenizer,\n",
37
+ ")\n",
38
+ "from sklearn.metrics import top_k_accuracy_score\n",
39
+ "import evaluate"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 2,
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "name": "stdout",
49
+ "output_type": "stream",
50
+ "text": [
51
+ "Updating with:\n",
52
+ "{'config': 'om3txt_name', 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'model_base': 'microsoft/deberta-v3-base', 'problem_type': 'text'}\n",
53
+ "\n",
54
+ "\n",
55
+ "{'config': 'om3txt_name', 'fast_dev_run': False, 'do_train': True, 'do_predict': True, 'batch_size': 16, 'model_base': 'microsoft/deberta-v3-base', 'output_root': 'models/', 'num_epochs': 100, 'early_stopping_patience': 5, 'grad_accumulation_steps': 1, 'seed': 42, 'logging_steps': 10, 'lr_scheduler': 'linear', 'warmup_ratio': 0, 'weight_decay': 0, 'device': 'cuda', 'num_workers': 1, 'resume_from_checkpoint': False, 'predict_batch_size': 16, 'save_total_limit': 1, 'lr': 5e-05, 'pytorch2_0': False, 'max_length': 512, 'text_column': 'description', 'fp16': True, 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'problem_type': 'text'}\n",
56
+ "\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "config = \"om3txt_name\"\n",
62
+ "\n",
63
+ "# Training args\n",
64
+ "args = ConfigLoader(config, \"../configs/train_configs.yaml\", \"../configs/train_default.yaml\")\n",
65
+ "\n",
66
+ "# # Load dataset, filter out na inputs and labels and encode labels (as label column can change)\n",
67
+ "# dataset = load_dataset(args.dataset) # , download_mode=\"force_redownload\")\n",
68
+ "# dataset = dataset.filter(lambda example: example[args.label_column] is not None)\n",
69
+ "# if args.problem_type == \"text\":\n",
70
+ "# dataset = dataset.filter(lambda example: example[args.text_column] is not None)\n",
71
+ "# dataset = dataset.rename_column(args.label_column, \"label\")\n",
72
+ "# if not isinstance(dataset[\"train\"].features[\"label\"], ClassLabel):\n",
73
+ "# dataset = dataset.class_encode_column(\"label\")"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 2,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "testing om3_num om3_material om3_name om3txt_material om3txt_name om3-white_num om3-white_material om3-white_name om3-3Dwhite_num om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_num om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_num om4_material om4_name om4txt_material om4txt_name om4-white_num om4-white_material om4-white_name om4-3Dwhite_num om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_num om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_num om5_material om5_name om5txt_material om5txt_name om5-white_num om5-white_material om5-white_name om5-3Dwhite_num om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_num om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_num om6_material om6_name om6txt_material om6txt_name om6-white_num om6-white_material om6-white_name om6-3Dwhite_num om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_num om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_num om3-3DwhiteTVT_material om3-3DwhiteTVT_name\n"
86
+ ]
87
+ }
88
+ ],
89
+ "source": [
90
+ "import yaml\n",
91
+ "\n",
92
+ "with open(\"../configs/train_configs.yaml\", \"r\") as file:\n",
93
+ " configs = list(yaml.safe_load_all(file))\n",
94
+ "\n",
95
+ "config_names = \" \".join([cfg[\"config\"] for cfg in configs])\n",
96
+ "print(config_names)"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 4,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "data": {
106
+ "text/plain": [
107
+ "'testing om3_material om3_name om3-white_material om3-white_name om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_material om4_name om4-white_material om4-white_name om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_material om5_name om5-white_material om5-white_name om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_material om6_name om6-white_material om6-white_name om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_material om3-3DwhiteTVT_name'"
108
+ ]
109
+ },
110
+ "execution_count": 4,
111
+ "metadata": {},
112
+ "output_type": "execute_result"
113
+ }
114
+ ],
115
+ "source": [
116
+ "\" \".join(\n",
117
+ " [cfg[\"config\"] for cfg in configs if not (\"txt\" in cfg[\"config\"] or \"num\" in cfg[\"config\"])]\n",
118
+ ")\n",
119
+ "# \" \".join([cfg[\"config\"] for cfg in configs if \"1frame\" in cfg[\"config\"]])"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 37,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "l2i = {\n",
129
+ " \"Album Painting\": 0,\n",
130
+ " \"Animal Figurine\": 1,\n",
131
+ " \"Animal Mummy\": 2,\n",
132
+ " \"Animal bone\": 3,\n",
133
+ " \"Belt Hook\": 4,\n",
134
+ " \"Blouse\": 5,\n",
135
+ " \"Bolt\": 6,\n",
136
+ " \"Box\": 7,\n",
137
+ " \"Brush Pot\": 8,\n",
138
+ " \"Cap\": 9,\n",
139
+ " \"Case\": 10,\n",
140
+ " \"Clay pipe (smoking)\": 11,\n",
141
+ " \"Cosmetic and Medical Equipment and Implements\": 12,\n",
142
+ " \"Cup And Saucer\": 13,\n",
143
+ " \"DVDs\": 14,\n",
144
+ " \"Dagger\": 15,\n",
145
+ " \"Disc\": 16,\n",
146
+ " \"Domestic Equipment and Utensils\": 17,\n",
147
+ " \"Earring\": 18,\n",
148
+ " \"Finger Ring\": 19,\n",
149
+ " \"Funerary Cone\": 20,\n",
150
+ " \"Funerary goods\": 21,\n",
151
+ " \"Funerary money\": 22,\n",
152
+ " \"Hanging\": 23,\n",
153
+ " \"Heart Scarab\": 24,\n",
154
+ " \"Human Figurine\": 25,\n",
155
+ " \"Inkstick\": 26,\n",
156
+ " \"Kite\": 27,\n",
157
+ " \"Kohl Pot\": 28,\n",
158
+ " \"Letter\": 29,\n",
159
+ " \"Manuscript Page\": 30,\n",
160
+ " \"Mat\": 31,\n",
161
+ " \"Mica Painting\": 32,\n",
162
+ " \"Miniature Painting\": 33,\n",
163
+ " \"Mortar\": 34,\n",
164
+ " \"Mummy Label\": 35,\n",
165
+ " \"Oracle Bone\": 36,\n",
166
+ " \"Ostraka\": 37,\n",
167
+ " \"Palette\": 38,\n",
168
+ " \"Panel\": 39,\n",
169
+ " \"Part\": 40,\n",
170
+ " \"Pendant\": 41,\n",
171
+ " \"Pipe\": 42,\n",
172
+ " \"Pith Painting\": 43,\n",
173
+ " \"Plaque\": 44,\n",
174
+ " \"Plate\": 45,\n",
175
+ " \"Scarab Seal\": 46,\n",
176
+ " \"Scarf\": 47,\n",
177
+ " \"Screen\": 48,\n",
178
+ " \"Seal\": 49,\n",
179
+ " \"Slide\": 50,\n",
180
+ " \"Stand\": 51,\n",
181
+ " \"Thangka\": 52,\n",
182
+ " \"Water Dropper\": 53,\n",
183
+ " \"Water Pot\": 54,\n",
184
+ " \"Woodblock Print\": 55,\n",
185
+ " \"accessories\": 56,\n",
186
+ " \"albums\": 57,\n",
187
+ " \"amulets\": 58,\n",
188
+ " \"animation cels\": 59,\n",
189
+ " \"animation drawings\": 60,\n",
190
+ " \"armor\": 61,\n",
191
+ " \"arrowheads\": 62,\n",
192
+ " \"axes: woodworking tools\": 63,\n",
193
+ " \"badges\": 64,\n",
194
+ " \"bags\": 65,\n",
195
+ " \"bandages\": 66,\n",
196
+ " \"baskets\": 67,\n",
197
+ " \"beads\": 68,\n",
198
+ " \"bells\": 69,\n",
199
+ " \"belts\": 70,\n",
200
+ " \"blades\": 71,\n",
201
+ " \"books\": 72,\n",
202
+ " \"bottles\": 73,\n",
203
+ " \"bowls\": 74,\n",
204
+ " \"boxes\": 75,\n",
205
+ " \"bracelets\": 76,\n",
206
+ " \"brick\": 77,\n",
207
+ " \"brooches\": 78,\n",
208
+ " \"brush washers\": 79,\n",
209
+ " \"buckets\": 80,\n",
210
+ " \"buckles\": 81,\n",
211
+ " \"calligraphy\": 82,\n",
212
+ " \"canopic jars\": 83,\n",
213
+ " \"cards\": 84,\n",
214
+ " \"carvings\": 85,\n",
215
+ " \"chains\": 86,\n",
216
+ " \"chessmen\": 87,\n",
217
+ " \"chopsticks\": 88,\n",
218
+ " \"claypipe\": 89,\n",
219
+ " \"cloth\": 90,\n",
220
+ " \"clothing\": 91,\n",
221
+ " \"coats\": 92,\n",
222
+ " \"coins\": 93,\n",
223
+ " \"collar\": 94,\n",
224
+ " \"compact discs\": 95,\n",
225
+ " \"containers\": 96,\n",
226
+ " \"coverings\": 97,\n",
227
+ " \"covers\": 98,\n",
228
+ " \"cups\": 99,\n",
229
+ " \"deity figurine\": 100,\n",
230
+ " \"diagrams\": 101,\n",
231
+ " \"dishes\": 102,\n",
232
+ " \"dolls\": 103,\n",
233
+ " \"drawings\": 104,\n",
234
+ " \"dresses\": 105,\n",
235
+ " \"drums\": 106,\n",
236
+ " \"earrings\": 107,\n",
237
+ " \"embroidery\": 108,\n",
238
+ " \"ensembles\": 109,\n",
239
+ " \"envelopes\": 110,\n",
240
+ " \"equipment for personal use: grooming, hygiene and health care\": 111,\n",
241
+ " \"ewers\": 112,\n",
242
+ " \"fans\": 113,\n",
243
+ " \"figures\": 114,\n",
244
+ " \"figurines\": 115,\n",
245
+ " \"flags\": 116,\n",
246
+ " \"flasks\": 117,\n",
247
+ " \"furniture components\": 118,\n",
248
+ " \"gaming counters\": 119,\n",
249
+ " \"glassware\": 120,\n",
250
+ " \"hairpins\": 121,\n",
251
+ " \"handles\": 122,\n",
252
+ " \"harnesses\": 123,\n",
253
+ " \"hats\": 124,\n",
254
+ " \"headdresses\": 125,\n",
255
+ " \"heads\": 126,\n",
256
+ " \"incense burners\": 127,\n",
257
+ " \"inlays\": 128,\n",
258
+ " \"jackets\": 129,\n",
259
+ " \"jars\": 130,\n",
260
+ " \"jewelry\": 131,\n",
261
+ " \"juglets\": 132,\n",
262
+ " \"jugs\": 133,\n",
263
+ " \"keys\": 134,\n",
264
+ " \"kimonos\": 135,\n",
265
+ " \"knives\": 136,\n",
266
+ " \"lamps\": 137,\n",
267
+ " \"lanterns\": 138,\n",
268
+ " \"lids\": 139,\n",
269
+ " \"maces\": 140,\n",
270
+ " \"masks\": 141,\n",
271
+ " \"medals\": 142,\n",
272
+ " \"mirrors\": 143,\n",
273
+ " \"models\": 144,\n",
274
+ " \"mounts\": 145,\n",
275
+ " \"nails\": 146,\n",
276
+ " \"necklaces\": 147,\n",
277
+ " \"needles\": 148,\n",
278
+ " \"netsukes\": 149,\n",
279
+ " \"ornaments\": 150,\n",
280
+ " \"pages\": 151,\n",
281
+ " \"paintings\": 152,\n",
282
+ " \"paper money\": 153,\n",
283
+ " \"pendants\": 154,\n",
284
+ " \"petticoats\": 155,\n",
285
+ " \"photographs\": 156,\n",
286
+ " \"pictures\": 157,\n",
287
+ " \"pins\": 158,\n",
288
+ " \"playing cards\": 159,\n",
289
+ " \"poker\": 160,\n",
290
+ " \"postage stamps\": 161,\n",
291
+ " \"postcards\": 162,\n",
292
+ " \"posters\": 163,\n",
293
+ " \"pots\": 164,\n",
294
+ " \"pottery\": 165,\n",
295
+ " \"prints\": 166,\n",
296
+ " \"puppets\": 167,\n",
297
+ " \"purses\": 168,\n",
298
+ " \"reliefs\": 169,\n",
299
+ " \"rings\": 170,\n",
300
+ " \"robes\": 171,\n",
301
+ " \"rubbings\": 172,\n",
302
+ " \"rugs\": 173,\n",
303
+ " \"sandals\": 174,\n",
304
+ " \"saris\": 175,\n",
305
+ " \"sarongs\": 176,\n",
306
+ " \"scabbards\": 177,\n",
307
+ " \"scaraboids\": 178,\n",
308
+ " \"scarabs\": 179,\n",
309
+ " \"scrolls\": 180,\n",
310
+ " \"seed\": 181,\n",
311
+ " \"seppa\": 182,\n",
312
+ " \"shadow puppets\": 183,\n",
313
+ " \"shawls\": 184,\n",
314
+ " \"shell\": 185,\n",
315
+ " \"sherds\": 186,\n",
316
+ " \"shields\": 187,\n",
317
+ " \"shoes\": 188,\n",
318
+ " \"sketches\": 189,\n",
319
+ " \"skirts\": 190,\n",
320
+ " \"snuff bottles\": 191,\n",
321
+ " \"socks\": 192,\n",
322
+ " \"spatulas\": 193,\n",
323
+ " \"spoons\": 194,\n",
324
+ " \"statues\": 195,\n",
325
+ " \"statuettes\": 196,\n",
326
+ " \"stelae\": 197,\n",
327
+ " \"straps\": 198,\n",
328
+ " \"studs\": 199,\n",
329
+ " \"swords\": 200,\n",
330
+ " \"tablets\": 201,\n",
331
+ " \"tacks\": 202,\n",
332
+ " \"tea bowls\": 203,\n",
333
+ " \"teapots\": 204,\n",
334
+ " \"tiles\": 205,\n",
335
+ " \"tools\": 206,\n",
336
+ " \"toys\": 207,\n",
337
+ " \"trays\": 208,\n",
338
+ " \"tubes\": 209,\n",
339
+ " \"tweezers\": 210,\n",
340
+ " \"underwear\": 211,\n",
341
+ " \"unidentified\": 212,\n",
342
+ " \"ushabti\": 213,\n",
343
+ " \"utensils\": 214,\n",
344
+ " \"vases\": 215,\n",
345
+ " \"vessels\": 216,\n",
346
+ " \"weight\": 217,\n",
347
+ " \"weights\": 218,\n",
348
+ " \"whorls\": 219,\n",
349
+ " \"wood blocks\": 220,\n",
350
+ "}"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": 38,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": [
359
+ "import json"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": 39,
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "# json dump\n",
369
+ "with open(\"l2i.json\", \"w\") as f:\n",
370
+ " json.dump({str(v): k for k, v in l2i.items()}, f)\n",
371
+ "# {str(v): k for k, v in l2i.items()}"
372
+ ]
373
+ },
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": 7,
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": [
380
+ "from transformers import AutoConfig"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 45,
386
+ "metadata": {},
387
+ "outputs": [
388
+ {
389
+ "data": {
390
+ "application/vnd.jupyter.widget-view+json": {
391
+ "model_id": "005c080fdcf141acaa30ba191a8c8f3c",
392
+ "version_major": 2,
393
+ "version_minor": 0
394
+ },
395
+ "text/plain": [
396
+ "config.json: 0%| | 0.00/10.9k [00:00<?, ?B/s]"
397
+ ]
398
+ },
399
+ "metadata": {},
400
+ "output_type": "display_data"
401
+ }
402
+ ],
403
+ "source": [
404
+ "config = AutoConfig.from_pretrained(\"james-burton/om6txt_name\")"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 46,
410
+ "metadata": {},
411
+ "outputs": [
412
+ {
413
+ "name": "stderr",
414
+ "output_type": "stream",
415
+ "text": [
416
+ "/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
417
+ "Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
418
+ "eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
419
+ ]
420
+ }
421
+ ],
422
+ "source": [
423
+ "from PIL import Image\n",
424
+ "\n",
425
+ "image_path = \"../data/processed/OM_3Dimages_white/egyptian/1951/1951.42-tt_2.png\"\n",
426
+ "image = Image.open(image_path)\n",
427
+ "image.show()"
428
+ ]
429
+ },
430
+ {
431
+ "cell_type": "code",
432
+ "execution_count": null,
433
+ "metadata": {},
434
+ "outputs": [],
435
+ "source": []
436
+ }
437
+ ],
438
+ "metadata": {
439
+ "kernelspec": {
440
+ "display_name": "ArtifactClassification",
441
+ "language": "python",
442
+ "name": "python3"
443
+ },
444
+ "language_info": {
445
+ "codemirror_mode": {
446
+ "name": "ipython",
447
+ "version": 3
448
+ },
449
+ "file_extension": ".py",
450
+ "mimetype": "text/x-python",
451
+ "name": "python",
452
+ "nbconvert_exporter": "python",
453
+ "pygments_lexer": "ipython3",
454
+ "version": "3.10.12"
455
+ }
456
+ },
457
+ "nbformat": 4,
458
+ "nbformat_minor": 2
459
+ }
1.0-checking_dataset_size.ipynb ADDED
@@ -0,0 +1,559 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "import re"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 4,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# Loaded variable 'df' from URI: /home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\n",
21
+ "df = pd.read_excel(\n",
22
+ " r\"/home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\"\n",
23
+ ")"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 5,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/html": [
34
+ "<div>\n",
35
+ "<style scoped>\n",
36
+ " .dataframe tbody tr th:only-of-type {\n",
37
+ " vertical-align: middle;\n",
38
+ " }\n",
39
+ "\n",
40
+ " .dataframe tbody tr th {\n",
41
+ " vertical-align: top;\n",
42
+ " }\n",
43
+ "\n",
44
+ " .dataframe thead th {\n",
45
+ " text-align: right;\n",
46
+ " }\n",
47
+ "</style>\n",
48
+ "<table border=\"1\" class=\"dataframe\">\n",
49
+ " <thead>\n",
50
+ " <tr style=\"text-align: right;\">\n",
51
+ " <th></th>\n",
52
+ " <th>object_number</th>\n",
53
+ " <th>object_name</th>\n",
54
+ " <th>other_name</th>\n",
55
+ " <th>reproduction.reference</th>\n",
56
+ " <th>description</th>\n",
57
+ " <th>label.text</th>\n",
58
+ " <th>material</th>\n",
59
+ " <th>technique</th>\n",
60
+ " <th>physical_description</th>\n",
61
+ " <th>number_of_parts</th>\n",
62
+ " <th>...</th>\n",
63
+ " <th>Unnamed: 25</th>\n",
64
+ " <th>Unnamed: 26</th>\n",
65
+ " <th>Unnamed: 27</th>\n",
66
+ " <th>Unnamed: 28</th>\n",
67
+ " <th>Unnamed: 29</th>\n",
68
+ " <th>Unnamed: 30</th>\n",
69
+ " <th>Unnamed: 31</th>\n",
70
+ " <th>Unnamed: 32</th>\n",
71
+ " <th>Unnamed: 33</th>\n",
72
+ " <th>Unnamed: 34</th>\n",
73
+ " </tr>\n",
74
+ " </thead>\n",
75
+ " <tbody>\n",
76
+ " <tr>\n",
77
+ " <th>0</th>\n",
78
+ " <td>EG1</td>\n",
79
+ " <td>Kohl Pot</td>\n",
80
+ " <td>jar</td>\n",
81
+ " <td>NaN</td>\n",
82
+ " <td>Shouldered, squat, incised kohl jar, blackened...</td>\n",
83
+ " <td>NaN</td>\n",
84
+ " <td>limestone</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>NaN</td>\n",
88
+ " <td>...</td>\n",
89
+ " <td>NaN</td>\n",
90
+ " <td>NaN</td>\n",
91
+ " <td>NaN</td>\n",
92
+ " <td>NaN</td>\n",
93
+ " <td>NaN</td>\n",
94
+ " <td>NaN</td>\n",
95
+ " <td>NaN</td>\n",
96
+ " <td>NaN</td>\n",
97
+ " <td>NaN</td>\n",
98
+ " <td>NaN</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>1</th>\n",
102
+ " <td>EG2</td>\n",
103
+ " <td>cups</td>\n",
104
+ " <td>beaker</td>\n",
105
+ " <td>NaN</td>\n",
106
+ " <td>slightly concave beaker with flaring rim and c...</td>\n",
107
+ " <td>NaN</td>\n",
108
+ " <td>travertine</td>\n",
109
+ " <td>NaN</td>\n",
110
+ " <td>NaN</td>\n",
111
+ " <td>NaN</td>\n",
112
+ " <td>...</td>\n",
113
+ " <td>NaN</td>\n",
114
+ " <td>NaN</td>\n",
115
+ " <td>NaN</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " <td>NaN</td>\n",
118
+ " <td>NaN</td>\n",
119
+ " <td>NaN</td>\n",
120
+ " <td>NaN</td>\n",
121
+ " <td>NaN</td>\n",
122
+ " <td>NaN</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>2</th>\n",
126
+ " <td>EG3</td>\n",
127
+ " <td>bowls</td>\n",
128
+ " <td>bowl</td>\n",
129
+ " <td>../images/egyptian/eg/eg3-409-d1.jpg</td>\n",
130
+ " <td>squat shouldered jar, no rim</td>\n",
131
+ " <td>&lt;SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-...</td>\n",
132
+ " <td>limestone</td>\n",
133
+ " <td>NaN</td>\n",
134
+ " <td>NaN</td>\n",
135
+ " <td>NaN</td>\n",
136
+ " <td>...</td>\n",
137
+ " <td>NaN</td>\n",
138
+ " <td>NaN</td>\n",
139
+ " <td>NaN</td>\n",
140
+ " <td>NaN</td>\n",
141
+ " <td>NaN</td>\n",
142
+ " <td>NaN</td>\n",
143
+ " <td>NaN</td>\n",
144
+ " <td>NaN</td>\n",
145
+ " <td>NaN</td>\n",
146
+ " <td>NaN</td>\n",
147
+ " </tr>\n",
148
+ " <tr>\n",
149
+ " <th>3</th>\n",
150
+ " <td>EG4</td>\n",
151
+ " <td>bottles</td>\n",
152
+ " <td>jar</td>\n",
153
+ " <td>NaN</td>\n",
154
+ " <td>necked, globular jar with flared rim</td>\n",
155
+ " <td>NaN</td>\n",
156
+ " <td>travertine</td>\n",
157
+ " <td>NaN</td>\n",
158
+ " <td>NaN</td>\n",
159
+ " <td>NaN</td>\n",
160
+ " <td>...</td>\n",
161
+ " <td>NaN</td>\n",
162
+ " <td>NaN</td>\n",
163
+ " <td>NaN</td>\n",
164
+ " <td>NaN</td>\n",
165
+ " <td>NaN</td>\n",
166
+ " <td>NaN</td>\n",
167
+ " <td>NaN</td>\n",
168
+ " <td>NaN</td>\n",
169
+ " <td>NaN</td>\n",
170
+ " <td>NaN</td>\n",
171
+ " </tr>\n",
172
+ " <tr>\n",
173
+ " <th>4</th>\n",
174
+ " <td>EG5</td>\n",
175
+ " <td>bottles</td>\n",
176
+ " <td>jar</td>\n",
177
+ " <td>NaN</td>\n",
178
+ " <td>necked, globular jar with narrow rim, plus sto...</td>\n",
179
+ " <td>NaN</td>\n",
180
+ " <td>travertine</td>\n",
181
+ " <td>NaN</td>\n",
182
+ " <td>NaN</td>\n",
183
+ " <td>NaN</td>\n",
184
+ " <td>...</td>\n",
185
+ " <td>NaN</td>\n",
186
+ " <td>NaN</td>\n",
187
+ " <td>NaN</td>\n",
188
+ " <td>NaN</td>\n",
189
+ " <td>NaN</td>\n",
190
+ " <td>NaN</td>\n",
191
+ " <td>NaN</td>\n",
192
+ " <td>NaN</td>\n",
193
+ " <td>NaN</td>\n",
194
+ " <td>NaN</td>\n",
195
+ " </tr>\n",
196
+ " <tr>\n",
197
+ " <th>...</th>\n",
198
+ " <td>...</td>\n",
199
+ " <td>...</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>...</td>\n",
202
+ " <td>...</td>\n",
203
+ " <td>...</td>\n",
204
+ " <td>...</td>\n",
205
+ " <td>...</td>\n",
206
+ " <td>...</td>\n",
207
+ " <td>...</td>\n",
208
+ " <td>...</td>\n",
209
+ " <td>...</td>\n",
210
+ " <td>...</td>\n",
211
+ " <td>...</td>\n",
212
+ " <td>...</td>\n",
213
+ " <td>...</td>\n",
214
+ " <td>...</td>\n",
215
+ " <td>...</td>\n",
216
+ " <td>...</td>\n",
217
+ " <td>...</td>\n",
218
+ " <td>...</td>\n",
219
+ " </tr>\n",
220
+ " <tr>\n",
221
+ " <th>60081</th>\n",
222
+ " <td>DURMA.2020.3.2072</td>\n",
223
+ " <td>coins</td>\n",
224
+ " <td>NaN</td>\n",
225
+ " <td>../images/fulling_mill/2020/DURMA.2020.3.2072-...</td>\n",
226
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
227
+ " <td>NaN</td>\n",
228
+ " <td>metal</td>\n",
229
+ " <td>hammering</td>\n",
230
+ " <td>A silver denarius of Elagabalus dating to the ...</td>\n",
231
+ " <td>1</td>\n",
232
+ " <td>...</td>\n",
233
+ " <td>NaN</td>\n",
234
+ " <td>NaN</td>\n",
235
+ " <td>NaN</td>\n",
236
+ " <td>NaN</td>\n",
237
+ " <td>NaN</td>\n",
238
+ " <td>NaN</td>\n",
239
+ " <td>NaN</td>\n",
240
+ " <td>NaN</td>\n",
241
+ " <td>NaN</td>\n",
242
+ " <td>NaN</td>\n",
243
+ " </tr>\n",
244
+ " <tr>\n",
245
+ " <th>60082</th>\n",
246
+ " <td>DUROM.2021.286</td>\n",
247
+ " <td>postcards</td>\n",
248
+ " <td>NaN</td>\n",
249
+ " <td>NaN</td>\n",
250
+ " <td>Portrait orientation postcard for the 1996 Ind...</td>\n",
251
+ " <td>NaN</td>\n",
252
+ " <td>paper</td>\n",
253
+ " <td>printing</td>\n",
254
+ " <td>Digital printed onto paper/card</td>\n",
255
+ " <td>1</td>\n",
256
+ " <td>...</td>\n",
257
+ " <td>NaN</td>\n",
258
+ " <td>NaN</td>\n",
259
+ " <td>NaN</td>\n",
260
+ " <td>NaN</td>\n",
261
+ " <td>NaN</td>\n",
262
+ " <td>NaN</td>\n",
263
+ " <td>NaN</td>\n",
264
+ " <td>NaN</td>\n",
265
+ " <td>NaN</td>\n",
266
+ " <td>NaN</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>60083</th>\n",
270
+ " <td>DUROM.2021.287</td>\n",
271
+ " <td>postcards</td>\n",
272
+ " <td>NaN</td>\n",
273
+ " <td>NaN</td>\n",
274
+ " <td>Pair of landscape orientation postcard sized p...</td>\n",
275
+ " <td>NaN</td>\n",
276
+ " <td>paper</td>\n",
277
+ " <td>printing</td>\n",
278
+ " <td>Digital print on card</td>\n",
279
+ " <td>2</td>\n",
280
+ " <td>...</td>\n",
281
+ " <td>NaN</td>\n",
282
+ " <td>NaN</td>\n",
283
+ " <td>NaN</td>\n",
284
+ " <td>NaN</td>\n",
285
+ " <td>NaN</td>\n",
286
+ " <td>NaN</td>\n",
287
+ " <td>NaN</td>\n",
288
+ " <td>NaN</td>\n",
289
+ " <td>NaN</td>\n",
290
+ " <td>NaN</td>\n",
291
+ " </tr>\n",
292
+ " <tr>\n",
293
+ " <th>60084</th>\n",
294
+ " <td>DUROM.2021.289</td>\n",
295
+ " <td>posters</td>\n",
296
+ " <td>NaN</td>\n",
297
+ " <td>NaN</td>\n",
298
+ " <td>Portrait orientation poster for the 1996 India...</td>\n",
299
+ " <td>NaN</td>\n",
300
+ " <td>paper</td>\n",
301
+ " <td>printing</td>\n",
302
+ " <td>digital print on gloss paper</td>\n",
303
+ " <td>1</td>\n",
304
+ " <td>...</td>\n",
305
+ " <td>NaN</td>\n",
306
+ " <td>NaN</td>\n",
307
+ " <td>NaN</td>\n",
308
+ " <td>NaN</td>\n",
309
+ " <td>NaN</td>\n",
310
+ " <td>NaN</td>\n",
311
+ " <td>NaN</td>\n",
312
+ " <td>NaN</td>\n",
313
+ " <td>NaN</td>\n",
314
+ " <td>NaN</td>\n",
315
+ " </tr>\n",
316
+ " <tr>\n",
317
+ " <th>60085</th>\n",
318
+ " <td>DUROM.2021.288</td>\n",
319
+ " <td>posters</td>\n",
320
+ " <td>NaN</td>\n",
321
+ " <td>NaN</td>\n",
322
+ " <td>Portrait orientation poster for the 1996 India...</td>\n",
323
+ " <td>NaN</td>\n",
324
+ " <td>paper</td>\n",
325
+ " <td>printing</td>\n",
326
+ " <td>Digital print on paper</td>\n",
327
+ " <td>1</td>\n",
328
+ " <td>...</td>\n",
329
+ " <td>NaN</td>\n",
330
+ " <td>NaN</td>\n",
331
+ " <td>NaN</td>\n",
332
+ " <td>NaN</td>\n",
333
+ " <td>NaN</td>\n",
334
+ " <td>NaN</td>\n",
335
+ " <td>NaN</td>\n",
336
+ " <td>NaN</td>\n",
337
+ " <td>NaN</td>\n",
338
+ " <td>NaN</td>\n",
339
+ " </tr>\n",
340
+ " </tbody>\n",
341
+ "</table>\n",
342
+ "<p>60086 rows × 35 columns</p>\n",
343
+ "</div>"
344
+ ],
345
+ "text/plain": [
346
+ " object_number object_name other_name \\\n",
347
+ "0 EG1 Kohl Pot jar \n",
348
+ "1 EG2 cups beaker \n",
349
+ "2 EG3 bowls bowl \n",
350
+ "3 EG4 bottles jar \n",
351
+ "4 EG5 bottles jar \n",
352
+ "... ... ... ... \n",
353
+ "60081 DURMA.2020.3.2072 coins NaN \n",
354
+ "60082 DUROM.2021.286 postcards NaN \n",
355
+ "60083 DUROM.2021.287 postcards NaN \n",
356
+ "60084 DUROM.2021.289 posters NaN \n",
357
+ "60085 DUROM.2021.288 posters NaN \n",
358
+ "\n",
359
+ " reproduction.reference \\\n",
360
+ "0 NaN \n",
361
+ "1 NaN \n",
362
+ "2 ../images/egyptian/eg/eg3-409-d1.jpg \n",
363
+ "3 NaN \n",
364
+ "4 NaN \n",
365
+ "... ... \n",
366
+ "60081 ../images/fulling_mill/2020/DURMA.2020.3.2072-... \n",
367
+ "60082 NaN \n",
368
+ "60083 NaN \n",
369
+ "60084 NaN \n",
370
+ "60085 NaN \n",
371
+ "\n",
372
+ " description \\\n",
373
+ "0 Shouldered, squat, incised kohl jar, blackened... \n",
374
+ "1 slightly concave beaker with flaring rim and c... \n",
375
+ "2 squat shouldered jar, no rim \n",
376
+ "3 necked, globular jar with flared rim \n",
377
+ "4 necked, globular jar with narrow rim, plus sto... \n",
378
+ "... ... \n",
379
+ "60081 A silver Roman coin which is a part of the Pie... \n",
380
+ "60082 Portrait orientation postcard for the 1996 Ind... \n",
381
+ "60083 Pair of landscape orientation postcard sized p... \n",
382
+ "60084 Portrait orientation poster for the 1996 India... \n",
383
+ "60085 Portrait orientation poster for the 1996 India... \n",
384
+ "\n",
385
+ " label.text material \\\n",
386
+ "0 NaN limestone \n",
387
+ "1 NaN travertine \n",
388
+ "2 <SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-... limestone \n",
389
+ "3 NaN travertine \n",
390
+ "4 NaN travertine \n",
391
+ "... ... ... \n",
392
+ "60081 NaN metal \n",
393
+ "60082 NaN paper \n",
394
+ "60083 NaN paper \n",
395
+ "60084 NaN paper \n",
396
+ "60085 NaN paper \n",
397
+ "\n",
398
+ " technique physical_description \\\n",
399
+ "0 NaN NaN \n",
400
+ "1 NaN NaN \n",
401
+ "2 NaN NaN \n",
402
+ "3 NaN NaN \n",
403
+ "4 NaN NaN \n",
404
+ "... ... ... \n",
405
+ "60081 hammering A silver denarius of Elagabalus dating to the ... \n",
406
+ "60082 printing Digital printed onto paper/card \n",
407
+ "60083 printing Digital print on card \n",
408
+ "60084 printing digital print on gloss paper \n",
409
+ "60085 printing Digital print on paper \n",
410
+ "\n",
411
+ " number_of_parts ... Unnamed: 25 Unnamed: 26 Unnamed: 27 Unnamed: 28 \\\n",
412
+ "0 NaN ... NaN NaN NaN NaN \n",
413
+ "1 NaN ... NaN NaN NaN NaN \n",
414
+ "2 NaN ... NaN NaN NaN NaN \n",
415
+ "3 NaN ... NaN NaN NaN NaN \n",
416
+ "4 NaN ... NaN NaN NaN NaN \n",
417
+ "... ... ... ... ... ... ... \n",
418
+ "60081 1 ... NaN NaN NaN NaN \n",
419
+ "60082 1 ... NaN NaN NaN NaN \n",
420
+ "60083 2 ... NaN NaN NaN NaN \n",
421
+ "60084 1 ... NaN NaN NaN NaN \n",
422
+ "60085 1 ... NaN NaN NaN NaN \n",
423
+ "\n",
424
+ " Unnamed: 29 Unnamed: 30 Unnamed: 31 Unnamed: 32 Unnamed: 33 Unnamed: 34 \n",
425
+ "0 NaN NaN NaN NaN NaN NaN \n",
426
+ "1 NaN NaN NaN NaN NaN NaN \n",
427
+ "2 NaN NaN NaN NaN NaN NaN \n",
428
+ "3 NaN NaN NaN NaN NaN NaN \n",
429
+ "4 NaN NaN NaN NaN NaN NaN \n",
430
+ "... ... ... ... ... ... ... \n",
431
+ "60081 NaN NaN NaN NaN NaN NaN \n",
432
+ "60082 NaN NaN NaN NaN NaN NaN \n",
433
+ "60083 NaN NaN NaN NaN NaN NaN \n",
434
+ "60084 NaN NaN NaN NaN NaN NaN \n",
435
+ "60085 NaN NaN NaN NaN NaN NaN \n",
436
+ "\n",
437
+ "[60086 rows x 35 columns]"
438
+ ]
439
+ },
440
+ "execution_count": 5,
441
+ "metadata": {},
442
+ "output_type": "execute_result"
443
+ }
444
+ ],
445
+ "source": [
446
+ "df"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": 6,
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "data": {
456
+ "text/plain": [
457
+ "6625"
458
+ ]
459
+ },
460
+ "execution_count": 6,
461
+ "metadata": {},
462
+ "output_type": "execute_result"
463
+ }
464
+ ],
465
+ "source": [
466
+ "df[\"description\"].isna().sum()"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 7,
472
+ "metadata": {},
473
+ "outputs": [
474
+ {
475
+ "name": "stdout",
476
+ "output_type": "stream",
477
+ "text": [
478
+ "There are 60086 records in the dataset\n",
479
+ "26809 records have an image location\n",
480
+ "53461 records have an description\n",
481
+ "There are 60067 unique museum numbers\n",
482
+ "There are 46166 unique descriptions\n"
483
+ ]
484
+ }
485
+ ],
486
+ "source": [
487
+ "print(f\"There are {len(df)} records in the dataset\")\n",
488
+ "print(f\"{df['reproduction.reference'].notna().sum()} records have an image location\")\n",
489
+ "print(f\"{df['description'].notna().sum()} records have an description\")\n",
490
+ "print(f\"There are {len(df['object_number'].unique())} unique museum numbers\")\n",
491
+ "print(f\"There are {len(df['description'].unique())} unique descriptions\")"
492
+ ]
493
+ },
494
+ {
495
+ "cell_type": "code",
496
+ "execution_count": 8,
497
+ "metadata": {},
498
+ "outputs": [
499
+ {
500
+ "name": "stdout",
501
+ "output_type": "stream",
502
+ "text": [
503
+ "Total number of images in ../data/raw/images/: 39200\n",
504
+ "Total number of files in ../data/raw/images/: 39244\n"
505
+ ]
506
+ }
507
+ ],
508
+ "source": [
509
+ "import os\n",
510
+ "\n",
511
+ "image_count = 0\n",
512
+ "file_count = 0\n",
513
+ "\n",
514
+ "# Define the root directory\n",
515
+ "root_dir = \"../data/raw/images/\"\n",
516
+ "\n",
517
+ "# Iterate through all subdirectories and files\n",
518
+ "for root, dirs, files in os.walk(root_dir):\n",
519
+ " for file in files:\n",
520
+ " file_count += 1\n",
521
+ " # Check if the file is an image file\n",
522
+ " if file.endswith((\".jpg\", \".jpeg\", \".png\", \".gif\", \".JPG\", \".JPEG\", \".PNG\", \".GIF\")):\n",
523
+ " # Increment the image count\n",
524
+ " image_count += 1\n",
525
+ "\n",
526
+ "print(f\"Total number of images in {root_dir}: {image_count}\")\n",
527
+ "print(f\"Total number of files in {root_dir}: {file_count}\")"
528
+ ]
529
+ },
530
+ {
531
+ "cell_type": "code",
532
+ "execution_count": null,
533
+ "metadata": {},
534
+ "outputs": [],
535
+ "source": []
536
+ }
537
+ ],
538
+ "metadata": {
539
+ "kernelspec": {
540
+ "display_name": "env",
541
+ "language": "python",
542
+ "name": "python3"
543
+ },
544
+ "language_info": {
545
+ "codemirror_mode": {
546
+ "name": "ipython",
547
+ "version": 3
548
+ },
549
+ "file_extension": ".py",
550
+ "mimetype": "text/x-python",
551
+ "name": "python",
552
+ "nbconvert_exporter": "python",
553
+ "pygments_lexer": "ipython3",
554
+ "version": "3.10.12"
555
+ }
556
+ },
557
+ "nbformat": 4,
558
+ "nbformat_minor": 2
559
+ }
1.1-exploring_OM_image_matching.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
2.0-assessing_OM_dataset.ipynb ADDED
@@ -0,0 +1,1468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 89,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import matplotlib.pyplot as plt\n",
11
+ "import numpy as np"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\")\n",
21
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "file_counts = file2obj[\"obj_num\"].value_counts()\n",
31
+ "# file2obj"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "data": {
41
+ "text/plain": [
42
+ "obj_num\n",
43
+ "durom.1969.406 249\n",
44
+ "durom.1973.47 191\n",
45
+ "DUROM.1954.Spalding29.W 112\n",
46
+ "durom.1960.2332 101\n",
47
+ "durom.2014.1 76\n",
48
+ " ... \n",
49
+ "durom.2006.46.32 1\n",
50
+ "durom.2006.44.16 1\n",
51
+ "durom.2006.45.194 1\n",
52
+ "durom.2006.46.13 1\n",
53
+ "durom.1964.183 1\n",
54
+ "Name: count, Length: 12642, dtype: int64"
55
+ ]
56
+ },
57
+ "execution_count": 4,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "file_counts"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 5,
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "data": {
73
+ "text/html": [
74
+ "<div>\n",
75
+ "<style scoped>\n",
76
+ " .dataframe tbody tr th:only-of-type {\n",
77
+ " vertical-align: middle;\n",
78
+ " }\n",
79
+ "\n",
80
+ " .dataframe tbody tr th {\n",
81
+ " vertical-align: top;\n",
82
+ " }\n",
83
+ "\n",
84
+ " .dataframe thead th {\n",
85
+ " text-align: right;\n",
86
+ " }\n",
87
+ "</style>\n",
88
+ "<table border=\"1\" class=\"dataframe\">\n",
89
+ " <thead>\n",
90
+ " <tr style=\"text-align: right;\">\n",
91
+ " <th></th>\n",
92
+ " <th>Images per instance</th>\n",
93
+ " <th>Number of instances</th>\n",
94
+ " <th>Number of images</th>\n",
95
+ " </tr>\n",
96
+ " </thead>\n",
97
+ " <tbody>\n",
98
+ " <tr>\n",
99
+ " <th>0</th>\n",
100
+ " <td>3</td>\n",
101
+ " <td>696</td>\n",
102
+ " <td>2088</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>1</th>\n",
106
+ " <td>4</td>\n",
107
+ " <td>703</td>\n",
108
+ " <td>2812</td>\n",
109
+ " </tr>\n",
110
+ " <tr>\n",
111
+ " <th>2</th>\n",
112
+ " <td>5</td>\n",
113
+ " <td>360</td>\n",
114
+ " <td>1800</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>3</th>\n",
118
+ " <td>6</td>\n",
119
+ " <td>853</td>\n",
120
+ " <td>5118</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>4</th>\n",
124
+ " <td>7</td>\n",
125
+ " <td>471</td>\n",
126
+ " <td>3297</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>5</th>\n",
130
+ " <td>8</td>\n",
131
+ " <td>223</td>\n",
132
+ " <td>1784</td>\n",
133
+ " </tr>\n",
134
+ " <tr>\n",
135
+ " <th>6</th>\n",
136
+ " <td>9</td>\n",
137
+ " <td>110</td>\n",
138
+ " <td>990</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>7</th>\n",
142
+ " <td>10+</td>\n",
143
+ " <td>456</td>\n",
144
+ " <td>7836</td>\n",
145
+ " </tr>\n",
146
+ " <tr>\n",
147
+ " <th>8</th>\n",
148
+ " <td>Total</td>\n",
149
+ " <td>3872</td>\n",
150
+ " <td>25725</td>\n",
151
+ " </tr>\n",
152
+ " </tbody>\n",
153
+ "</table>\n",
154
+ "</div>"
155
+ ],
156
+ "text/plain": [
157
+ " Images per instance Number of instances Number of images\n",
158
+ "0 3 696 2088\n",
159
+ "1 4 703 2812\n",
160
+ "2 5 360 1800\n",
161
+ "3 6 853 5118\n",
162
+ "4 7 471 3297\n",
163
+ "5 8 223 1784\n",
164
+ "6 9 110 990\n",
165
+ "7 10+ 456 7836\n",
166
+ "8 Total 3872 25725"
167
+ ]
168
+ },
169
+ "execution_count": 5,
170
+ "metadata": {},
171
+ "output_type": "execute_result"
172
+ }
173
+ ],
174
+ "source": [
175
+ "distribution_df = pd.DataFrame()\n",
176
+ "distribution_df[\"Images per instance\"] = file_counts.value_counts().sort_index().index\n",
177
+ "distribution_df[\"Number of instances\"] = file_counts.value_counts().sort_index().values\n",
178
+ "distribution_df[\"Number of images\"] = (\n",
179
+ " distribution_df[\"Images per instance\"] * distribution_df[\"Number of instances\"]\n",
180
+ ")\n",
181
+ "num_instances_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
182
+ " \"Number of instances\"\n",
183
+ "].sum()\n",
184
+ "num_images_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
185
+ " \"Number of images\"\n",
186
+ "].sum()\n",
187
+ "distribution_df = distribution_df[\n",
188
+ " (distribution_df[\"Images per instance\"] < 10) & (distribution_df[\"Images per instance\"] > 2)\n",
189
+ "]\n",
190
+ "\n",
191
+ "distribution_df = pd.concat(\n",
192
+ " [\n",
193
+ " distribution_df,\n",
194
+ " pd.DataFrame(\n",
195
+ " {\n",
196
+ " \"Images per instance\": [\"10+\"],\n",
197
+ " \"Number of instances\": [num_instances_10plus],\n",
198
+ " \"Number of images\": [num_images_10plus],\n",
199
+ " }\n",
200
+ " ),\n",
201
+ " ],\n",
202
+ " ignore_index=True,\n",
203
+ ")\n",
204
+ "\n",
205
+ "# append total\n",
206
+ "distribution_df = pd.concat(\n",
207
+ " [\n",
208
+ " distribution_df,\n",
209
+ " pd.DataFrame(\n",
210
+ " {\n",
211
+ " \"Images per instance\": [\"Total\"],\n",
212
+ " \"Number of instances\": [distribution_df[\"Number of instances\"].sum()],\n",
213
+ " \"Number of images\": [distribution_df[\"Number of images\"].sum()],\n",
214
+ " }\n",
215
+ " ),\n",
216
+ " ],\n",
217
+ " ignore_index=True,\n",
218
+ ")\n",
219
+ "# distribution_df = distribution_df[['Images per instance', 'Number of images', 'Number of instances']]\n",
220
+ "distribution_df"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "markdown",
225
+ "metadata": {},
226
+ "source": [
227
+ "This distribution broadly follows that from Winterbottom's paper, with a few minor differences. \n",
228
+ "\n",
229
+ "I am not expecting it to be exactly the same, as winterbottom did not use the database at all, instead just looked at the images"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "markdown",
234
+ "metadata": {},
235
+ "source": [
236
+ "## Assessing for alternative text labels"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 61,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "full_df = pd.read_excel(\"../data/raw/Durham_University_Museums_data.xlsx\")\n",
246
+ "full_df = full_df.filter(regex=r\"^(?!Unnamed).*$\")\n",
247
+ "full_df = full_df.dropna(subset=[\"description\"])"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 69,
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/html": [
258
+ "<div>\n",
259
+ "<style scoped>\n",
260
+ " .dataframe tbody tr th:only-of-type {\n",
261
+ " vertical-align: middle;\n",
262
+ " }\n",
263
+ "\n",
264
+ " .dataframe tbody tr th {\n",
265
+ " vertical-align: top;\n",
266
+ " }\n",
267
+ "\n",
268
+ " .dataframe thead th {\n",
269
+ " text-align: right;\n",
270
+ " }\n",
271
+ "</style>\n",
272
+ "<table border=\"1\" class=\"dataframe\">\n",
273
+ " <thead>\n",
274
+ " <tr style=\"text-align: right;\">\n",
275
+ " <th></th>\n",
276
+ " <th>Column</th>\n",
277
+ " <th>Null Percentage</th>\n",
278
+ " <th>unique_values</th>\n",
279
+ " </tr>\n",
280
+ " </thead>\n",
281
+ " <tbody>\n",
282
+ " <tr>\n",
283
+ " <th>0</th>\n",
284
+ " <td>object_number</td>\n",
285
+ " <td>0.00</td>\n",
286
+ " <td>53460</td>\n",
287
+ " </tr>\n",
288
+ " <tr>\n",
289
+ " <th>4</th>\n",
290
+ " <td>description</td>\n",
291
+ " <td>0.00</td>\n",
292
+ " <td>1191</td>\n",
293
+ " </tr>\n",
294
+ " <tr>\n",
295
+ " <th>6</th>\n",
296
+ " <td>material</td>\n",
297
+ " <td>4.43</td>\n",
298
+ " <td>6442</td>\n",
299
+ " </tr>\n",
300
+ " <tr>\n",
301
+ " <th>1</th>\n",
302
+ " <td>object_name</td>\n",
303
+ " <td>8.96</td>\n",
304
+ " <td>26163</td>\n",
305
+ " </tr>\n",
306
+ " <tr>\n",
307
+ " <th>22</th>\n",
308
+ " <td>alternative_number</td>\n",
309
+ " <td>18.30</td>\n",
310
+ " <td>46165</td>\n",
311
+ " </tr>\n",
312
+ " <tr>\n",
313
+ " <th>13</th>\n",
314
+ " <td>production.place</td>\n",
315
+ " <td>34.42</td>\n",
316
+ " <td>3234</td>\n",
317
+ " </tr>\n",
318
+ " <tr>\n",
319
+ " <th>12</th>\n",
320
+ " <td>production.period</td>\n",
321
+ " <td>40.90</td>\n",
322
+ " <td>414</td>\n",
323
+ " </tr>\n",
324
+ " <tr>\n",
325
+ " <th>3</th>\n",
326
+ " <td>reproduction.reference</td>\n",
327
+ " <td>50.23</td>\n",
328
+ " <td>76</td>\n",
329
+ " </tr>\n",
330
+ " <tr>\n",
331
+ " <th>11</th>\n",
332
+ " <td>production.date.end</td>\n",
333
+ " <td>50.90</td>\n",
334
+ " <td>6923</td>\n",
335
+ " </tr>\n",
336
+ " <tr>\n",
337
+ " <th>10</th>\n",
338
+ " <td>production.date.start</td>\n",
339
+ " <td>51.04</td>\n",
340
+ " <td>127</td>\n",
341
+ " </tr>\n",
342
+ " <tr>\n",
343
+ " <th>2</th>\n",
344
+ " <td>other_name</td>\n",
345
+ " <td>58.72</td>\n",
346
+ " <td>968</td>\n",
347
+ " </tr>\n",
348
+ " <tr>\n",
349
+ " <th>9</th>\n",
350
+ " <td>number_of_parts</td>\n",
351
+ " <td>62.08</td>\n",
352
+ " <td>949</td>\n",
353
+ " </tr>\n",
354
+ " <tr>\n",
355
+ " <th>8</th>\n",
356
+ " <td>physical_description</td>\n",
357
+ " <td>73.54</td>\n",
358
+ " <td>485</td>\n",
359
+ " </tr>\n",
360
+ " <tr>\n",
361
+ " <th>14</th>\n",
362
+ " <td>field_coll.place</td>\n",
363
+ " <td>77.88</td>\n",
364
+ " <td>812</td>\n",
365
+ " </tr>\n",
366
+ " <tr>\n",
367
+ " <th>16</th>\n",
368
+ " <td>field_coll.method</td>\n",
369
+ " <td>83.38</td>\n",
370
+ " <td>546</td>\n",
371
+ " </tr>\n",
372
+ " <tr>\n",
373
+ " <th>18</th>\n",
374
+ " <td>content.subject</td>\n",
375
+ " <td>87.25</td>\n",
376
+ " <td>1449</td>\n",
377
+ " </tr>\n",
378
+ " <tr>\n",
379
+ " <th>7</th>\n",
380
+ " <td>technique</td>\n",
381
+ " <td>87.58</td>\n",
382
+ " <td>22</td>\n",
383
+ " </tr>\n",
384
+ " <tr>\n",
385
+ " <th>21</th>\n",
386
+ " <td>association.subject</td>\n",
387
+ " <td>88.35</td>\n",
388
+ " <td>516</td>\n",
389
+ " </tr>\n",
390
+ " <tr>\n",
391
+ " <th>15</th>\n",
392
+ " <td>field_coll.notes</td>\n",
393
+ " <td>91.09</td>\n",
394
+ " <td>773</td>\n",
395
+ " </tr>\n",
396
+ " <tr>\n",
397
+ " <th>5</th>\n",
398
+ " <td>label.text</td>\n",
399
+ " <td>91.69</td>\n",
400
+ " <td>78</td>\n",
401
+ " </tr>\n",
402
+ " <tr>\n",
403
+ " <th>20</th>\n",
404
+ " <td>association.person</td>\n",
405
+ " <td>95.54</td>\n",
406
+ " <td>289</td>\n",
407
+ " </tr>\n",
408
+ " <tr>\n",
409
+ " <th>17</th>\n",
410
+ " <td>content.person.name</td>\n",
411
+ " <td>95.89</td>\n",
412
+ " <td>247</td>\n",
413
+ " </tr>\n",
414
+ " <tr>\n",
415
+ " <th>19</th>\n",
416
+ " <td>association.period</td>\n",
417
+ " <td>97.70</td>\n",
418
+ " <td>36718</td>\n",
419
+ " </tr>\n",
420
+ " </tbody>\n",
421
+ "</table>\n",
422
+ "</div>"
423
+ ],
424
+ "text/plain": [
425
+ " Column Null Percentage unique_values\n",
426
+ "0 object_number 0.00 53460\n",
427
+ "4 description 0.00 1191\n",
428
+ "6 material 4.43 6442\n",
429
+ "1 object_name 8.96 26163\n",
430
+ "22 alternative_number 18.30 46165\n",
431
+ "13 production.place 34.42 3234\n",
432
+ "12 production.period 40.90 414\n",
433
+ "3 reproduction.reference 50.23 76\n",
434
+ "11 production.date.end 50.90 6923\n",
435
+ "10 production.date.start 51.04 127\n",
436
+ "2 other_name 58.72 968\n",
437
+ "9 number_of_parts 62.08 949\n",
438
+ "8 physical_description 73.54 485\n",
439
+ "14 field_coll.place 77.88 812\n",
440
+ "16 field_coll.method 83.38 546\n",
441
+ "18 content.subject 87.25 1449\n",
442
+ "7 technique 87.58 22\n",
443
+ "21 association.subject 88.35 516\n",
444
+ "15 field_coll.notes 91.09 773\n",
445
+ "5 label.text 91.69 78\n",
446
+ "20 association.person 95.54 289\n",
447
+ "17 content.person.name 95.89 247\n",
448
+ "19 association.period 97.70 36718"
449
+ ]
450
+ },
451
+ "execution_count": 69,
452
+ "metadata": {},
453
+ "output_type": "execute_result"
454
+ }
455
+ ],
456
+ "source": [
457
+ "null_percentage = (full_df.isnull().sum() / len(full_df)) * 100\n",
458
+ "desc_df = pd.DataFrame(\n",
459
+ " {\"Column\": null_percentage.index, \"Null Percentage\": null_percentage.values}\n",
460
+ ")\n",
461
+ "desc_df[\"Null Percentage\"] = desc_df[\"Null Percentage\"].round(2)\n",
462
+ "desc_df = desc_df.sort_values(by=\"Null Percentage\")\n",
463
+ "desc_df[\"unique_values\"] = full_df.nunique().values\n",
464
+ "desc_df"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 97,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "def get_distribution(df, column, lower_bound=2):\n",
474
+ " distribution = pd.DataFrame()\n",
475
+ " col_counts = df[column].value_counts()\n",
476
+ " distribution[f\"{column}s per instance\"] = col_counts.value_counts().sort_index().index\n",
477
+ " distribution[\"Number of instances\"] = col_counts.value_counts().sort_index().values\n",
478
+ " distribution[f\"Number of {column}s\"] = (\n",
479
+ " distribution[f\"{column}s per instance\"] * distribution[\"Number of instances\"]\n",
480
+ " )\n",
481
+ " num_instances_10_50 = distribution[\n",
482
+ " (distribution[f\"{column}s per instance\"] >= 10)\n",
483
+ " & (distribution[f\"{column}s per instance\"] < 50)\n",
484
+ " ][\"Number of instances\"].sum()\n",
485
+ " num_images_10_50 = distribution[\n",
486
+ " (distribution[f\"{column}s per instance\"] >= 10)\n",
487
+ " & (distribution[f\"{column}s per instance\"] < 50)\n",
488
+ " ][f\"Number of {column}s\"].sum()\n",
489
+ " num_instances_50_100 = distribution[\n",
490
+ " (distribution[f\"{column}s per instance\"] >= 50)\n",
491
+ " & (distribution[f\"{column}s per instance\"] < 100)\n",
492
+ " ][\"Number of instances\"].sum()\n",
493
+ " num_images_50_100 = distribution[\n",
494
+ " (distribution[f\"{column}s per instance\"] >= 50)\n",
495
+ " & (distribution[f\"{column}s per instance\"] < 100)\n",
496
+ " ][f\"Number of {column}s\"].sum()\n",
497
+ " num_instances_100_1000 = distribution[\n",
498
+ " (distribution[f\"{column}s per instance\"] >= 100)\n",
499
+ " & (distribution[f\"{column}s per instance\"] < 1000)\n",
500
+ " ][\"Number of instances\"].sum()\n",
501
+ " num_images_100_1000 = distribution[\n",
502
+ " (distribution[f\"{column}s per instance\"] >= 100)\n",
503
+ " & (distribution[f\"{column}s per instance\"] < 1000)\n",
504
+ " ][f\"Number of {column}s\"].sum()\n",
505
+ " num_instances_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
506
+ " \"Number of instances\"\n",
507
+ " ].sum()\n",
508
+ " num_images_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
509
+ " f\"Number of {column}s\"\n",
510
+ " ].sum()\n",
511
+ "\n",
512
+ " distribution = distribution[\n",
513
+ " (distribution[f\"{column}s per instance\"] < 10)\n",
514
+ " & (distribution[f\"{column}s per instance\"] > lower_bound)\n",
515
+ " ]\n",
516
+ "\n",
517
+ " distribution = pd.concat(\n",
518
+ " [\n",
519
+ " distribution,\n",
520
+ " pd.DataFrame(\n",
521
+ " {\n",
522
+ " f\"{column}s per instance\": [\"10-50\"],\n",
523
+ " \"Number of instances\": [num_instances_10_50],\n",
524
+ " f\"Number of {column}s\": [num_images_10_50],\n",
525
+ " }\n",
526
+ " ),\n",
527
+ " pd.DataFrame(\n",
528
+ " {\n",
529
+ " f\"{column}s per instance\": [\"50-100\"],\n",
530
+ " \"Number of instances\": [num_instances_50_100],\n",
531
+ " f\"Number of {column}s\": [num_images_50_100],\n",
532
+ " }\n",
533
+ " ),\n",
534
+ " pd.DataFrame(\n",
535
+ " {\n",
536
+ " f\"{column}s per instance\": [\"100-1000\"],\n",
537
+ " \"Number of instances\": [num_instances_100_1000],\n",
538
+ " f\"Number of {column}s\": [num_images_100_1000],\n",
539
+ " }\n",
540
+ " ),\n",
541
+ " pd.DataFrame(\n",
542
+ " {\n",
543
+ " f\"{column}s per instance\": [\"1000+\"],\n",
544
+ " \"Number of instances\": [num_instances_1000plus],\n",
545
+ " f\"Number of {column}s\": [num_images_1000plus],\n",
546
+ " }\n",
547
+ " ),\n",
548
+ " ],\n",
549
+ " ignore_index=True,\n",
550
+ " )\n",
551
+ "\n",
552
+ " distribution = pd.concat(\n",
553
+ " [\n",
554
+ " distribution,\n",
555
+ " pd.DataFrame(\n",
556
+ " {\n",
557
+ " f\"{column}s per instance\": [\"Total\"],\n",
558
+ " \"Number of instances\": [distribution[\"Number of instances\"].sum()],\n",
559
+ " f\"Number of {column}s\": [distribution[f\"Number of {column}s\"].sum()],\n",
560
+ " }\n",
561
+ " ),\n",
562
+ " ],\n",
563
+ " ignore_index=True,\n",
564
+ " )\n",
565
+ " # rename columns\n",
566
+ " return distribution"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "execution_count": 107,
572
+ "metadata": {},
573
+ "outputs": [
574
+ {
575
+ "data": {
576
+ "text/html": [
577
+ "<div>\n",
578
+ "<style scoped>\n",
579
+ " .dataframe tbody tr th:only-of-type {\n",
580
+ " vertical-align: middle;\n",
581
+ " }\n",
582
+ "\n",
583
+ " .dataframe tbody tr th {\n",
584
+ " vertical-align: top;\n",
585
+ " }\n",
586
+ "\n",
587
+ " .dataframe thead th {\n",
588
+ " text-align: right;\n",
589
+ " }\n",
590
+ "</style>\n",
591
+ "<table border=\"1\" class=\"dataframe\">\n",
592
+ " <thead>\n",
593
+ " <tr style=\"text-align: right;\">\n",
594
+ " <th></th>\n",
595
+ " <th>object_names per instance</th>\n",
596
+ " <th>Number of instances</th>\n",
597
+ " <th>Number of object_names</th>\n",
598
+ " </tr>\n",
599
+ " </thead>\n",
600
+ " <tbody>\n",
601
+ " <tr>\n",
602
+ " <th>0</th>\n",
603
+ " <td>3</td>\n",
604
+ " <td>93</td>\n",
605
+ " <td>279</td>\n",
606
+ " </tr>\n",
607
+ " <tr>\n",
608
+ " <th>1</th>\n",
609
+ " <td>4</td>\n",
610
+ " <td>57</td>\n",
611
+ " <td>228</td>\n",
612
+ " </tr>\n",
613
+ " <tr>\n",
614
+ " <th>2</th>\n",
615
+ " <td>5</td>\n",
616
+ " <td>53</td>\n",
617
+ " <td>265</td>\n",
618
+ " </tr>\n",
619
+ " <tr>\n",
620
+ " <th>3</th>\n",
621
+ " <td>6</td>\n",
622
+ " <td>32</td>\n",
623
+ " <td>192</td>\n",
624
+ " </tr>\n",
625
+ " <tr>\n",
626
+ " <th>4</th>\n",
627
+ " <td>7</td>\n",
628
+ " <td>27</td>\n",
629
+ " <td>189</td>\n",
630
+ " </tr>\n",
631
+ " <tr>\n",
632
+ " <th>5</th>\n",
633
+ " <td>8</td>\n",
634
+ " <td>24</td>\n",
635
+ " <td>192</td>\n",
636
+ " </tr>\n",
637
+ " <tr>\n",
638
+ " <th>6</th>\n",
639
+ " <td>9</td>\n",
640
+ " <td>27</td>\n",
641
+ " <td>243</td>\n",
642
+ " </tr>\n",
643
+ " <tr>\n",
644
+ " <th>7</th>\n",
645
+ " <td>10-50</td>\n",
646
+ " <td>227</td>\n",
647
+ " <td>4921</td>\n",
648
+ " </tr>\n",
649
+ " <tr>\n",
650
+ " <th>8</th>\n",
651
+ " <td>50-100</td>\n",
652
+ " <td>51</td>\n",
653
+ " <td>3683</td>\n",
654
+ " </tr>\n",
655
+ " <tr>\n",
656
+ " <th>9</th>\n",
657
+ " <td>100-1000</td>\n",
658
+ " <td>65</td>\n",
659
+ " <td>17027</td>\n",
660
+ " </tr>\n",
661
+ " <tr>\n",
662
+ " <th>10</th>\n",
663
+ " <td>1000+</td>\n",
664
+ " <td>7</td>\n",
665
+ " <td>20758</td>\n",
666
+ " </tr>\n",
667
+ " <tr>\n",
668
+ " <th>11</th>\n",
669
+ " <td>Total</td>\n",
670
+ " <td>663</td>\n",
671
+ " <td>47977</td>\n",
672
+ " </tr>\n",
673
+ " </tbody>\n",
674
+ "</table>\n",
675
+ "</div>"
676
+ ],
677
+ "text/plain": [
678
+ " object_names per instance Number of instances Number of object_names\n",
679
+ "0 3 93 279\n",
680
+ "1 4 57 228\n",
681
+ "2 5 53 265\n",
682
+ "3 6 32 192\n",
683
+ "4 7 27 189\n",
684
+ "5 8 24 192\n",
685
+ "6 9 27 243\n",
686
+ "7 10-50 227 4921\n",
687
+ "8 50-100 51 3683\n",
688
+ "9 100-1000 65 17027\n",
689
+ "10 1000+ 7 20758\n",
690
+ "11 Total 663 47977"
691
+ ]
692
+ },
693
+ "execution_count": 107,
694
+ "metadata": {},
695
+ "output_type": "execute_result"
696
+ }
697
+ ],
698
+ "source": [
699
+ "get_distribution(full_df, \"object_name\")"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 100,
705
+ "metadata": {},
706
+ "outputs": [
707
+ {
708
+ "data": {
709
+ "text/html": [
710
+ "<div>\n",
711
+ "<style scoped>\n",
712
+ " .dataframe tbody tr th:only-of-type {\n",
713
+ " vertical-align: middle;\n",
714
+ " }\n",
715
+ "\n",
716
+ " .dataframe tbody tr th {\n",
717
+ " vertical-align: top;\n",
718
+ " }\n",
719
+ "\n",
720
+ " .dataframe thead th {\n",
721
+ " text-align: right;\n",
722
+ " }\n",
723
+ "</style>\n",
724
+ "<table border=\"1\" class=\"dataframe\">\n",
725
+ " <thead>\n",
726
+ " <tr style=\"text-align: right;\">\n",
727
+ " <th></th>\n",
728
+ " <th>materials per instance</th>\n",
729
+ " <th>Number of instances</th>\n",
730
+ " <th>Number of materials</th>\n",
731
+ " </tr>\n",
732
+ " </thead>\n",
733
+ " <tbody>\n",
734
+ " <tr>\n",
735
+ " <th>0</th>\n",
736
+ " <td>3</td>\n",
737
+ " <td>30</td>\n",
738
+ " <td>90</td>\n",
739
+ " </tr>\n",
740
+ " <tr>\n",
741
+ " <th>1</th>\n",
742
+ " <td>4</td>\n",
743
+ " <td>16</td>\n",
744
+ " <td>64</td>\n",
745
+ " </tr>\n",
746
+ " <tr>\n",
747
+ " <th>2</th>\n",
748
+ " <td>5</td>\n",
749
+ " <td>14</td>\n",
750
+ " <td>70</td>\n",
751
+ " </tr>\n",
752
+ " <tr>\n",
753
+ " <th>3</th>\n",
754
+ " <td>6</td>\n",
755
+ " <td>9</td>\n",
756
+ " <td>54</td>\n",
757
+ " </tr>\n",
758
+ " <tr>\n",
759
+ " <th>4</th>\n",
760
+ " <td>7</td>\n",
761
+ " <td>10</td>\n",
762
+ " <td>70</td>\n",
763
+ " </tr>\n",
764
+ " <tr>\n",
765
+ " <th>5</th>\n",
766
+ " <td>8</td>\n",
767
+ " <td>6</td>\n",
768
+ " <td>48</td>\n",
769
+ " </tr>\n",
770
+ " <tr>\n",
771
+ " <th>6</th>\n",
772
+ " <td>9</td>\n",
773
+ " <td>5</td>\n",
774
+ " <td>45</td>\n",
775
+ " </tr>\n",
776
+ " <tr>\n",
777
+ " <th>7</th>\n",
778
+ " <td>10-50</td>\n",
779
+ " <td>88</td>\n",
780
+ " <td>1975</td>\n",
781
+ " </tr>\n",
782
+ " <tr>\n",
783
+ " <th>8</th>\n",
784
+ " <td>50-100</td>\n",
785
+ " <td>21</td>\n",
786
+ " <td>1409</td>\n",
787
+ " </tr>\n",
788
+ " <tr>\n",
789
+ " <th>9</th>\n",
790
+ " <td>100-1000</td>\n",
791
+ " <td>43</td>\n",
792
+ " <td>13030</td>\n",
793
+ " </tr>\n",
794
+ " <tr>\n",
795
+ " <th>10</th>\n",
796
+ " <td>1000+</td>\n",
797
+ " <td>12</td>\n",
798
+ " <td>34036</td>\n",
799
+ " </tr>\n",
800
+ " <tr>\n",
801
+ " <th>11</th>\n",
802
+ " <td>Total</td>\n",
803
+ " <td>254</td>\n",
804
+ " <td>50891</td>\n",
805
+ " </tr>\n",
806
+ " </tbody>\n",
807
+ "</table>\n",
808
+ "</div>"
809
+ ],
810
+ "text/plain": [
811
+ " materials per instance Number of instances Number of materials\n",
812
+ "0 3 30 90\n",
813
+ "1 4 16 64\n",
814
+ "2 5 14 70\n",
815
+ "3 6 9 54\n",
816
+ "4 7 10 70\n",
817
+ "5 8 6 48\n",
818
+ "6 9 5 45\n",
819
+ "7 10-50 88 1975\n",
820
+ "8 50-100 21 1409\n",
821
+ "9 100-1000 43 13030\n",
822
+ "10 1000+ 12 34036\n",
823
+ "11 Total 254 50891"
824
+ ]
825
+ },
826
+ "execution_count": 100,
827
+ "metadata": {},
828
+ "output_type": "execute_result"
829
+ }
830
+ ],
831
+ "source": [
832
+ "get_distribution(full_df, \"material\")"
833
+ ]
834
+ },
835
+ {
836
+ "cell_type": "markdown",
837
+ "metadata": {},
838
+ "source": [
839
+ "Production date could be used for a regression task, and the other fields could be used for a classification task."
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "markdown",
844
+ "metadata": {},
845
+ "source": [
846
+ "### Year"
847
+ ]
848
+ },
849
+ {
850
+ "cell_type": "code",
851
+ "execution_count": 101,
852
+ "metadata": {},
853
+ "outputs": [
854
+ {
855
+ "data": {
856
+ "text/html": [
857
+ "<div>\n",
858
+ "<style scoped>\n",
859
+ " .dataframe tbody tr th:only-of-type {\n",
860
+ " vertical-align: middle;\n",
861
+ " }\n",
862
+ "\n",
863
+ " .dataframe tbody tr th {\n",
864
+ " vertical-align: top;\n",
865
+ " }\n",
866
+ "\n",
867
+ " .dataframe thead th {\n",
868
+ " text-align: right;\n",
869
+ " }\n",
870
+ "</style>\n",
871
+ "<table border=\"1\" class=\"dataframe\">\n",
872
+ " <thead>\n",
873
+ " <tr style=\"text-align: right;\">\n",
874
+ " <th></th>\n",
875
+ " <th>production.date.starts per instance</th>\n",
876
+ " <th>Number of instances</th>\n",
877
+ " <th>Number of production.date.starts</th>\n",
878
+ " </tr>\n",
879
+ " </thead>\n",
880
+ " <tbody>\n",
881
+ " <tr>\n",
882
+ " <th>0</th>\n",
883
+ " <td>1</td>\n",
884
+ " <td>275</td>\n",
885
+ " <td>275</td>\n",
886
+ " </tr>\n",
887
+ " <tr>\n",
888
+ " <th>1</th>\n",
889
+ " <td>2</td>\n",
890
+ " <td>129</td>\n",
891
+ " <td>258</td>\n",
892
+ " </tr>\n",
893
+ " <tr>\n",
894
+ " <th>2</th>\n",
895
+ " <td>3</td>\n",
896
+ " <td>75</td>\n",
897
+ " <td>225</td>\n",
898
+ " </tr>\n",
899
+ " <tr>\n",
900
+ " <th>3</th>\n",
901
+ " <td>4</td>\n",
902
+ " <td>72</td>\n",
903
+ " <td>288</td>\n",
904
+ " </tr>\n",
905
+ " <tr>\n",
906
+ " <th>4</th>\n",
907
+ " <td>5</td>\n",
908
+ " <td>45</td>\n",
909
+ " <td>225</td>\n",
910
+ " </tr>\n",
911
+ " <tr>\n",
912
+ " <th>5</th>\n",
913
+ " <td>6</td>\n",
914
+ " <td>32</td>\n",
915
+ " <td>192</td>\n",
916
+ " </tr>\n",
917
+ " <tr>\n",
918
+ " <th>6</th>\n",
919
+ " <td>7</td>\n",
920
+ " <td>20</td>\n",
921
+ " <td>140</td>\n",
922
+ " </tr>\n",
923
+ " <tr>\n",
924
+ " <th>7</th>\n",
925
+ " <td>8</td>\n",
926
+ " <td>16</td>\n",
927
+ " <td>128</td>\n",
928
+ " </tr>\n",
929
+ " <tr>\n",
930
+ " <th>8</th>\n",
931
+ " <td>9</td>\n",
932
+ " <td>21</td>\n",
933
+ " <td>189</td>\n",
934
+ " </tr>\n",
935
+ " <tr>\n",
936
+ " <th>9</th>\n",
937
+ " <td>10-50</td>\n",
938
+ " <td>199</td>\n",
939
+ " <td>4226</td>\n",
940
+ " </tr>\n",
941
+ " <tr>\n",
942
+ " <th>10</th>\n",
943
+ " <td>50-100</td>\n",
944
+ " <td>39</td>\n",
945
+ " <td>2661</td>\n",
946
+ " </tr>\n",
947
+ " <tr>\n",
948
+ " <th>11</th>\n",
949
+ " <td>100-1000</td>\n",
950
+ " <td>41</td>\n",
951
+ " <td>10259</td>\n",
952
+ " </tr>\n",
953
+ " <tr>\n",
954
+ " <th>12</th>\n",
955
+ " <td>1000+</td>\n",
956
+ " <td>4</td>\n",
957
+ " <td>7110</td>\n",
958
+ " </tr>\n",
959
+ " <tr>\n",
960
+ " <th>13</th>\n",
961
+ " <td>Total</td>\n",
962
+ " <td>968</td>\n",
963
+ " <td>26176</td>\n",
964
+ " </tr>\n",
965
+ " </tbody>\n",
966
+ "</table>\n",
967
+ "</div>"
968
+ ],
969
+ "text/plain": [
970
+ " production.date.starts per instance Number of instances \\\n",
971
+ "0 1 275 \n",
972
+ "1 2 129 \n",
973
+ "2 3 75 \n",
974
+ "3 4 72 \n",
975
+ "4 5 45 \n",
976
+ "5 6 32 \n",
977
+ "6 7 20 \n",
978
+ "7 8 16 \n",
979
+ "8 9 21 \n",
980
+ "9 10-50 199 \n",
981
+ "10 50-100 39 \n",
982
+ "11 100-1000 41 \n",
983
+ "12 1000+ 4 \n",
984
+ "13 Total 968 \n",
985
+ "\n",
986
+ " Number of production.date.starts \n",
987
+ "0 275 \n",
988
+ "1 258 \n",
989
+ "2 225 \n",
990
+ "3 288 \n",
991
+ "4 225 \n",
992
+ "5 192 \n",
993
+ "6 140 \n",
994
+ "7 128 \n",
995
+ "8 189 \n",
996
+ "9 4226 \n",
997
+ "10 2661 \n",
998
+ "11 10259 \n",
999
+ "12 7110 \n",
1000
+ "13 26176 "
1001
+ ]
1002
+ },
1003
+ "execution_count": 101,
1004
+ "metadata": {},
1005
+ "output_type": "execute_result"
1006
+ }
1007
+ ],
1008
+ "source": [
1009
+ "get_distribution(full_df, \"production.date.start\", lower_bound=0)"
1010
+ ]
1011
+ },
1012
+ {
1013
+ "cell_type": "code",
1014
+ "execution_count": 102,
1015
+ "metadata": {},
1016
+ "outputs": [
1017
+ {
1018
+ "data": {
1019
+ "text/html": [
1020
+ "<div>\n",
1021
+ "<style scoped>\n",
1022
+ " .dataframe tbody tr th:only-of-type {\n",
1023
+ " vertical-align: middle;\n",
1024
+ " }\n",
1025
+ "\n",
1026
+ " .dataframe tbody tr th {\n",
1027
+ " vertical-align: top;\n",
1028
+ " }\n",
1029
+ "\n",
1030
+ " .dataframe thead th {\n",
1031
+ " text-align: right;\n",
1032
+ " }\n",
1033
+ "</style>\n",
1034
+ "<table border=\"1\" class=\"dataframe\">\n",
1035
+ " <thead>\n",
1036
+ " <tr style=\"text-align: right;\">\n",
1037
+ " <th></th>\n",
1038
+ " <th>production.date.ends per instance</th>\n",
1039
+ " <th>Number of instances</th>\n",
1040
+ " <th>Number of production.date.ends</th>\n",
1041
+ " </tr>\n",
1042
+ " </thead>\n",
1043
+ " <tbody>\n",
1044
+ " <tr>\n",
1045
+ " <th>0</th>\n",
1046
+ " <td>1</td>\n",
1047
+ " <td>285</td>\n",
1048
+ " <td>285</td>\n",
1049
+ " </tr>\n",
1050
+ " <tr>\n",
1051
+ " <th>1</th>\n",
1052
+ " <td>2</td>\n",
1053
+ " <td>120</td>\n",
1054
+ " <td>240</td>\n",
1055
+ " </tr>\n",
1056
+ " <tr>\n",
1057
+ " <th>2</th>\n",
1058
+ " <td>3</td>\n",
1059
+ " <td>63</td>\n",
1060
+ " <td>189</td>\n",
1061
+ " </tr>\n",
1062
+ " <tr>\n",
1063
+ " <th>3</th>\n",
1064
+ " <td>4</td>\n",
1065
+ " <td>46</td>\n",
1066
+ " <td>184</td>\n",
1067
+ " </tr>\n",
1068
+ " <tr>\n",
1069
+ " <th>4</th>\n",
1070
+ " <td>5</td>\n",
1071
+ " <td>32</td>\n",
1072
+ " <td>160</td>\n",
1073
+ " </tr>\n",
1074
+ " <tr>\n",
1075
+ " <th>5</th>\n",
1076
+ " <td>6</td>\n",
1077
+ " <td>37</td>\n",
1078
+ " <td>222</td>\n",
1079
+ " </tr>\n",
1080
+ " <tr>\n",
1081
+ " <th>6</th>\n",
1082
+ " <td>7</td>\n",
1083
+ " <td>26</td>\n",
1084
+ " <td>182</td>\n",
1085
+ " </tr>\n",
1086
+ " <tr>\n",
1087
+ " <th>7</th>\n",
1088
+ " <td>8</td>\n",
1089
+ " <td>20</td>\n",
1090
+ " <td>160</td>\n",
1091
+ " </tr>\n",
1092
+ " <tr>\n",
1093
+ " <th>8</th>\n",
1094
+ " <td>9</td>\n",
1095
+ " <td>19</td>\n",
1096
+ " <td>171</td>\n",
1097
+ " </tr>\n",
1098
+ " <tr>\n",
1099
+ " <th>9</th>\n",
1100
+ " <td>10-50</td>\n",
1101
+ " <td>210</td>\n",
1102
+ " <td>4562</td>\n",
1103
+ " </tr>\n",
1104
+ " <tr>\n",
1105
+ " <th>10</th>\n",
1106
+ " <td>50-100</td>\n",
1107
+ " <td>41</td>\n",
1108
+ " <td>2588</td>\n",
1109
+ " </tr>\n",
1110
+ " <tr>\n",
1111
+ " <th>11</th>\n",
1112
+ " <td>100-1000</td>\n",
1113
+ " <td>47</td>\n",
1114
+ " <td>11609</td>\n",
1115
+ " </tr>\n",
1116
+ " <tr>\n",
1117
+ " <th>12</th>\n",
1118
+ " <td>1000+</td>\n",
1119
+ " <td>3</td>\n",
1120
+ " <td>5696</td>\n",
1121
+ " </tr>\n",
1122
+ " <tr>\n",
1123
+ " <th>13</th>\n",
1124
+ " <td>Total</td>\n",
1125
+ " <td>949</td>\n",
1126
+ " <td>26248</td>\n",
1127
+ " </tr>\n",
1128
+ " </tbody>\n",
1129
+ "</table>\n",
1130
+ "</div>"
1131
+ ],
1132
+ "text/plain": [
1133
+ " production.date.ends per instance Number of instances \\\n",
1134
+ "0 1 285 \n",
1135
+ "1 2 120 \n",
1136
+ "2 3 63 \n",
1137
+ "3 4 46 \n",
1138
+ "4 5 32 \n",
1139
+ "5 6 37 \n",
1140
+ "6 7 26 \n",
1141
+ "7 8 20 \n",
1142
+ "8 9 19 \n",
1143
+ "9 10-50 210 \n",
1144
+ "10 50-100 41 \n",
1145
+ "11 100-1000 47 \n",
1146
+ "12 1000+ 3 \n",
1147
+ "13 Total 949 \n",
1148
+ "\n",
1149
+ " Number of production.date.ends \n",
1150
+ "0 285 \n",
1151
+ "1 240 \n",
1152
+ "2 189 \n",
1153
+ "3 184 \n",
1154
+ "4 160 \n",
1155
+ "5 222 \n",
1156
+ "6 182 \n",
1157
+ "7 160 \n",
1158
+ "8 171 \n",
1159
+ "9 4562 \n",
1160
+ "10 2588 \n",
1161
+ "11 11609 \n",
1162
+ "12 5696 \n",
1163
+ "13 26248 "
1164
+ ]
1165
+ },
1166
+ "execution_count": 102,
1167
+ "metadata": {},
1168
+ "output_type": "execute_result"
1169
+ }
1170
+ ],
1171
+ "source": [
1172
+ "get_distribution(full_df, \"production.date.end\", lower_bound=0)"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "cell_type": "code",
1177
+ "execution_count": 91,
1178
+ "metadata": {},
1179
+ "outputs": [
1180
+ {
1181
+ "data": {
1182
+ "text/html": [
1183
+ "<div>\n",
1184
+ "<style scoped>\n",
1185
+ " .dataframe tbody tr th:only-of-type {\n",
1186
+ " vertical-align: middle;\n",
1187
+ " }\n",
1188
+ "\n",
1189
+ " .dataframe tbody tr th {\n",
1190
+ " vertical-align: top;\n",
1191
+ " }\n",
1192
+ "\n",
1193
+ " .dataframe thead th {\n",
1194
+ " text-align: right;\n",
1195
+ " }\n",
1196
+ "</style>\n",
1197
+ "<table border=\"1\" class=\"dataframe\">\n",
1198
+ " <thead>\n",
1199
+ " <tr style=\"text-align: right;\">\n",
1200
+ " <th></th>\n",
1201
+ " <th>start_year</th>\n",
1202
+ " <th>end_year</th>\n",
1203
+ " <th>year_diff</th>\n",
1204
+ " <th>mid_year</th>\n",
1205
+ " </tr>\n",
1206
+ " </thead>\n",
1207
+ " <tbody>\n",
1208
+ " <tr>\n",
1209
+ " <th>2</th>\n",
1210
+ " <td>-3000</td>\n",
1211
+ " <td>-3000</td>\n",
1212
+ " <td>0</td>\n",
1213
+ " <td>-3000</td>\n",
1214
+ " </tr>\n",
1215
+ " <tr>\n",
1216
+ " <th>142</th>\n",
1217
+ " <td>-600</td>\n",
1218
+ " <td>-332</td>\n",
1219
+ " <td>268</td>\n",
1220
+ " <td>-466</td>\n",
1221
+ " </tr>\n",
1222
+ " <tr>\n",
1223
+ " <th>143</th>\n",
1224
+ " <td>-1069</td>\n",
1225
+ " <td>-716</td>\n",
1226
+ " <td>353</td>\n",
1227
+ " <td>-893</td>\n",
1228
+ " </tr>\n",
1229
+ " <tr>\n",
1230
+ " <th>147</th>\n",
1231
+ " <td>-716</td>\n",
1232
+ " <td>-332</td>\n",
1233
+ " <td>384</td>\n",
1234
+ " <td>-524</td>\n",
1235
+ " </tr>\n",
1236
+ " <tr>\n",
1237
+ " <th>148</th>\n",
1238
+ " <td>-716</td>\n",
1239
+ " <td>-332</td>\n",
1240
+ " <td>384</td>\n",
1241
+ " <td>-524</td>\n",
1242
+ " </tr>\n",
1243
+ " <tr>\n",
1244
+ " <th>...</th>\n",
1245
+ " <td>...</td>\n",
1246
+ " <td>...</td>\n",
1247
+ " <td>...</td>\n",
1248
+ " <td>...</td>\n",
1249
+ " </tr>\n",
1250
+ " <tr>\n",
1251
+ " <th>60081</th>\n",
1252
+ " <td>218</td>\n",
1253
+ " <td>222</td>\n",
1254
+ " <td>4</td>\n",
1255
+ " <td>220</td>\n",
1256
+ " </tr>\n",
1257
+ " <tr>\n",
1258
+ " <th>60082</th>\n",
1259
+ " <td>1996</td>\n",
1260
+ " <td>1996</td>\n",
1261
+ " <td>0</td>\n",
1262
+ " <td>1996</td>\n",
1263
+ " </tr>\n",
1264
+ " <tr>\n",
1265
+ " <th>60083</th>\n",
1266
+ " <td>2016</td>\n",
1267
+ " <td>2016</td>\n",
1268
+ " <td>0</td>\n",
1269
+ " <td>2016</td>\n",
1270
+ " </tr>\n",
1271
+ " <tr>\n",
1272
+ " <th>60084</th>\n",
1273
+ " <td>1996</td>\n",
1274
+ " <td>1996</td>\n",
1275
+ " <td>0</td>\n",
1276
+ " <td>1996</td>\n",
1277
+ " </tr>\n",
1278
+ " <tr>\n",
1279
+ " <th>60085</th>\n",
1280
+ " <td>1996</td>\n",
1281
+ " <td>1996</td>\n",
1282
+ " <td>0</td>\n",
1283
+ " <td>1996</td>\n",
1284
+ " </tr>\n",
1285
+ " </tbody>\n",
1286
+ "</table>\n",
1287
+ "<p>26016 rows × 4 columns</p>\n",
1288
+ "</div>"
1289
+ ],
1290
+ "text/plain": [
1291
+ " start_year end_year year_diff mid_year\n",
1292
+ "2 -3000 -3000 0 -3000\n",
1293
+ "142 -600 -332 268 -466\n",
1294
+ "143 -1069 -716 353 -893\n",
1295
+ "147 -716 -332 384 -524\n",
1296
+ "148 -716 -332 384 -524\n",
1297
+ "... ... ... ... ...\n",
1298
+ "60081 218 222 4 220\n",
1299
+ "60082 1996 1996 0 1996\n",
1300
+ "60083 2016 2016 0 2016\n",
1301
+ "60084 1996 1996 0 1996\n",
1302
+ "60085 1996 1996 0 1996\n",
1303
+ "\n",
1304
+ "[26016 rows x 4 columns]"
1305
+ ]
1306
+ },
1307
+ "execution_count": 91,
1308
+ "metadata": {},
1309
+ "output_type": "execute_result"
1310
+ }
1311
+ ],
1312
+ "source": [
1313
+ "year_df = pd.DataFrame()\n",
1314
+ "year_df[\"start_year\"] = full_df[\"production.date.start\"]\n",
1315
+ "year_df[\"end_year\"] = full_df[\"production.date.end\"]\n",
1316
+ "year_df = year_df.dropna()\n",
1317
+ "\n",
1318
+ "non_numeric_instances = year_df[\n",
1319
+ " pd.to_numeric(year_df[\"start_year\"], errors=\"coerce\").isna()\n",
1320
+ " | pd.to_numeric(year_df[\"end_year\"], errors=\"coerce\").isna()\n",
1321
+ "]\n",
1322
+ "# get non-numeric instances\n",
1323
+ "year_df = year_df[~year_df.index.isin(non_numeric_instances.index)]\n",
1324
+ "year_df[\"start_year\"] = year_df[\"start_year\"].astype(int)\n",
1325
+ "year_df[\"end_year\"] = year_df[\"end_year\"].astype(int)\n",
1326
+ "year_df[\"year_diff\"] = year_df[\"end_year\"] - year_df[\"start_year\"]\n",
1327
+ "\n",
1328
+ "year_df[\"mid_year\"] = year_df[\"start_year\"] + year_df[\"year_diff\"] / 2\n",
1329
+ "year_df[\"mid_year\"] = year_df[\"mid_year\"].apply(lambda x: int(np.floor(x)))\n",
1330
+ "year_df"
1331
+ ]
1332
+ },
1333
+ {
1334
+ "cell_type": "code",
1335
+ "execution_count": 93,
1336
+ "metadata": {},
1337
+ "outputs": [
1338
+ {
1339
+ "data": {
1340
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEHUlEQVR4nO3df3zP9f7/8ft7s723sZ/YZjWzIhQdoTQRsTasRE4dUaFFTtvJIp2UJCpZ0RDJOZkU+XUcdRCWn6Xlxwr5kVJ+xraKmZ/bbK/vH332+nrb8DJv29vcrpeLS72er8f79Xq83k/b7l7v5/s9m2EYhgAAAHBBbhXdAAAAwNWA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEVGLDhw+XzWYrl3O1bdtWbdu2NbdXrVolm82mefPmlcv5e/furTp16pTLucrq+PHjevLJJxUaGiqbzaakpCSnn+NS5txms2n48OFO7wGorAhNwFVi2rRpstls5h8vLy+FhYUpNjZW48eP17Fjx5xynoMHD2r48OHatGmTU47nTK7cmxVvvPGGpk2bpr///e/66KOP9Nhjj523tk6dOrLZbIqOji51/7/+9S/z78LGjRuvSL9vvvmmbDabli5dWur+Tp06yd/fXwcPHrwi5wdcjgHgqpCammpIMkaMGGF89NFHxtSpU4033njDiImJMWw2mxEREWFs3rzZ4TEFBQXGqVOnLuk8GzZsMCQZqampl/S4vLw8Iy8vz9xeuXKlIcmYO3fuJR2nrL3l5+cbp0+fdtq5roQWLVoYd911l6XaiIgIw8vLy3BzczMOHTpUYn+bNm0MLy8vQ5KxYcMGc/xS5lyS8corr5x3f35+vtG4cWPjhhtuME6ePOmwb86cOYYkY+LEiZbOBVQG3GkCrjIdO3bUo48+qj59+mjIkCFaunSpvvjiC2VnZ6tz5846deqUWVulShV5eXld0X5OnjwpSfL09JSnp+cVPdeFeHh4yG63V9j5rcjOzlZAQIDl+rvuukvVqlXT7NmzHcYPHDigL7/8UnFxcSUe48w59/Dw0JQpU7Rnzx6NHDnSHD927JiSkpJ05513qn///k4514UUFRXp9OnTV/w8wMUQmoBKoF27dnr55Ze1d+9effzxx+Z4aetb0tLS1KpVKwUEBKhatWqqX7++XnzxRUl/rkO6/fbbJUl9+vQxX/6ZNm2apD/XLTVq1EgZGRm6++675ePjYz723DVNxQoLC/Xiiy8qNDRUVatWVefOnbV//36Hmjp16qh3794lHnv2MS/WW2lrmk6cOKFBgwYpPDxcdrtd9evX19tvvy3DMBzqbDabEhMTtWDBAjVq1Eh2u1233HKLlixZUvoTfo7s7GzFx8crJCREXl5e+stf/qIPP/zQ3F+8vmv37t1atGiR2fuePXsueFwvLy89+OCDmjlzpsP4J598osDAQMXGxpZ4TGlznpeXp2effVY1a9aUr6+vOnfurAMHDli6tuJg9Pbbb2v79u2SpKFDhyo7O1tTpkyRm5ubcnJylJSUZD7PdevW1ejRo1VUVORwrLffflstW7ZU9erV5e3trWbNmpW65q14PmbMmKFbbrlFdrvd8lwAV1KVim4AgHM89thjevHFF7Vs2TL17du31Jpt27bpvvvu06233qoRI0bIbrdr165dWrt2rSSpYcOGGjFihIYNG6Z+/fqpdevWkqSWLVuax/jjjz/UsWNHde/eXY8++qhCQkIu2Nfrr78um82mf/7zn8rOzlZKSoqio6O1adMmeXt7W74+K72dzTAMde7cWStXrlR8fLyaNGmipUuXavDgwfr111/1zjvvONR/9dVXmj9/vp5++mn5+vpq/Pjx6tatm/bt26fq1auft69Tp06pbdu22rVrlxITExUZGam5c+eqd+/eysnJ0YABA9SwYUN99NFHevbZZ3X99ddr0KBBkqSaNWte9Lp79OihmJgY/fzzz7rxxhslSTNnztRf//pXeXh4WHrunnzySX388cfq0aOHWrZsqRUrVpR6l+p8Ro0apQULFuipp55SSkqKJk6cqMGDB6tx48Y6efKk2rRpo19//VVPPfWUateura+//lpDhgzRoUOHlJKSYh5n3Lhx6ty5s3r27Kn8/HzNmjVLDz30kBYuXFiinxUrVmjOnDlKTExUjRo1XH6RP64RFf36IABritc0nb1+5Vz+/v7GbbfdZm6/8sorxtlf5u+8844hyfjtt9/Oe4wLrRtq06aNIcmYPHlyqfvatGljbhevabruuuuM3Nxcc7x4Lcy4cePMsYiICKNXr14XPeaFeuvVq5cRERFhbi9YsMCQZLz22msOdX/9618Nm81m7Nq1yxyTZHh6ejqMbd682ZBkTJgwocS5zpaSkmJIMj7++GNzLD8/34iKijKqVavmcO0RERFGXFzcBY93bu2ZM2eM0NBQY+TIkYZhGMb27dsNScbq1atL/Ttx7pxv2rTJkGQ8/fTTDsfv0aPHRdc0nW3evHmGJCMoKMhhjdPIkSONqlWrGj/++KND/QsvvGC4u7sb+/btM8fOXReVn59vNGrUyGjXrp3DuCTDzc3N2LZtm6XegPLCy3NAJVKtWrULvouueD3Np59+WuKlE6vsdrv69Oljuf7xxx+Xr6+vuf3Xv/5VtWrV0uLFi8t0fqsWL14sd3d3PfPMMw7jgwYNkmEY+vzzzx3Go6OjzTs5knTrrbfKz89Pv/zyy0XPExoaqkceecQc8/Dw0DPPPKPjx49r9erVl3Ud7u7uevjhh/XJJ59IkmbMmKHw8HDzTtvFFD/P5z4Pl/pxB926dVOnTp10+PBhTZw40bxLOHfuXLVu3VqBgYH6/fffzT/R0dEqLCzUmjVrzGOcfWfxyJEjOnr0qFq3bq1vv/22xPnatGmjm2+++ZJ6BK40QhNQiRw/ftwhoJzrb3/7m+666y49+eSTCgkJUffu3TVnzpxLClDXXXfdJS34rlevnsO2zWZT3bp1L7qe53Lt3btXYWFhJZ6Phg0bmvvPVrt27RLHCAwM1JEjRy56nnr16snNzfHb6fnOUxY9evTQ9u3btXnzZs2cOVPdu3e3/FlMe/fulZubm0MglKT69etfch/Fa8qaN29ujv30009asmSJatas6fCn+KMSsrOzzdqFCxfqzjvvlJeXl4KCglSzZk299957Onr0aIlzRUZGXnJ/wJXGmiagkjhw4ICOHj2qunXrnrfG29tba9as0cqVK7Vo0SItWbJEs2fPVrt27bRs2TK5u7tf9DyXsg7JqvMFgMLCQks9OcP5zmOcs2i8IrRo0UI33nijkpKStHv3bvXo0aOiWzIVFRXp3nvv1fPPP1/q/ptuukmS9OWXX6pz5866++67NWnSJNWqVUseHh5KTU0tsdBdujJ/z4DLRWgCKomPPvpIkkp9R9XZ3Nzc1L59e7Vv315jx47VG2+8oZdeekkrV65UdHS00z9B/KeffnLYNgxDu3bt0q233mqOBQYGKicnp8R
j9+7dqxtuuMHcvpTeIiIi9MUXX+jYsWMOd5t++OEHc78zREREaMuWLSoqKnK42+Ts8zzyyCN67bXX1LBhQzVp0uSS+isqKtLPP//scHdp586dTunrxhtv1PHjx8/7IZzF/vOf/8jLy0tLly51+GiI1NRUp/QBlAdengMqgRUrVmjkyJGKjIxUz549z1t3+PDhEmPFP4Dz8vIkSVWrVpWkUkNMWUyfPt1hndW8efN06NAhdezY0Ry78cYb9c033yg/P98cW7hwYYmPJriU3jp16qTCwkK9++67DuPvvPOObDabw/kvR6dOnZSZmenwWUpnzpzRhAkTVK1aNbVp08Yp53nyySf1yiuvaMyYMZf0uOLrHD9+vMP42e9quxwPP/yw0tPTS/3U8JycHJ05c0bSn3fybDabCgsLzf179uzRggULnNIHUB640wRcZT7//HP98MMPOnPmjLKysrRixQqlpaUpIiJCn3322QU/2HDEiBFas2aN4uLiFBERoezsbE2aNEnXX3+9WrVqJenPABMQEKDJkyfL19dXVatWVYsWLcq8xiQoKEitWrVSnz59lJWVpZSUFNWtW9fhYxGefPJJzZs3Tx06dNDDDz+sn3/+WR9//HGJdTiX0tv999+ve+65Ry+99JL27Nmjv/zlL1q2bJk+/fRTJSUllTh2WfXr10/vv/++evfurYyMDNWpU0fz5s3T2rVrlZKScsE1ZpciIiKiTL8nrkmTJnrkkUc0adIkHT16VC1bttTy5cu1a9cup/Q1ePBgffbZZ7rvvvvUu3dvNWvWTCdOnND333+vefPmac+ePapRo4bi4uI0duxYdejQQT169FB2drYmTpyounXrasuWLU7pBbjSCE3AVWbYsGGS/vwE7qCgIDVu3FgpKSnq06fPRX9Ad+7cWXv27NHUqVP1+++/q0aNGmrTpo1effVV+fv7S/rznV8ffvihhgwZov79++vMmTNKTU0tc2h68cUXtWXLFo0aNUrHjh1T+/btNWnSJPn4+Jg1sbGxGjNmjMaOHaukpCQ1b95cCxcuND/PqNil9Obm5qbPPvtMw4YN0+zZs5Wamqo6derorbfeKnHcy+Ht7a1Vq1bphRde0Icffqjc3FzVr19fqamppX5gZ0WYOnWqatasqRkzZmjBggVq166dFi1apPDw8Ms+to+Pj1avXq033nhDc+fO1fTp0+Xn56ebbrrJ4e9Vu3bt9MEHH+jNN99UUlKSIiMjNXr0aO3Zs4fQhKuGzXCFVY4AAAAujjVNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAI+p8lJioqKdPDgQfn6+jr911AAAIArwzAMHTt2TGFhYSV+8fa5CE1OcvDgQad8UBwAACh/+/fv1/XXX3/BGkKTkxR/EvP+/fvl5+dXwd04T0FBgZYtW6aYmBh5eHhUdDs4C3Pj2pgf18b8uK7ynpvc3FyFh4db+pVHhCYnKX5Jzs/Pr9KFJh8fH/n5+fGNxcUwN66N+XFtzI/rqqi5sbK0hoXgAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFVSq6AQAAULnVeWHRRWv2vBlXDp1cHu40AQAAWFChoWnNmjW6//77FRYWJpvNpgULFjjsNwxDw4YNU61ateTt7a3o6Gj99NNPDjWHDx9Wz5495efnp4CAAMXHx+v48eMONVu2bFHr1q3l5eWl8PBwJScnl+hl7ty5atCggby8vNS4cWMtXrzY6dcLAACuXhUamk6cOKG//OUvmjhxYqn7k5OTNX78eE2ePFnr1q1T1apVFRsbq9OnT5s1PXv21LZt25SWlqaFCxdqzZo16tevn7k/NzdXMTExioiIUEZGht566y0NHz5cU6ZMMWu+/vprPfLII4qPj9d3332nLl26qEuXLtq6deuVu3gAAHBVqdA1TR07dlTHjh1L3WcYhlJSUjR06FA98MADkqTp06crJCRECxYsUPfu3bVjxw4tWbJEGzZsUPPmzSVJEyZMUKdOnfT2228rLCxMM2bMUH5+vqZOnSpPT0/dcsst2rRpk8aOHWuGq3HjxqlDhw4aPHiwJGnkyJFKS0vTu+++q8mTJ5fDMwEAAFydyy4E3717tzIzMxUdHW2O+fv7q0WLFkpPT1f37t2Vnp6ugIAAMzBJUnR0tNzc3LRu3Tp17dpV6enpuvvuu+Xp6WnWxMbGavTo0Tpy5IgCAwOVnp6ugQMHOpw/Nja2xMuFZ8vLy1NeXp65nZubK0kqKChQQUHB5V6+yyi+lsp0TZUFc+PamB/XxvyUL7u7cdGac+ekvObmUs7jsqEpMzNTkhQSEuIwHhISYu7LzMxUcHCww/4qVaooKCjIoSYyMrLEMYr3BQYGKjMz84LnKc2oUaP06quvlhhftmyZfHx8rFziVSUtLa2iW8B5MDeujflxbcxP+Ui+4+I1564lLq+5OXnypOValw1Nrm7IkCEOd6dyc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCZUNTaGioJCkrK0u1atUyx7OystSkSROzJjs72+FxZ86c0eHDh83Hh4aGKisry6GmePtiNcX7S2O322W320uMe3h4VMovwMp6XZUBc+PamB/XxvyUj7xC20Vrzp2H8pqbSzmHy35OU2RkpEJDQ7V8+XJzLDc3V+vWrVNUVJQkKSoqSjk5OcrIyDBrVqxYoaKiIrVo0cKsWbNmjcNrlmlpaapfv74CAwPNmrPPU1xTfB4AAIAKDU3Hjx/Xpk2btGnTJkl/Lv7etGmT9u3bJ5vNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DS2oABA7RkyRKNGTNGP/zwg4YPH66NGzcqMTGxvJ8SAADgoir05bmNGzfqnnvuMbeLg0yvXr00bdo0Pf/88zpx4oT69eunnJwctWrVSkuWLJGXl5f5mBkzZigxMVHt27eXm5ubunXrpvHjx5v7/f39tWzZMiUkJKhZs2aqUaOGhg0b5vBZTi1bttTMmTM1dOhQvfjii6pXr54WLFigRo0alcOzAAAArgYVGpratm0rwzj/2xBtNptGjBihESNGnLcmKChIM2fOvOB5br31Vn355ZcXrHnooYf00EMPXbhhAABwzXLZNU0AAACuhNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAa
EJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYIFLh6bCwkK9/PLLioyMlLe3t2688UaNHDlShmGYNYZhaNiwYapVq5a8vb0VHR2tn376yeE4hw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ5fLNQIAgKuDS4em0aNH67333tO7776rHTt2aPTo0UpOTtaECRPMmuTkZI0fP16TJ0/WunXrVLVqVcXGxur06dNmTc+ePbVt2zalpaVp4cKFWrNmjfr162fuz83NVUxMjCIiIpSRkaG33npLw4cP15QpU8r1egEAgOuqUtENXMjXX3+tBx54QHFxcZKkOnXq6JNPPtH69esl/XmXKSUlRUOHDtUDDzwgSZo+fbpCQkK0YMECde/eXTt27NCSJUu0YcMGNW/eXJI0YcIEderUSW+//bbCwsI0Y8YM5efna+rUqfL09NQtt9yiTZs2aezYsQ7hCgAAXLtcOjS1bNlSU6ZM0Y8//qibbrpJmzdv1ldffaWxY8dKknbv3q3MzExFR0ebj/H391eLFi2Unp6u7t27Kz09XQEBAWZgkqTo6Gi5ublp3bp16tq1q9LT03X33XfL09PTrImNjdXo0aN15MgRBQYGlugtLy9PeXl55nZubq4kqaCgQAUFBU5/LipK8bVUpmuqLJgb18b8uDbmp3zZ3Y2L1pw7J+U1N5dyHpcOTS+88IJyc3PVoEEDubu7q7CwUK+//rp69uwpScrMzJQkhYSEODwuJCTE3JeZmang4GCH/VWqVFFQUJBDTWRkZIljFO8rLTSNGjVKr776aonxZcuWycfHpyyX69LS0tIqugWcB3Pj2pgf18b8lI/kOy5es3jxYoft8pqbkydPWq516dA0Z84czZgxQzNnzjRfMktKSlJYWJh69epVob0NGTJEAwcONLdzc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCpUPT4MGD9cILL6h79+6SpMaNG2vv3r0aNWqUevXqpdDQUElSVlaWatWqZT4uKytLTZo0kSSFhoYqOzvb4bhnzpzR4cOHzceHhoYqKyvLoaZ4u7jmXHa7XXa7vcS4h4dHpfwCrKzXVRkwN66N+XFtzE/5yCu0XbTm3Hkor7m5lHO49LvnTp48KTc3xxbd3d1VVFQkSYqMjFRoaKiWL19u7s/NzdW6desUFRUlSYqKilJOTo4yMjLMmhUrVqioqEgtWrQwa9asWePwumZaWprq169f6ktzAADg2uPSoen+++/X66+/rkWLFmnPnj3673//q7Fjx6pr166SJJvNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DyGwAAuLa59MtzEyZM0Msvv6ynn35a2dnZCgsL01NPPaVhw4aZNc8//7xOnDihfv36KScnR61atdKSJUvk5eVl1syYMUOJiYlq37693Nzc1K1bN40fP97c7+/vr2XLlikhIUHNmjVTjRo1NGzYMD5uAAAAmFw6NPn6+iolJUUpKSnnrbHZbBoxYoRGjBhx3pqgoCDNnDnzgue69dZb9eWXX5a1VQAAUMm59MtzAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAVlCk2//PKLs/sAAABwaWUKTXXr1tU999yjjz/+WKdPn3Z2TwAAAC6nTKHp22+/1a233qqBAwcqNDRUTz31lNavX+/s3gAAAFxGmUJTkyZNNG7cOB08eFBTp07VoUOH1KpVKzVq1Ehjx47Vb7/95uw+AQAAKtRlLQSvUqWKHnzwQc2dO1ejR4/Wrl279Nxzzyk8PFyPP/64Dh065Kw+AQAAKtRlhaaNGzfq6aefVq1atTR27Fg999xz+vnnn5WWlqaDBw/qgQcecFafAAAAFapMoWns2LFq3LixWrZsqYMHD2r69Onau3evXnvtNUVGRqp169aaNm2avv3228tu8Ndff9Wjjz6q6tWry9vbW40bN9bGjRvN/YZhaNiwYapVq5a8vb0VHR2tn376yeEYhw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ1927wAAoPIoU2h677331KNHD+3du1cLFizQfffdJzc3x0MFBwfrgw8+uKzmjhw5orvuukseHh76/PPPtX37do0ZM0aBgYFmTXJyssaPH6/Jkydr3bp1qlq1qmJjYx3e1dezZ09t27ZNaWlpWrhwodasWaN+/fqZ+3NzcxUTE6OIiAhlZGTorbfe0vDhwzVlypTL6h8AAFQeVcryoHPv5JTG09NTvXr1KsvhTaNHj1Z4eLhSU1PNscjISPP/DcNQSkqKhg4dar4UOH36dIWEhGjBggXq3r27duzYoSVLlmjDhg1q3ry5JGnChAnq1KmT3n77bYWFhWnGjBnKz8/X1KlT5enpqVtuuUWbNm3S2LFjHcIVAAC4dpUpNKWmpqpatWp66KGHHMbnzp2rkydPXnZYKvbZZ58pNjZWDz30kFavXq3rrrtOTz/9tPr27StJ2r17tzIzMxUdHW0+xt/fXy1atFB6erq6d++u9PR0BQQEmIFJkqKjo+Xm5qZ169apa9euSk9P19133y1PT0+zJjY2VqNHj9aRI0cc7mwVy8vLU15enrmdm5srSSooKFBBQYFTrt8VFF9LZbqmyoK5cW3Mj2tjfsqX3d24aM25c1Jec3Mp5ylTaBo1apTef//9EuPBwcHq16+f00LTL7/8ovfee08DBw7Uiy++qA0bNuiZZ54x72JlZmZKkkJCQhweFxISYu7LzMxUcHCww/4qVaooKCjIoebsO1hnHzMzM7PU0DRq1Ci9+
uqrJcaXLVsmHx+fMl6x60pLS6voFnAezI1rY35cG/NTPpLvuHjN4sWLHbbLa25OnjxpubZMoWnfvn0lQoYkRUREaN++fWU5ZKmKiorUvHlzvfHGG5Kk2267TVu3btXkyZOdFszKasiQIRo4cKC5nZubq/DwcMXExMjPz68CO3OugoICpaWl6d5775WHh0dFt4OzMDeujflxbcxP+Wo0fOlFa7YOj5VU/nNT/EqRFWUKTcHBwdqyZYvq1KnjML5582ZVr169LIcsVa1atXTzzTc7jDVs2FD/+c9/JEmhoaGSpKysLNWqVcusycrKUpMmTcya7Oxsh2OcOXNGhw8fNh8fGhqqrKwsh5ri7eKac9ntdtnt9hLjHh4elfILsLJeV2XA3Lg25se1MT/lI6/QdtGac+ehvObmUs5RpnfPPfLII3rmmWe0cuVKFRYWqrCwUCtWrNCAAQPUvXv3shyyVHfddZd27tzpMPbjjz8qIiJC0p+LwkNDQ7V8+XJzf25urtatW6eoqChJUlRUlHJycpSRkWHWrFixQkVFRWrRooVZs2bNGofXNdPS0lS/fv1SX5oDAADXnjKFppEjR6pFixZq3769vL295e3trZiYGLVr1858Kc0Znn32WX3zzTd64403tGvXLs2cOVNTpkxRQkKCJMlmsykpKUmvvfaaPvvsM33//fd6/PHHFRYWpi5dukj6885Uhw4d1LdvX61fv15r165VYmKiunfvrrCwMElSjx495Onpqfj4eG3btk2zZ8/WuHHjHF5+AwAA17YyvTzn6emp2bNna+TIkdq8ebP5oZPFd4Cc5fbbb9d///tfDRkyRCNGjFBkZKRSUlLUs2dPs+b555/XiRMn1K9fP+Xk5KhVq1ZasmSJvLy8zJoZM2YoMTFR7du3l5ubm7p166bx48eb+/39/bVs2TIlJCSoWbNmqlGjhoYNG8bHDQAAAFOZQlOxm266STfddJOzeinVfffdp/vuu++8+202m0aMGKERI0actyYoKEgzZ8684HluvfVWffnll2XuEwAAVG5lCk2FhYWaNm2ali9fruzsbBUVFTnsX7FihVOaAwAAcBVlCk0DBgzQtGnTFBcXp0aNGslmu/iqeAAAgKtZmULTrFmzNGfOHHXq1MnZ/QAAALikMr17ztPTU3Xr1nV2LwAAAC6rTKFp0KBBGjdunAzj4r9LBgAAoDIo08tzX331lVauXKnPP/9ct9xyS4lP05w/f75TmgMAAHAVZQpNAQEB6tq1q7N7AQAAcFllCk2pqanO7gMAAMCllWlNk/TnL7394osv9P777+vYsWOSpIMHD+r48eNOaw4AAMBVlOlO0969e9WhQwft27dPeXl5uvfee+Xr66vRo0crLy9PkydPdnafAAAAFapMd5oGDBig5s2b68iRI/L29jbHu3btquXLlzutOQAAAFdRpjtNX375pb7++mt5eno6jNepU0e//vqrUxoDAABwJWW601RUVKTCwsIS4wcOHJCvr+9lNwUAAOBqyhSaYmJilJKSYm7bbDYdP35cr7zyCr9aBQAAVEplenluzJgxio2N1c0336zTp0+rR48e+umnn1SjRg198sknzu4RAACgwpUpNF1//fXavHmzZs2apS1btuj48eOKj49Xz549HRaGAwAAVBZlCk2SVKVKFT366KPO7AUAAMBllSk0TZ8+/YL7H3/88TI1AwAA4KrKFJoGDBjgsF1QUKCTJ0/K09NTPj4+hCYAAFDplOndc0eOHHH4c/z4ce3cuVOtWrViITgAAKiUyvy7585Vr149vfnmmyXuQgEAAFQGTgtN0p+Lww8ePOjMQwIAALiEMq1p+uyzzxy2DcPQoUOH9O677+quu+5ySmMAAACupEyhqUuXLg7bNptNNWvWVLt27TRmzBhn9AUAAOBSyhSaioqKnN0HAACAS3PqmiYAAIDKqkx3mgYOHGi5duzYsWU5BQAAgEspU2j67rvv9N1336mgoED169eXJP34449yd3dX06ZNzTqbzeacLgEAACpYmULT/fffL19fX3344YcKDAyU9OcHXvbp00etW7fWoEGDnNokAABARSvTmqYxY8Zo1KhRZmCSpMDAQL322mu8ew4AAFRKZQpNubm5+u2330qM//bbbzp27NhlNwUAAOBqyhSaunbtqj59+mj+/Pk6cOCADhw4oP/85z+Kj4/Xgw8+6OweAQAAKlyZ1jRNnjxZzz33nHr06KGCgoI/D1SliuLj4/XWW285tUEAAABXUKbQ5OPjo0mTJumtt97Szz//LEm68cYbVbVqVac2BwAA4Cou68MtDx06pEOHDqlevXqqWrWqDMNwVl8AAAAupUx3mv744w89/PDDWrlypWw2m3766SfdcMMNio+PV2BgIO+gAwAAl6TOC4skSXZ3Q8l3SI2GL1VeoePnPe55M64iWjOV6U7Ts88+Kw8PD+3bt08+Pj7m+N/+9jctWbLEac0BAAC4ijLdaVq2bJmWLl2q66+/3mG8Xr162rt3r1MaAwAAcCVlutN04sQJhztMxQ4fPiy73X7ZTQEAALiaMoWm1q1ba/r06ea2zWZTUVGRkpOTdc899zitOQAAAFdRppfnkpOT1b59e23cuFH5+fl6/vnntW3bNh0+fFhr1651do8AAAAVrkx3mho1aqQff/xRrVq10gMPPKATJ07owQcf1Hfffacbb7zR2T0CAABUuEu+01RQUKAOHTpo8uTJeumll65ETwAAAC7nku80eXh4aMuWLVeiFwAAAJdVppfnHn30UX3wwQfO7gUAAMBllWkh+JkzZzR16lR98cUXatasWYnfOTd27FinNAcAAOAqLik0/fLLL6pTp462bt2qpk2bSpJ+/PFHhxqbzVbaQwEAAK5qlxSa6tWrp0OHDmnlypWS/vy1KePHj1dISMgVaQ4AAMBVXNKaJsMwHLY///xznThxwqkNAQAAuKIyLQQvdm6IAgAAqKwuKTTZbLYSa5ZYwwQAAK4Fl7SmyTAM9e7d2/ylvKdPn1b//v1LvHtu/vz5zusQAADABVxSaOrVq5fD9qOPPurUZgAAAFzVJYWm1NTUK9UHAACAS7usheAAAADXCkITAACABYQmAAAACwhNAAAAFlxVoenNN9+UzWZTUlKSOXb69GklJCSoevXqqlatmrp166asrCyHx+3bt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0crgiAABwtbhqQtOGDRv0/vvv69Zbb3UYf/bZZ/W///1Pc+fO1erVq3Xw4EE9+OCD5v7CwkLFxcUpPz9fX3/9tT788ENNmzZNw4YNM2t2796tuLg43XPPPdq0aZOSkpL05JNPaunSpeV2fQAAwLVdFaHp+PHj6tmzp/71r38pMDDQHD969Kg++OADjR07Vu3atVOzZs2Umpqqr7/+Wt98840kadmyZdq+fbs+/vhjNWnSRB07dtTIkSM1ceJE5efnS5ImT56syMhIjRkzRg0bNlRiYqL++te/6p133qmQ6wUAAK7nqghNCQkJiouLU3R0tMN4
RkaGCgoKHMYbNGig2rVrKz09XZKUnp6uxo0bKyQkxKyJjY1Vbm6utm3bZtace+zY2FjzGAAAAJf04ZYVYdasWfr222+1YcOGEvsyMzPl6empgIAAh/GQkBBlZmaaNWcHpuL9xfsuVJObm6tTp07J29u7xLnz8vKUl5dnbufm5kqSCgoKVFBQcIlX6bqKr6UyXVNlwdy4NubHtTE/5cvublivdTMc/nu2KzFfl3JMlw5N+/fv14ABA5SWliYvL6+KbsfBqFGj9Oqrr5YYX7ZsmXx8fCqgoysrLS2tolvAeTA3ro35cW3MT/lIvuPSHzOyeVGJscWLFzuhG0cnT560XOvSoSkjI0PZ2dlq2rSpOVZYWKg1a9bo3Xff1dKlS5Wfn6+cnByHu01ZWVkKDQ2VJIWGhmr9+vUOxy1+d93ZNee+4y4rK0t+fn6l3mWSpCFDhmjgwIHmdm5ursLDwxUTEyM/P7+yX7SLKSgoUFpamu699155eHhUdDs4C3Pj2pgf18b8lK9Gw62/scruZmhk8yK9vNFNeUU2h31bh8c6uzXzlSIrXDo0tW/fXt9//73DWJ8+fdSgQQP985//VHh4uDw8PLR8+XJ169ZNkrRz507t27dPUVFRkqSoqCi9/vrrys7OVnBwsKQ//2Xh5+enm2++2aw5N72mpaWZxyiN3W6X3W4vMe7h4VEpvwAr63VVBsyNa2N+XBvzUz7yCm0XLzr3MUW2Eo+7EnN1Kcd06dDk6+urRo0aOYxVrVpV1atXN8fj4+M1cOBABQUFyc/PT//4xz8UFRWlO++8U5IUExOjm2++WY899piSk5OVmZmpoUOHKiEhwQw9/fv317vvvqvnn39eTzzxhFasWKE5c+Zo0aJF5XvBAADAZbl0aLLinXfekZubm7p166a8vDzFxsZq0qRJ5n53d3ctXLhQf//73xUVFaWqVauqV69eGjFihFkTGRmpRYsW6dlnn9W4ceN0/fXX69///rdiY51/GxAAAFydrrrQtGrVKodtLy8vTZw4URMnTjzvYyIiIi66eKxt27b67rvvnNEiAACohK6Kz2kCAACoaIQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMCCKhXdAAAArqrOC4suWrPnzbhy6ASugDtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGBBlYpuAACAitJo+FLlFdoqug1cJbjTBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAUuHZpGjRql22+/Xb6+vgoODlaXLl20c+dOh5rTp08rISFB1atXV7Vq1dStWzdlZWU51Ozbt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0K315AADgKuLSoWn16tVKSEjQN998o7S0NBUUFCgmJkYnTpwwa5599ln973//09y5c7V69WodPHhQDz74oLm/sLBQcXFxys/P19dff60PP/xQ06ZN07Bhw8ya3bt3Ky4uTvfcc482bdqkpKQkPfnkk1q6dGm5Xi8AAHBdVSq6gQtZsmSJw/a0adMUHBysjIwM3X333Tp69Kg++OADzZw5U+3atZMkpaamqmHDhvrmm2905513atmyZdq+fbu++OILhYSEqEmTJho5cqT++c9/avjw4fL09NTkyZMVGRmpMWPGSJIaNmyor776Su+8845iY2PL/boBAIDrcek7Tec6evSoJCkoKEiSlJGRoYKCAkVHR5s1DRo0UO3atZWeni5JSk9PV+PGjRUSEmLWxMbGKjc3V9u2bTNrzj5GcU3xMQAAAFz6TtPZioqKlJSUpLvuukuNGjWSJGVmZsrT01MBAQEOtSEhIcrMzDRrzg5MxfuL912oJjc3V6dOnZK3t3eJfvLy8pSXl2du5+bmSpIKCgpUUFBwGVfqWoqvpTJdU2XB3Lg25se1Fc+L3c1w2rFwfnZ3689z8ZyUNjdX4rm+lGNeNaEpISFBW7du1VdffVXRrUj6c5H6q6++WmJ82bJl8vHxqYCOrqy0tLSKbgHnwdy4NubHtY1sXnTZx1i8eLETOqncku+49MeUNjdX4rk+efKk5dqrIjQlJiZq4cKFWrNmja6//npzPDQ0VPn5+crJyXG425SVlaXQ0FCzZv369Q7HK3533dk1577jLisrS35+fqXeZZKkIUOGaODAgeZ2bm6uwsPDFRMTIz8/v7JfrIspKChQWlqa7r33Xnl4eFR0OzgLc+PamB/XVjw/L290U16R7bKOtXU4a18vptFw62+ssrsZGtm8qNS5uRLPdfErRVa4dGgyDEP/+Mc/9N///lerVq1SZGSkw/5mzZrJw8NDy5cvV7du3SRJO3fu1L59+xQVFSVJioqK0uuvv67s7GwFBwdL+vNffn5+frr55pvNmnPTa1pamnmM0tjtdtnt9hLjHh4elfIbZGW9rsqAuXFtzI9ryyuyKa/w8kIT83txZXmOS5ubK/FcX8oxXTo0JSQkaObMmfr000/l6+trrkHy9/eXt7e3/P39FR8fr4EDByooKEh+fn76xz/+oaioKN15552SpJiYGN1888167LHHlJycrMzMTA0dOlQJCQlm6Onfv7/effddPf/883riiSe0YsUKzZkzR4sWLaqwawcAAK7Fpd8999577+no0aNq27atatWqZf6ZPXu2WfPOO+/ovvvuU7du3XT33XcrNDRU8+fPN/e7u7tr4cKFcnd3V1RUlB599FE9/vjjGjFihFkTGRmpRYsWKS0tTX/5y180ZswY/fvf/+bjBgAAgMml7zQZxsVX23t5eWnixImaOHHieWsiIiIuunisbdu2+u677y65RwAAcG1w6TtNAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACxw6c9pAgAArq3OC9fOb8/gThMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgQZWKbgAAADhPnRcWWarb82bcFe6k8uFOEwAAgAWEJgAAAAsITQA
AABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYUKWiGwAAAOWvzguLLlqz5824cujk6kFoAgDgKmEl6ODKITQBQCXFnQTAuQhNAAC4AFe8i+SKPVUkQhOuGfyrG7h2XOzr3e5uKPmOcmoGlQbvnjvHxIkTVadOHXl5ealFixZav359RbcEAABcAHeazjJ79mwNHDhQkydPVosWLZSSkqLY2Fjt3LlTwcHBFd0eAFzVuNuLqx2h6Sxjx45V37591adPH0nS5MmTtWjRIk2dOlUvvPBCBXcHoCJV1h/4rrZmxdX6saKy/t1ASYSm/5Ofn6+MjAwNGTLEHHNzc1N0dLTS09MrsDMAl8PVfgg76wesq10XLoz5qhwITf/n999/V2FhoUJCQhzGQ0JC9MMPP5Soz8vLU15enrl99OhRSdLhw4dVUFBwZZstRwUFBTp58qT++OMPeXh4VHQ7l6XKmRMXrfnjjz/KoRPnqIi5aTFq+UVr1g1pXw6d/MlKP+X5Ta7uc3PM/7e7GRp6W5GavDRfeUW2S+rn7OOcD9+8L0+VIkMnTxapSoGbCs+aH1S8C83NlfgefezYMUmSYRgX783pZ79GjBo1Sq+++mqJ8cjIyAroBs5SY0xFd3D14zn8/3pUdAO4IObHdZ1vbq7k95djx47J39//gjWEpv9To0YNubu7Kysry2E8KytLoaGhJeqHDBmigQMHmttFRUU6fPiwqlevLput8vyrJTc3V+Hh4dq/f7/8/Pwquh2chblxbcyPa2N+XFd5z41hGDp27JjCwsIuWkto+j+enp5q1qyZli9fri5dukj6MwgtX75ciYmJJertdrvsdrvDWEBAQDl0WjH8/Pz4xuKimBvXxvy4NubHdZXn3FzsDlMxQtNZBg4cqF69eql58+a64447lJKSohMnTpjvpgMAANcuQtNZ/va3v+m3337TsGHDlJmZqSZNmmjJkiUlFocDAIBrD6HpHImJiaW+HHetstvteuWVV0q8FImKx9y4NubHtTE/rsuV58ZmWHmPHQAAwDWO3z0HAABgAaEJAADAAkITAACABYQmAAAACwhN17BFixapRYsW8vb2VmBgoPmhnsX27dunuLg4+fj4KDg4WIMHD9aZM2ccalatWqWmTZvKbrerbt26mjZtWonzTJw4UXXq1JGXl5datGih9evXX8Grqlzy8vLUpEkT2Ww2bdq0yWHfli1b1Lp1a3l5eSk8PFzJycklHj937lw1aNBAXl5eaty4sRYvXuyw3zAMDRs2TLVq1ZK3t7eio6P1008/XclLuqrt2bNH8fHxioyMlLe3t2688Ua98sorys/Pd6hjblwb35OuvFGjRun222+Xr6+vgoOD1aVLF+3cudOh5vTp00pISFD16tVVrVo1devWrcRv5XDWzyGnMXBNmjdvnhEYGGi89957xs6dO41t27YZs2fPNvefOXPGaNSokREdHW189913xuLFi40aNWoYQ4YMMWt++eUXw8fHxxg4cKCxfft2Y8KECYa7u7uxZMkSs2bWrFmGp6enMXXqVGPbtm1G3759jYCAACMrK6tcr/dq9cwzzxgdO3Y0JBnfffedOX706FEjJCTE6Nmzp7F161bjk08+Mby9vY3333/frFm7dq3h7u5uJCcnG9u3bzeGDh1qeHh4GN9//71Z8+abbxr+/v7GggULjM2bNxudO3c2IiMjjVOnTpXnZV41Pv/8c6N3797G0qVLjZ9//tn49NNPjeDgYGPQoEFmDXPj2vieVD5iY2ON1NRUY+vWrcamTZuMTp06GbVr1zaOHz9u1vTv398IDw83li9fbmzcuNG48847jZYtW5r7nfVzyJkITdeggoIC47rrrjP+/e9/n7dm8eLFhpubm5GZmWmOvffee4afn5+Rl5dnGIZhPP/888Ytt9zi8Li//e1vRmxsrLl9xx13GAkJCeZ2YWGhERYWZowaNcpZl1NpLV682GjQoIGxbdu2EqFp0qRJRmBgoDkXhmEY//znP4369eub2w8//LARFxfncMwWLVoYTz31lGEYhlFUVGSEhoYab731lrk/JyfHsNvtxieffHKFrqrySU5ONiIjI81t5sa18T2pYmRnZxuSjNWrVxuG8effZw8PD2Pu3LlmzY4dOwxJRnp6umEYzvs55Ey8PHcN+vbbb/Xrr7/Kzc1Nt912m2rVqqWOHTtq69atZk16eroaN27s8GnosbGxys3N1bZt28ya6Ohoh2PHxsYqPT1dkpSfn6+MjAyHGjc3N0VHR5s1KF1WVpb69u2rjz76SD4+PiX2p6en6+6775anp6c5Fhsbq507d+rIkSNmzYXmZ/fu3crMzHSo8ff3V4sWLZifS3D06FEFBQWZ28yN6+J7UsU5evSoJJlfKxkZGSooKHCYiwYNGqh27drmXDjj55CzEZquQb/88oskafjw4Ro6dKgWLlyowMBAtW3bVocPH5YkZWZmlvj1McXbmZmZF6zJzc3VqVOn9Pvvv6uwsLDUmuJjoCTDMNS7d2/1799fzZs3L7Xmcubn7P1nP660GlzYrl27NGHCBD311FPmGHPjuvieVDGKioqUlJSku+66S40aNZL0599xT0/PEr/o/tyvg8v9OeRshKZK5IUXXpDNZrvgnx9++EFFRUWSpJdeekndunVTs2bNlJqaKpvNprlz51bwVVReVudnwoQJOnbsmIYMGVLRLV8zrM7N2X799Vd16NBBDz30kPr27VtBnQOuLyEhQVu3btWsWbMqupXLxu+eq0QGDRqk3r17X7Dmhhtu0KFDhyRJN998szlut9t1ww03aN++fZKk0NDQEu8oKX5XQ2hoqPnfc9/pkJWVJT8/P3l7e8vd3V3u7u6l1hQf41pidX5WrFih9PT0Er93qXnz5urZs6c+/PDD8z730sXn5+z9xWO1atVyqGnSpMklX9/VzOrcFDt48KDuuecetWzZUlOmTHGoY25cV40aNfieVM4SExO1cOFCrVmzRtdff705Hhoaqvz8fOXk5DjcbTr36+Byfw453RVZKQWXdvToUcNutzssBM/PzzeCg4PNd/gUL8A7+x0l77//vuHn52ecPn3aMIw/F+A1atTI4diPPPJIiYXgiYmJ5nZhYaFx3XXXsejyAvbu3Wt8//335p+lS5cakox58+YZ+/fvNwzj/y82zs/PNx83ZMiQEouN77vvPodjR0VFlVhs/Pbbb5v7i/9usNj4/A4cOGDUq1fP6N69u3HmzJkS+5kb18b3pPJRVFRkJCQkGGFhYcaPP/5YYn/xQvB58+aZYz/88EOpC8Ev9+eQMxGarlEDBgwwrrvuOmPp0qXGDz/8YMTHxxvBwcHG4cOHDcP4/2/1jImJMTZt2mQsWbLEqFmzZqlv9Rw8eLCxY8cOY+LEiaV+5IDdbjemTZtmbN++3ejXr58REBDg8G4IXNju3btLvHsuJyfHCAkJMR577DFj69atxqxZswwfH58Sb2uvUqWK8fbbbxs7duwwXnnllVLf1h
4QEGB8+umnxpYtW4wHHniAt7VfwIEDB4y6desa7du3Nw4cOGAcOnTI/FOMuXFtfE8qH3//+98Nf39/Y9WqVQ5fJydPnjRr+vfvb9SuXdtYsWKFsXHjRiMqKsqIiooy9zvr55AzEZquUfn5+cagQYOM4OBgw9fX14iOjja2bt3qULNnzx6jY8eOhre3t1GjRg1j0KBBRkFBgUPNypUrjSZNmhienp7GDTfcYKSmppY414QJE4zatWsbnp6exh133GF88803V/LSKp3SQpNhGMbmzZuNVq1aGXa73bjuuuuMN998s8Rj58yZY9x0002Gp6enccsttxiLFi1y2F9UVGS8/PLLRkhIiGG324327dsbO3fuvJKXc1VLTU01JJX652zMjWvje9KVd76vk7N/Rpw6dcp4+umnjcDAQMPHx8fo2rWrwz9ADMN5P4ecxfZ/FwcAAIAL4N1zAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgDXhLZt2yopKemCNXXq1FFKSkq59APg6kNoAnBV6t27t2w2m/r3719iX0JCgmw2m8Mv4Z0/f75GjhxZ5vPFx8ercePGys/PdxhfvHixPD099e2335b52ACuDoQmAFet8PBwzZo1S6dOnTLHTp8+rZkzZ6p27doOtUFBQfL19S3zud555x0dO3ZMr7zyijmWk5Ojvn376uWXX1bTpk3LfOzzKSgocPoxAZQdoQnAVatp06YKDw/X/PnzzbH58+erdu3auu222xxqz315Ljs7W/fff7+8vb0VGRmpGTNmXPBcfn5+Sk1N1ZgxY7Ru3TpJUlJSkq677joNGTJE+/fv18MPP6yAgAAFBQXpgQce0J49e8zHb9iwQffee69q1Kghf39/tWnTpsTdKZvNpvfee0+dO3dW1apV9frrr5fxmQFwJRCaAFzVnnjiCaWmpprbU6dOVZ8+fS76uN69e2v//v1auXKl5s2bp0mTJik7O/uCj7nnnnv09NNPq1evXpo7d67mzJmj6dOnyzAMxcbGytfXV19++aXWrl2ratWqqUOHDubLeceOHVOvXr301Vdf6ZtvvlG9evXUqVMnHTt2zOEcw4cPV9euXfX999/riSeeKMMzAuBKqVLRDQDA5Xj00Uc1ZMgQ7d27V5K0du1azZo1S6tWrTrvY3788Ud9/vnnWr9+vW6//XZJ0gcffKCGDRte9HyjRo3SkiVL1L17d40ZM0YNGjTQxx9/rKKiIv373/+WzWaTJKWmpiogIECrVq1STEyM2rVr53CcKVOmKCAgQKtXr9Z9991njvfo0cNS6ANQ/rjTBOCqVrNmTcXFxWnatGlKTU1VXFycatSoccHH7NixQ1WqVFGzZs3MsQYNGiggIOCi5/P29tZzzz0nHx8fDRgwQJK0efNm7dq1S76+vqpWrZqqVaumoKAgnT59Wj///LMkKSsrS3379lW9evXk7+8vPz8/HT9+XPv27XM4fvPmzS/xGQBQXrjTBOCq98QTTygxMVGSNHHixCt+vipVqsjd3d28q3T8+HE1a9as1HVRNWvWlCT16tVLf/zxh8aNG6eIiAjZ7XZFRUWVeDde1apVr3j/AMqG0ATgqle8dshmsyk2Nvai9Q0aNNCZM2eUkZFhvjy3c+dO5eTklOn8TZs21ezZsxUcHCw/P79Sa9auXatJkyapU6dOkqT9+/fr999/L9P5AFQMXp4DcNVzd3fXjh07tH37drm7u1+0vn79+urQoYOeeuoprVu3ThkZGXryySfl7e1dpvP37NlTNWrU0AMPPKAvv/xSu3fv1qpVq/TMM8/owIEDkqR69erpo48+0o4dO7Ru3Tr17NmzzOcDUDEITQAqBT8/v/Pe5SlNamqqwsLC1KZNGz344IPq16+fgoODy3RuHx8frVmzRrVr19aDDz6ohg0bKj4+XqdPnzZ7+uCDD3TkyBE1bdpUjz32mJ555pkynw9AxbAZhmFUdBMAAACujjtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALDg/wGKLN6uKdIJRwAAAABJRU5ErkJggg==",
1341
+ "text/plain": [
1342
+ "<Figure size 640x480 with 1 Axes>"
1343
+ ]
1344
+ },
1345
+ "metadata": {},
1346
+ "output_type": "display_data"
1347
+ }
1348
+ ],
1349
+ "source": [
1350
+ "year_df[\"mid_year\"].hist(bins=50)\n",
1351
+ "plt.xlabel(\"Mid Year\")\n",
1352
+ "plt.ylabel(\"Frequency\")\n",
1353
+ "plt.title(\"Distribution of Mid Year\")\n",
1354
+ "plt.show()"
1355
+ ]
1356
+ },
1357
+ {
1358
+ "cell_type": "markdown",
1359
+ "metadata": {},
1360
+ "source": [
1361
+ "### Object name"
1362
+ ]
1363
+ },
1364
+ {
1365
+ "cell_type": "code",
1366
+ "execution_count": 106,
1367
+ "metadata": {},
1368
+ "outputs": [
1369
+ {
1370
+ "data": {
1371
+ "text/plain": [
1372
+ "object_name\n",
1373
+ "sherds 5068\n",
1374
+ "photographs 4729\n",
1375
+ "coins 4609\n",
1376
+ "amulets 2485\n",
1377
+ "Woodblock Print 1386\n",
1378
+ "figures 1316\n",
1379
+ "vessels 1165\n",
1380
+ "bowls 807\n",
1381
+ "Papercut 731\n",
1382
+ "pages 647\n",
1383
+ "Slide 633\n",
1384
+ "jars 566\n",
1385
+ "Seal 554\n",
1386
+ "postcards 541\n",
1387
+ "vases 518\n",
1388
+ "Name: count, dtype: int64"
1389
+ ]
1390
+ },
1391
+ "execution_count": 106,
1392
+ "metadata": {},
1393
+ "output_type": "execute_result"
1394
+ }
1395
+ ],
1396
+ "source": [
1397
+ "full_df[\"object_name\"].value_counts()[:15]"
1398
+ ]
1399
+ },
1400
+ {
1401
+ "cell_type": "markdown",
1402
+ "metadata": {},
1403
+ "source": [
1404
+ "### Material"
1405
+ ]
1406
+ },
1407
+ {
1408
+ "cell_type": "code",
1409
+ "execution_count": 104,
1410
+ "metadata": {},
1411
+ "outputs": [
1412
+ {
1413
+ "data": {
1414
+ "text/plain": [
1415
+ "material\n",
1416
+ "pottery 8844\n",
1417
+ "paper 5822\n",
1418
+ "metal 3739\n",
1419
+ "photographic paper 3396\n",
1420
+ "faience 2961\n",
1421
+ " ... \n",
1422
+ "Shell (Ostrich Egg) 1\n",
1423
+ "serpentinite 1\n",
1424
+ "balsa 1\n",
1425
+ "maple 1\n",
1426
+ "fabric art 1\n",
1427
+ "Name: count, Length: 414, dtype: int64"
1428
+ ]
1429
+ },
1430
+ "execution_count": 104,
1431
+ "metadata": {},
1432
+ "output_type": "execute_result"
1433
+ }
1434
+ ],
1435
+ "source": [
1436
+ "full_df[\"material\"].value_counts()"
1437
+ ]
1438
+ },
1439
+ {
1440
+ "cell_type": "code",
1441
+ "execution_count": null,
1442
+ "metadata": {},
1443
+ "outputs": [],
1444
+ "source": []
1445
+ }
1446
+ ],
1447
+ "metadata": {
1448
+ "kernelspec": {
1449
+ "display_name": "ArtifactClassification",
1450
+ "language": "python",
1451
+ "name": "python3"
1452
+ },
1453
+ "language_info": {
1454
+ "codemirror_mode": {
1455
+ "name": "ipython",
1456
+ "version": 3
1457
+ },
1458
+ "file_extension": ".py",
1459
+ "mimetype": "text/x-python",
1460
+ "name": "python",
1461
+ "nbconvert_exporter": "python",
1462
+ "pygments_lexer": "ipython3",
1463
+ "version": "3.10.12"
1464
+ }
1465
+ },
1466
+ "nbformat": 4,
1467
+ "nbformat_minor": 2
1468
+ }
3.0-efficientnet_example.ipynb ADDED
@@ -0,0 +1,1062 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from PIL import Image\n",
10
+ "import pandas as pd\n",
11
+ "import os\n",
12
+ "from datasets import Dataset, Image, DatasetDict\n",
13
+ "from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
14
+ "from transformers import (\n",
15
+ " AutoImageProcessor,\n",
16
+ " AutoModelForImageClassification,\n",
17
+ " TrainingArguments,\n",
18
+ " Trainer,\n",
19
+ " DefaultDataCollator,\n",
20
+ ")\n",
21
+ "import evaluate\n",
22
+ "import numpy as np"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "### Load data"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 2,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
39
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
40
+ "file2obj.rename(columns={\"obj_num\": \"label\"}, inplace=True)\n",
41
+ "\n",
42
+ "# Group by 'obj_num' and count occurrences\n",
43
+ "obj_num_counts = file2obj[\"label\"].value_counts()\n",
44
+ "\n",
45
+ "# Filter rows where 'obj_num' appears more than twice\n",
46
+ "file2obj_3 = file2obj[file2obj[\"label\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "metadata": {},
52
+ "source": [
53
+ "### Form HF dataset"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 3,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "application/vnd.jupyter.widget-view+json": {
64
+ "model_id": "59370086a1b64dc5842d9becd9019aad",
65
+ "version_major": 2,
66
+ "version_minor": 0
67
+ },
68
+ "text/plain": [
69
+ "Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
70
+ ]
71
+ },
72
+ "metadata": {},
73
+ "output_type": "display_data"
74
+ }
75
+ ],
76
+ "source": [
77
+ "ds = Dataset.from_pandas(file2obj_3[[\"image\", \"label\"]], preserve_index=False).cast_column(\n",
78
+ " \"image\", Image()\n",
79
+ ")\n",
80
+ "ds = ds.class_encode_column(\"label\")\n",
81
+ "trainval_test = ds.train_test_split(stratify_by_column=\"label\", test_size=0.16)\n",
82
+ "train_val = trainval_test[\"train\"].train_test_split(stratify_by_column=\"label\", test_size=16 / 84)\n",
83
+ "ds = DatasetDict(\n",
84
+ " {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
85
+ ")"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "markdown",
90
+ "metadata": {},
91
+ "source": [
92
+ "### Transform data"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 4,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "checkpoint = \"google/efficientnet-b3\"\n",
102
+ "image_processor = AutoImageProcessor.from_pretrained(checkpoint)\n",
103
+ "\n",
104
+ "\n",
105
+ "normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
106
+ "size = (\n",
107
+ " image_processor.size[\"shortest_edge\"]\n",
108
+ " if \"shortest_edge\" in image_processor.size\n",
109
+ " else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
110
+ ")\n",
111
+ "_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])\n",
112
+ "\n",
113
+ "\n",
114
+ "def transforms(examples):\n",
115
+ " examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"image\"]]\n",
116
+ " del examples[\"image\"]\n",
117
+ " return examples\n",
118
+ "\n",
119
+ "\n",
120
+ "ds = ds.with_transform(transforms)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "markdown",
125
+ "metadata": {},
126
+ "source": [
127
+ "### Set up model and metrics"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 5,
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "name": "stderr",
137
+ "output_type": "stream",
138
+ "text": [
139
+ "Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
140
+ "- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated\n",
141
+ "- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated\n",
142
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
143
+ ]
144
+ }
145
+ ],
146
+ "source": [
147
+ "labels = ds[\"train\"].features[\"label\"].names\n",
148
+ "model = AutoModelForImageClassification.from_pretrained(\n",
149
+ " checkpoint,\n",
150
+ " num_labels=len(labels),\n",
151
+ " id2label={str(i): c for i, c in enumerate(labels)},\n",
152
+ " label2id={c: str(i) for i, c in enumerate(labels)},\n",
153
+ " ignore_mismatched_sizes=True,\n",
154
+ ")\n",
155
+ "\n",
156
+ "data_collator = DefaultDataCollator()\n",
157
+ "\n",
158
+ "accuracy = evaluate.load(\"accuracy\")\n",
159
+ "\n",
160
+ "\n",
161
+ "def compute_metrics(eval_pred):\n",
162
+ " predictions, labels = eval_pred\n",
163
+ " predictions = np.argmax(predictions, axis=1)\n",
164
+ " return accuracy.compute(predictions=predictions, references=labels)"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "markdown",
169
+ "metadata": {},
170
+ "source": [
171
+ "### Train model"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 10,
177
+ "metadata": {},
178
+ "outputs": [
179
+ {
180
+ "name": "stderr",
181
+ "output_type": "stream",
182
+ "text": [
183
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n",
184
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n"
185
+ ]
186
+ },
187
+ {
188
+ "data": {
189
+ "application/vnd.jupyter.widget-view+json": {
190
+ "model_id": "cb34337db5584dfbbc4a76bb7e724b26",
191
+ "version_major": 2,
192
+ "version_minor": 0
193
+ },
194
+ "text/plain": [
195
+ " 0%| | 0/274 [00:00<?, ?it/s]"
196
+ ]
197
+ },
198
+ "metadata": {},
199
+ "output_type": "display_data"
200
+ },
201
+ {
202
+ "name": "stdout",
203
+ "output_type": "stream",
204
+ "text": [
205
+ "{'loss': 8.0521, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.04}\n",
206
+ "{'loss': 8.0927, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.07}\n",
207
+ "{'loss': 8.1187, 'learning_rate': 4.959349593495935e-05, 'epoch': 0.11}\n",
208
+ "{'loss': 8.2335, 'learning_rate': 4.75609756097561e-05, 'epoch': 0.15}\n",
209
+ "{'loss': 8.2531, 'learning_rate': 4.5528455284552844e-05, 'epoch': 0.18}\n",
210
+ "{'loss': 8.2873, 'learning_rate': 4.3495934959349595e-05, 'epoch': 0.22}\n",
211
+ "{'loss': 8.2071, 'learning_rate': 4.146341463414634e-05, 'epoch': 0.26}\n",
212
+ "{'loss': 8.2287, 'learning_rate': 3.943089430894309e-05, 'epoch': 0.29}\n",
213
+ "{'loss': 8.1928, 'learning_rate': 3.739837398373984e-05, 'epoch': 0.33}\n",
214
+ "{'loss': 8.2053, 'learning_rate': 3.5365853658536584e-05, 'epoch': 0.36}\n",
215
+ "{'loss': 8.1621, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.4}\n",
216
+ "{'loss': 8.1731, 'learning_rate': 3.130081300813008e-05, 'epoch': 0.44}\n",
217
+ "{'loss': 8.1447, 'learning_rate': 2.926829268292683e-05, 'epoch': 0.47}\n",
218
+ "{'loss': 8.1161, 'learning_rate': 2.7235772357723577e-05, 'epoch': 0.51}\n",
219
+ "{'loss': 8.1081, 'learning_rate': 2.5203252032520324e-05, 'epoch': 0.55}\n",
220
+ "{'loss': 8.0828, 'learning_rate': 2.3170731707317075e-05, 'epoch': 0.58}\n",
221
+ "{'loss': 8.1312, 'learning_rate': 2.1138211382113822e-05, 'epoch': 0.62}\n",
222
+ "{'loss': 8.1627, 'learning_rate': 1.9105691056910573e-05, 'epoch': 0.66}\n",
223
+ "{'loss': 8.0692, 'learning_rate': 1.707317073170732e-05, 'epoch': 0.69}\n",
224
+ "{'loss': 7.9943, 'learning_rate': 1.5040650406504067e-05, 'epoch': 0.73}\n",
225
+ "{'loss': 8.0139, 'learning_rate': 1.3008130081300815e-05, 'epoch': 0.77}\n",
226
+ "{'loss': 8.027, 'learning_rate': 1.0975609756097562e-05, 'epoch': 0.8}\n",
227
+ "{'loss': 8.0628, 'learning_rate': 8.94308943089431e-06, 'epoch': 0.84}\n",
228
+ "{'loss': 8.1264, 'learning_rate': 6.910569105691057e-06, 'epoch': 0.88}\n",
229
+ "{'loss': 8.0408, 'learning_rate': 4.8780487804878055e-06, 'epoch': 0.91}\n",
230
+ "{'loss': 8.071, 'learning_rate': 2.8455284552845528e-06, 'epoch': 0.95}\n",
231
+ "{'loss': 8.1237, 'learning_rate': 8.130081300813009e-07, 'epoch': 0.99}\n"
232
+ ]
233
+ },
234
+ {
235
+ "data": {
236
+ "application/vnd.jupyter.widget-view+json": {
237
+ "model_id": "5915cfa41d474a399ce0f53bc8f6f947",
238
+ "version_major": 2,
239
+ "version_minor": 0
240
+ },
241
+ "text/plain": [
242
+ " 0%| | 0/65 [00:00<?, ?it/s]"
243
+ ]
244
+ },
245
+ "metadata": {},
246
+ "output_type": "display_data"
247
+ },
248
+ {
249
+ "name": "stdout",
250
+ "output_type": "stream",
251
+ "text": [
252
+ "{'eval_loss': 8.02699089050293, 'eval_accuracy': 0.02575315840621963, 'eval_runtime': 25.2001, 'eval_samples_per_second': 163.333, 'eval_steps_per_second': 2.579, 'epoch': 1.0}\n",
253
+ "{'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0}\n"
254
+ ]
255
+ },
256
+ {
257
+ "data": {
258
+ "text/plain": [
259
+ "TrainOutput(global_step=274, training_loss=8.129460439194728, metrics={'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0})"
260
+ ]
261
+ },
262
+ "execution_count": 10,
263
+ "metadata": {},
264
+ "output_type": "execute_result"
265
+ }
266
+ ],
267
+ "source": [
268
+ "training_args = TrainingArguments(\n",
269
+ " output_dir=\"../models/test\",\n",
270
+ " remove_unused_columns=False,\n",
271
+ " evaluation_strategy=\"epoch\",\n",
272
+ " save_strategy=\"epoch\",\n",
273
+ " learning_rate=5e-5,\n",
274
+ " per_device_train_batch_size=64,\n",
275
+ " # gradient_accumulation_steps=2,\n",
276
+ " per_device_eval_batch_size=64,\n",
277
+ " num_train_epochs=1,\n",
278
+ " warmup_ratio=0.1,\n",
279
+ " logging_steps=10,\n",
280
+ " load_best_model_at_end=True,\n",
281
+ " metric_for_best_model=\"accuracy\",\n",
282
+ " push_to_hub=False,\n",
283
+ "\n",
284
+ ")\n",
285
+ "\n",
286
+ "trainer = Trainer(\n",
287
+ " model=model,\n",
288
+ " args=training_args,\n",
289
+ " train_dataset=ds[\"train\"], # .select(range(100)),\n",
290
+ " eval_dataset=ds[\"valid\"], # .select(range(100)),\n",
291
+ " tokenizer=image_processor,\n",
292
+ " compute_metrics=compute_metrics,\n",
293
+ " data_collator=data_collator,\n",
294
+ ")\n",
295
+ "\n",
296
+ "trainer.train()"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "markdown",
301
+ "metadata": {},
302
+ "source": [
303
+ "### Evaluation"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 7,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "application/vnd.jupyter.widget-view+json": {
314
+ "model_id": "4979f1d5536f4a3e97ecbc36c7eebbfa",
315
+ "version_major": 2,
316
+ "version_minor": 0
317
+ },
318
+ "text/plain": [
319
+ " 0%| | 0/7 [00:00<?, ?it/s]"
320
+ ]
321
+ },
322
+ "metadata": {},
323
+ "output_type": "display_data"
324
+ },
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "{'eval_loss': 8.275933265686035, 'eval_accuracy': 0.0, 'eval_runtime': 0.6419, 'eval_samples_per_second': 155.791, 'eval_steps_per_second': 10.905, 'epoch': 0.57}\n"
330
+ ]
331
+ },
332
+ {
333
+ "data": {
334
+ "application/vnd.jupyter.widget-view+json": {
335
+ "model_id": "cf6f48e995bf427db3c86d1d988bf752",
336
+ "version_major": 2,
337
+ "version_minor": 0
338
+ },
339
+ "text/plain": [
340
+ " 0%| | 0/7 [00:00<?, ?it/s]"
341
+ ]
342
+ },
343
+ "metadata": {},
344
+ "output_type": "display_data"
345
+ }
346
+ ],
347
+ "source": [
348
+ "results = trainer.evaluate()\n",
349
+ "print(results)\n",
350
+ "\n",
351
+ "test_results = trainer.predict(ds[\"test\"].select(range(100)))"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": 12,
357
+ "metadata": {},
358
+ "outputs": [
359
+ {
360
+ "data": {
361
+ "text/plain": [
362
+ "EfficientNetForImageClassification(\n",
363
+ " (efficientnet): EfficientNetModel(\n",
364
+ " (embeddings): EfficientNetEmbeddings(\n",
365
+ " (padding): ZeroPad2d((0, 1, 0, 1))\n",
366
+ " (convolution): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=valid, bias=False)\n",
367
+ " (batchnorm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
368
+ " (activation): SiLU()\n",
369
+ " )\n",
370
+ " (encoder): EfficientNetEncoder(\n",
371
+ " (blocks): ModuleList(\n",
372
+ " (0): EfficientNetBlock(\n",
373
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
374
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
375
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=40, bias=False)\n",
376
+ " (depthwise_norm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
377
+ " (depthwise_act): SiLU()\n",
378
+ " )\n",
379
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
380
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
381
+ " (reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
382
+ " (expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
383
+ " (act_reduce): SiLU()\n",
384
+ " (act_expand): Sigmoid()\n",
385
+ " )\n",
386
+ " (projection): EfficientNetFinalBlockLayer(\n",
387
+ " (project_conv): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
388
+ " (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
389
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
390
+ " )\n",
391
+ " )\n",
392
+ " (1): EfficientNetBlock(\n",
393
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
394
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
395
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=24, bias=False)\n",
396
+ " (depthwise_norm): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
397
+ " (depthwise_act): SiLU()\n",
398
+ " )\n",
399
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
400
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
401
+ " (reduce): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
402
+ " (expand): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
403
+ " (act_reduce): SiLU()\n",
404
+ " (act_expand): Sigmoid()\n",
405
+ " )\n",
406
+ " (projection): EfficientNetFinalBlockLayer(\n",
407
+ " (project_conv): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
408
+ " (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
409
+ " (dropout): Dropout(p=0.007692307692307693, inplace=False)\n",
410
+ " )\n",
411
+ " )\n",
412
+ " (2): EfficientNetBlock(\n",
413
+ " (expansion): EfficientNetExpansionLayer(\n",
414
+ " (expand_conv): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
415
+ " (expand_bn): BatchNorm2d(144, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
416
+ " (expand_act): SiLU()\n",
417
+ " )\n",
418
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
419
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
420
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=144, bias=False)\n",
421
+ " (depthwise_norm): BatchNorm2d(144, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
422
+ " (depthwise_act): SiLU()\n",
423
+ " )\n",
424
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
425
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
426
+ " (reduce): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
427
+ " (expand): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
428
+ " (act_reduce): SiLU()\n",
429
+ " (act_expand): Sigmoid()\n",
430
+ " )\n",
431
+ " (projection): EfficientNetFinalBlockLayer(\n",
432
+ " (project_conv): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
433
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
434
+ " (dropout): Dropout(p=0.015384615384615385, inplace=False)\n",
435
+ " )\n",
436
+ " )\n",
437
+ " (3): EfficientNetBlock(\n",
438
+ " (expansion): EfficientNetExpansionLayer(\n",
439
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
440
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
441
+ " (expand_act): SiLU()\n",
442
+ " )\n",
443
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
444
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
445
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
446
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
447
+ " (depthwise_act): SiLU()\n",
448
+ " )\n",
449
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
450
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
451
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
452
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
453
+ " (act_reduce): SiLU()\n",
454
+ " (act_expand): Sigmoid()\n",
455
+ " )\n",
456
+ " (projection): EfficientNetFinalBlockLayer(\n",
457
+ " (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
458
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
459
+ " (dropout): Dropout(p=0.02307692307692308, inplace=False)\n",
460
+ " )\n",
461
+ " )\n",
462
+ " (4): EfficientNetBlock(\n",
463
+ " (expansion): EfficientNetExpansionLayer(\n",
464
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
465
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
466
+ " (expand_act): SiLU()\n",
467
+ " )\n",
468
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
469
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
470
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
471
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
472
+ " (depthwise_act): SiLU()\n",
473
+ " )\n",
474
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
475
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
476
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
477
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
478
+ " (act_reduce): SiLU()\n",
479
+ " (act_expand): Sigmoid()\n",
480
+ " )\n",
481
+ " (projection): EfficientNetFinalBlockLayer(\n",
482
+ " (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
483
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
484
+ " (dropout): Dropout(p=0.03076923076923077, inplace=False)\n",
485
+ " )\n",
486
+ " )\n",
487
+ " (5): EfficientNetBlock(\n",
488
+ " (expansion): EfficientNetExpansionLayer(\n",
489
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
490
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
491
+ " (expand_act): SiLU()\n",
492
+ " )\n",
493
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
494
+ " (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
495
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=192, bias=False)\n",
496
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
497
+ " (depthwise_act): SiLU()\n",
498
+ " )\n",
499
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
500
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
501
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
502
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
503
+ " (act_reduce): SiLU()\n",
504
+ " (act_expand): Sigmoid()\n",
505
+ " )\n",
506
+ " (projection): EfficientNetFinalBlockLayer(\n",
507
+ " (project_conv): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
508
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
509
+ " (dropout): Dropout(p=0.038461538461538464, inplace=False)\n",
510
+ " )\n",
511
+ " )\n",
512
+ " (6): EfficientNetBlock(\n",
513
+ " (expansion): EfficientNetExpansionLayer(\n",
514
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
515
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
516
+ " (expand_act): SiLU()\n",
517
+ " )\n",
518
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
519
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
520
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
521
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
522
+ " (depthwise_act): SiLU()\n",
523
+ " )\n",
524
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
525
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
526
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
527
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
528
+ " (act_reduce): SiLU()\n",
529
+ " (act_expand): Sigmoid()\n",
530
+ " )\n",
531
+ " (projection): EfficientNetFinalBlockLayer(\n",
532
+ " (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
533
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
534
+ " (dropout): Dropout(p=0.04615384615384616, inplace=False)\n",
535
+ " )\n",
536
+ " )\n",
537
+ " (7): EfficientNetBlock(\n",
538
+ " (expansion): EfficientNetExpansionLayer(\n",
539
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
540
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
541
+ " (expand_act): SiLU()\n",
542
+ " )\n",
543
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
544
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
545
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
546
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
547
+ " (depthwise_act): SiLU()\n",
548
+ " )\n",
549
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
550
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
551
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
552
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
553
+ " (act_reduce): SiLU()\n",
554
+ " (act_expand): Sigmoid()\n",
555
+ " )\n",
556
+ " (projection): EfficientNetFinalBlockLayer(\n",
557
+ " (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
558
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
559
+ " (dropout): Dropout(p=0.05384615384615385, inplace=False)\n",
560
+ " )\n",
561
+ " )\n",
562
+ " (8): EfficientNetBlock(\n",
563
+ " (expansion): EfficientNetExpansionLayer(\n",
564
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
565
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
566
+ " (expand_act): SiLU()\n",
567
+ " )\n",
568
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
569
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
570
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=288, bias=False)\n",
571
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
572
+ " (depthwise_act): SiLU()\n",
573
+ " )\n",
574
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
575
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
576
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
577
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
578
+ " (act_reduce): SiLU()\n",
579
+ " (act_expand): Sigmoid()\n",
580
+ " )\n",
581
+ " (projection): EfficientNetFinalBlockLayer(\n",
582
+ " (project_conv): Conv2d(288, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
583
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
584
+ " (dropout): Dropout(p=0.06153846153846154, inplace=False)\n",
585
+ " )\n",
586
+ " )\n",
587
+ " (9): EfficientNetBlock(\n",
588
+ " (expansion): EfficientNetExpansionLayer(\n",
589
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
590
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
591
+ " (expand_act): SiLU()\n",
592
+ " )\n",
593
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
594
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
595
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
596
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
597
+ " (depthwise_act): SiLU()\n",
598
+ " )\n",
599
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
600
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
601
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
602
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
603
+ " (act_reduce): SiLU()\n",
604
+ " (act_expand): Sigmoid()\n",
605
+ " )\n",
606
+ " (projection): EfficientNetFinalBlockLayer(\n",
607
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
608
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
609
+ " (dropout): Dropout(p=0.06923076923076923, inplace=False)\n",
610
+ " )\n",
611
+ " )\n",
612
+ " (10): EfficientNetBlock(\n",
613
+ " (expansion): EfficientNetExpansionLayer(\n",
614
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
615
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
616
+ " (expand_act): SiLU()\n",
617
+ " )\n",
618
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
619
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
620
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
621
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
622
+ " (depthwise_act): SiLU()\n",
623
+ " )\n",
624
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
625
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
626
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
627
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
628
+ " (act_reduce): SiLU()\n",
629
+ " (act_expand): Sigmoid()\n",
630
+ " )\n",
631
+ " (projection): EfficientNetFinalBlockLayer(\n",
632
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
633
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
634
+ " (dropout): Dropout(p=0.07692307692307693, inplace=False)\n",
635
+ " )\n",
636
+ " )\n",
637
+ " (11): EfficientNetBlock(\n",
638
+ " (expansion): EfficientNetExpansionLayer(\n",
639
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
640
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
641
+ " (expand_act): SiLU()\n",
642
+ " )\n",
643
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
644
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
645
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
646
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
647
+ " (depthwise_act): SiLU()\n",
648
+ " )\n",
649
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
650
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
651
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
652
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
653
+ " (act_reduce): SiLU()\n",
654
+ " (act_expand): Sigmoid()\n",
655
+ " )\n",
656
+ " (projection): EfficientNetFinalBlockLayer(\n",
657
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
658
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
659
+ " (dropout): Dropout(p=0.08461538461538462, inplace=False)\n",
660
+ " )\n",
661
+ " )\n",
662
+ " (12): EfficientNetBlock(\n",
663
+ " (expansion): EfficientNetExpansionLayer(\n",
664
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
665
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
666
+ " (expand_act): SiLU()\n",
667
+ " )\n",
668
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
669
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
670
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
671
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
672
+ " (depthwise_act): SiLU()\n",
673
+ " )\n",
674
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
675
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
676
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
677
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
678
+ " (act_reduce): SiLU()\n",
679
+ " (act_expand): Sigmoid()\n",
680
+ " )\n",
681
+ " (projection): EfficientNetFinalBlockLayer(\n",
682
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
683
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
684
+ " (dropout): Dropout(p=0.09230769230769233, inplace=False)\n",
685
+ " )\n",
686
+ " )\n",
687
+ " (13): EfficientNetBlock(\n",
688
+ " (expansion): EfficientNetExpansionLayer(\n",
689
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
690
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
691
+ " (expand_act): SiLU()\n",
692
+ " )\n",
693
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
694
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
695
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=576, bias=False)\n",
696
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
697
+ " (depthwise_act): SiLU()\n",
698
+ " )\n",
699
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
700
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
701
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
702
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
703
+ " (act_reduce): SiLU()\n",
704
+ " (act_expand): Sigmoid()\n",
705
+ " )\n",
706
+ " (projection): EfficientNetFinalBlockLayer(\n",
707
+ " (project_conv): Conv2d(576, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
708
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
709
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
710
+ " )\n",
711
+ " )\n",
712
+ " (14): EfficientNetBlock(\n",
713
+ " (expansion): EfficientNetExpansionLayer(\n",
714
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
715
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
716
+ " (expand_act): SiLU()\n",
717
+ " )\n",
718
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
719
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
720
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
721
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
722
+ " (depthwise_act): SiLU()\n",
723
+ " )\n",
724
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
725
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
726
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
727
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
728
+ " (act_reduce): SiLU()\n",
729
+ " (act_expand): Sigmoid()\n",
730
+ " )\n",
731
+ " (projection): EfficientNetFinalBlockLayer(\n",
732
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
733
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
734
+ " (dropout): Dropout(p=0.1076923076923077, inplace=False)\n",
735
+ " )\n",
736
+ " )\n",
737
+ " (15): EfficientNetBlock(\n",
738
+ " (expansion): EfficientNetExpansionLayer(\n",
739
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
740
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
741
+ " (expand_act): SiLU()\n",
742
+ " )\n",
743
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
744
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
745
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
746
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
747
+ " (depthwise_act): SiLU()\n",
748
+ " )\n",
749
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
750
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
751
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
752
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
753
+ " (act_reduce): SiLU()\n",
754
+ " (act_expand): Sigmoid()\n",
755
+ " )\n",
756
+ " (projection): EfficientNetFinalBlockLayer(\n",
757
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
758
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
759
+ " (dropout): Dropout(p=0.11538461538461539, inplace=False)\n",
760
+ " )\n",
761
+ " )\n",
762
+ " (16): EfficientNetBlock(\n",
763
+ " (expansion): EfficientNetExpansionLayer(\n",
764
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
765
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
766
+ " (expand_act): SiLU()\n",
767
+ " )\n",
768
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
769
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
770
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
771
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
772
+ " (depthwise_act): SiLU()\n",
773
+ " )\n",
774
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
775
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
776
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
777
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
778
+ " (act_reduce): SiLU()\n",
779
+ " (act_expand): Sigmoid()\n",
780
+ " )\n",
781
+ " (projection): EfficientNetFinalBlockLayer(\n",
782
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
783
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
784
+ " (dropout): Dropout(p=0.12307692307692308, inplace=False)\n",
785
+ " )\n",
786
+ " )\n",
787
+ " (17): EfficientNetBlock(\n",
788
+ " (expansion): EfficientNetExpansionLayer(\n",
789
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
790
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
791
+ " (expand_act): SiLU()\n",
792
+ " )\n",
793
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
794
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
795
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
796
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
797
+ " (depthwise_act): SiLU()\n",
798
+ " )\n",
799
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
800
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
801
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
802
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
803
+ " (act_reduce): SiLU()\n",
804
+ " (act_expand): Sigmoid()\n",
805
+ " )\n",
806
+ " (projection): EfficientNetFinalBlockLayer(\n",
807
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
808
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
809
+ " (dropout): Dropout(p=0.13076923076923078, inplace=False)\n",
810
+ " )\n",
811
+ " )\n",
812
+ " (18): EfficientNetBlock(\n",
813
+ " (expansion): EfficientNetExpansionLayer(\n",
814
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
815
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
816
+ " (expand_act): SiLU()\n",
817
+ " )\n",
818
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
819
+ " (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
820
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=816, bias=False)\n",
821
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
822
+ " (depthwise_act): SiLU()\n",
823
+ " )\n",
824
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
825
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
826
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
827
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
828
+ " (act_reduce): SiLU()\n",
829
+ " (act_expand): Sigmoid()\n",
830
+ " )\n",
831
+ " (projection): EfficientNetFinalBlockLayer(\n",
832
+ " (project_conv): Conv2d(816, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
833
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
834
+ " (dropout): Dropout(p=0.13846153846153847, inplace=False)\n",
835
+ " )\n",
836
+ " )\n",
837
+ " (19): EfficientNetBlock(\n",
838
+ " (expansion): EfficientNetExpansionLayer(\n",
839
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
840
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
841
+ " (expand_act): SiLU()\n",
842
+ " )\n",
843
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
844
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
845
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
846
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
847
+ " (depthwise_act): SiLU()\n",
848
+ " )\n",
849
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
850
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
851
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
852
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
853
+ " (act_reduce): SiLU()\n",
854
+ " (act_expand): Sigmoid()\n",
855
+ " )\n",
856
+ " (projection): EfficientNetFinalBlockLayer(\n",
857
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
858
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
859
+ " (dropout): Dropout(p=0.14615384615384616, inplace=False)\n",
860
+ " )\n",
861
+ " )\n",
862
+ " (20): EfficientNetBlock(\n",
863
+ " (expansion): EfficientNetExpansionLayer(\n",
864
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
865
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
866
+ " (expand_act): SiLU()\n",
867
+ " )\n",
868
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
869
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
870
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
871
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
872
+ " (depthwise_act): SiLU()\n",
873
+ " )\n",
874
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
875
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
876
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
877
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
878
+ " (act_reduce): SiLU()\n",
879
+ " (act_expand): Sigmoid()\n",
880
+ " )\n",
881
+ " (projection): EfficientNetFinalBlockLayer(\n",
882
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
883
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
884
+ " (dropout): Dropout(p=0.15384615384615385, inplace=False)\n",
885
+ " )\n",
886
+ " )\n",
887
+ " (21): EfficientNetBlock(\n",
888
+ " (expansion): EfficientNetExpansionLayer(\n",
889
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
890
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
891
+ " (expand_act): SiLU()\n",
892
+ " )\n",
893
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
894
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
895
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
896
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
897
+ " (depthwise_act): SiLU()\n",
898
+ " )\n",
899
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
900
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
901
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
902
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
903
+ " (act_reduce): SiLU()\n",
904
+ " (act_expand): Sigmoid()\n",
905
+ " )\n",
906
+ " (projection): EfficientNetFinalBlockLayer(\n",
907
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
908
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
909
+ " (dropout): Dropout(p=0.16153846153846155, inplace=False)\n",
910
+ " )\n",
911
+ " )\n",
912
+ " (22): EfficientNetBlock(\n",
913
+ " (expansion): EfficientNetExpansionLayer(\n",
914
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
915
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
916
+ " (expand_act): SiLU()\n",
917
+ " )\n",
918
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
919
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
920
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
921
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
922
+ " (depthwise_act): SiLU()\n",
923
+ " )\n",
924
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
925
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
926
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
927
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
928
+ " (act_reduce): SiLU()\n",
929
+ " (act_expand): Sigmoid()\n",
930
+ " )\n",
931
+ " (projection): EfficientNetFinalBlockLayer(\n",
932
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
933
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
934
+ " (dropout): Dropout(p=0.16923076923076924, inplace=False)\n",
935
+ " )\n",
936
+ " )\n",
937
+ " (23): EfficientNetBlock(\n",
938
+ " (expansion): EfficientNetExpansionLayer(\n",
939
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
940
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
941
+ " (expand_act): SiLU()\n",
942
+ " )\n",
943
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
944
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
945
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
946
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
947
+ " (depthwise_act): SiLU()\n",
948
+ " )\n",
949
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
950
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
951
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
952
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
953
+ " (act_reduce): SiLU()\n",
954
+ " (act_expand): Sigmoid()\n",
955
+ " )\n",
956
+ " (projection): EfficientNetFinalBlockLayer(\n",
957
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
958
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
959
+ " (dropout): Dropout(p=0.17692307692307693, inplace=False)\n",
960
+ " )\n",
961
+ " )\n",
962
+ " (24): EfficientNetBlock(\n",
963
+ " (expansion): EfficientNetExpansionLayer(\n",
964
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
965
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
966
+ " (expand_act): SiLU()\n",
967
+ " )\n",
968
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
969
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
970
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
971
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
972
+ " (depthwise_act): SiLU()\n",
973
+ " )\n",
974
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
975
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
976
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
977
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
978
+ " (act_reduce): SiLU()\n",
979
+ " (act_expand): Sigmoid()\n",
980
+ " )\n",
981
+ " (projection): EfficientNetFinalBlockLayer(\n",
982
+ " (project_conv): Conv2d(1392, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
983
+ " (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
984
+ " (dropout): Dropout(p=0.18461538461538465, inplace=False)\n",
985
+ " )\n",
986
+ " )\n",
987
+ " (25): EfficientNetBlock(\n",
988
+ " (expansion): EfficientNetExpansionLayer(\n",
989
+ " (expand_conv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
990
+ " (expand_bn): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
991
+ " (expand_act): SiLU()\n",
992
+ " )\n",
993
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
994
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
995
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=2304, bias=False)\n",
996
+ " (depthwise_norm): BatchNorm2d(2304, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
997
+ " (depthwise_act): SiLU()\n",
998
+ " )\n",
999
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
1000
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
1001
+ " (reduce): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
1002
+ " (expand): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
1003
+ " (act_reduce): SiLU()\n",
1004
+ " (act_expand): Sigmoid()\n",
1005
+ " )\n",
1006
+ " (projection): EfficientNetFinalBlockLayer(\n",
1007
+ " (project_conv): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
1008
+ " (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
1009
+ " (dropout): Dropout(p=0.19230769230769232, inplace=False)\n",
1010
+ " )\n",
1011
+ " )\n",
1012
+ " )\n",
1013
+ " (top_conv): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
1014
+ " (top_bn): BatchNorm2d(1536, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
1015
+ " (top_activation): SiLU()\n",
1016
+ " )\n",
1017
+ " (pooler): AvgPool2d(kernel_size=1536, stride=1536, padding=0)\n",
1018
+ " )\n",
1019
+ " (dropout): Dropout(p=0.3, inplace=False)\n",
1020
+ " (classifier): Linear(in_features=1536, out_features=3872, bias=True)\n",
1021
+ ")"
1022
+ ]
1023
+ },
1024
+ "execution_count": 12,
1025
+ "metadata": {},
1026
+ "output_type": "execute_result"
1027
+ }
1028
+ ],
1029
+ "source": [
1030
+ "model"
1031
+ ]
1032
+ },
1033
+ {
1034
+ "cell_type": "code",
1035
+ "execution_count": null,
1036
+ "metadata": {},
1037
+ "outputs": [],
1038
+ "source": []
1039
+ }
1040
+ ],
1041
+ "metadata": {
1042
+ "kernelspec": {
1043
+ "display_name": "ArtifactClassification",
1044
+ "language": "python",
1045
+ "name": "python3"
1046
+ },
1047
+ "language_info": {
1048
+ "codemirror_mode": {
1049
+ "name": "ipython",
1050
+ "version": 3
1051
+ },
1052
+ "file_extension": ".py",
1053
+ "mimetype": "text/x-python",
1054
+ "name": "python",
1055
+ "nbconvert_exporter": "python",
1056
+ "pygments_lexer": "ipython3",
1057
+ "version": "3.10.12"
1058
+ }
1059
+ },
1060
+ "nbformat": 4,
1061
+ "nbformat_minor": 2
1062
+ }
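The architecture dump above ends in a 3,872-way classifier over 1,536 pooled features. As a minimal sketch (mine, not a cell from the notebook), this is how such a head is typically obtained from the google/efficientnet-b3 checkpoint; `ignore_mismatched_sizes` lets the 1000-class ImageNet head be dropped and re-initialised at the new size:

```python
from transformers import EfficientNetForImageClassification

# Swap the 1000-class ImageNet head for a freshly initialised 3872-class one;
# this is what triggers the "newly initialized" warnings seen later in the logs.
model = EfficientNetForImageClassification.from_pretrained(
    "google/efficientnet-b3",
    num_labels=3872,
    ignore_mismatched_sizes=True,
)
print(model.classifier)  # Linear(in_features=1536, out_features=3872, bias=True)
```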
4.0-assessing_BM_dataset.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
5.0-assessing_date_prediction.ipynb ADDED
File without changes
README.md ADDED
@@ -0,0 +1,47 @@
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: test
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # test
13
+
14
+ This model was trained from scratch on an unspecified dataset.
15
+
16
+ ## Model description
17
+
18
+ More information needed
19
+
20
+ ## Intended uses & limitations
21
+
22
+ More information needed
23
+
24
+ ## Training and evaluation data
25
+
26
+ More information needed
27
+
28
+ ## Training procedure
29
+
30
+ ### Training hyperparameters
31
+
32
+ The following hyperparameters were used during training:
33
+ - learning_rate: 5e-05
34
+ - train_batch_size: 128
35
+ - eval_batch_size: 128
36
+ - seed: 42
37
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
38
+ - lr_scheduler_type: linear
39
+ - num_epochs: 100
40
+ - mixed_precision_training: Native AMP
41
+
42
+ ### Framework versions
43
+
44
+ - Transformers 4.38.2
45
+ - Pytorch 2.2.1+cu121
46
+ - Datasets 2.18.0
47
+ - Tokenizers 0.15.2
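The hyperparameters listed in the card map onto `TrainingArguments` roughly as follows. This is a sketch: `output_dir` and any setting not named in the card are assumptions.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="test",              # model name from the card; the path is hypothetical
    learning_rate=5e-5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=100,
    fp16=True,                      # "mixed_precision_training: Native AMP"
)
```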
config.json ADDED
@@ -0,0 +1,88 @@
1
+ {
2
+ "_name_or_path": "../models/james-burton/BritishMuseum-white/bm5-white_date_log/rose-blaze-27/checkpoint-5310",
3
+ "architectures": [
4
+ "EfficientNetForImageClassification"
5
+ ],
6
+ "batch_norm_eps": 0.001,
7
+ "batch_norm_momentum": 0.99,
8
+ "depth_coefficient": 1.4,
9
+ "depth_divisor": 8,
10
+ "depthwise_padding": [
11
+ 5,
12
+ 18
13
+ ],
14
+ "drop_connect_rate": 0.2,
15
+ "dropout_rate": 0.3,
16
+ "expand_ratios": [
17
+ 1,
18
+ 6,
19
+ 6,
20
+ 6,
21
+ 6,
22
+ 6,
23
+ 6
24
+ ],
25
+ "hidden_act": "swish",
26
+ "hidden_dim": 1536,
27
+ "id2label": {
28
+ "0": "LABEL_0"
29
+ },
30
+ "image_size": 300,
31
+ "in_channels": [
32
+ 32,
33
+ 16,
34
+ 24,
35
+ 40,
36
+ 80,
37
+ 112,
38
+ 192
39
+ ],
40
+ "initializer_range": 0.02,
41
+ "kernel_sizes": [
42
+ 3,
43
+ 3,
44
+ 5,
45
+ 3,
46
+ 5,
47
+ 5,
48
+ 3
49
+ ],
50
+ "label2id": {
51
+ "LABEL_0": 0
52
+ },
53
+ "model_type": "efficientnet",
54
+ "num_block_repeats": [
55
+ 1,
56
+ 2,
57
+ 2,
58
+ 3,
59
+ 3,
60
+ 4,
61
+ 1
62
+ ],
63
+ "num_channels": 3,
64
+ "num_hidden_layers": 64,
65
+ "out_channels": [
66
+ 16,
67
+ 24,
68
+ 40,
69
+ 80,
70
+ 112,
71
+ 192,
72
+ 320
73
+ ],
74
+ "pooling_type": "mean",
75
+ "squeeze_expansion_ratio": 0.25,
76
+ "strides": [
77
+ 1,
78
+ 2,
79
+ 2,
80
+ 2,
81
+ 1,
82
+ 2,
83
+ 1
84
+ ],
85
+ "torch_dtype": "float32",
86
+ "transformers_version": "4.38.2",
87
+ "width_coefficient": 1.2
88
+ }
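A quick sketch for sanity-checking this file, assuming it is saved locally as config.json: the width/depth coefficients (1.2/1.4) and 300-px input correspond to EfficientNet-B3 scaling.

```python
from transformers import EfficientNetConfig

config = EfficientNetConfig.from_json_file("config.json")
print(config.width_coefficient, config.depth_coefficient)  # 1.2 1.4 (B3 scaling)
print(config.image_size, config.hidden_dim)                # 300 1536
```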
material_min3.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
3
+ ,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
7
+ ,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
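In this and the following CSVs, a blank "BM pretrain" or "Train data" cell means "same as the row above". A sketch (assuming the file is on disk as material_min3.csv) of forward-filling those cells before aggregating:

```python
import pandas as pd

df = pd.read_csv("material_min3.csv")
# Blank leading cells inherit the value from the row above, so forward-fill.
df[["BM pretrain", "Train data"]] = df[["BM pretrain", "Train data"]].ffill()
print(df.groupby(["BM pretrain", "Train data"])["Acc."].mean())
```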
material_min3_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.582,0.782,0.85,0.914,0.553,0.578,0.582
3
+ ,,avg+3D,om3-white_material,0.576,0.784,0.85,0.92,0.543,0.572,0.576
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.573,0.759,0.838,0.91,0.555,0.558,0.573
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.567,0.762,0.84,0.915,0.548,0.556,0.567
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.575,0.777,0.843,0.912,0.557,0.561,0.575
7
+ ,,avg+3D,om3-3Dwhite_material,0.583,0.779,0.849,0.912,0.563,0.565,0.583
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.587,0.787,0.856,0.917,0.555,0.551,0.587
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.596,0.797,0.867,0.922,0.566,0.571,0.596
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.8,0.861,0.925,0.571,0.566,0.59
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.79,0.852,0.918,0.578,0.583,0.59
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.582,0.778,0.85,0.911,0.562,0.56,0.582
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.583,0.779,0.841,0.913,0.572,0.581,0.583
material_min4.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_material,0.617,0.817,0.868,0.926,0.587,0.596,0.617
3
+ ,,avg+3D,om4-white_material,0.608,0.808,0.865,0.925,0.571,0.582,0.608
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.62,0.813,0.864,0.92,0.603,0.616,0.62
5
+ ,,avg+3D,om4-3Dwhite-1frame_material,0.625,0.812,0.871,0.919,0.605,0.616,0.625
6
+ ,white+3Dx4,avg,om4-3Dwhite_material,0.621,0.808,0.869,0.918,0.607,0.611,0.621
7
+ ,,avg+3D,om4-3Dwhite_material,0.62,0.808,0.871,0.92,0.605,0.609,0.62
8
+ Yes,white,avg,om4-white_material_bm-pretrn,0.611,0.805,0.865,0.914,0.577,0.584,0.611
9
+ ,,avg+3D,om4-white_material_bm-pretrn,0.598,0.802,0.87,0.919,0.566,0.582,0.598
10
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.635,0.824,0.877,0.93,0.61,0.608,0.635
11
+ ,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.64,0.816,0.873,0.924,0.618,0.621,0.64
12
+ ,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.626,0.822,0.877,0.923,0.601,0.614,0.626
13
+ ,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.628,0.821,0.877,0.928,0.605,0.61,0.628
material_min4_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_material,0.585,0.791,0.85,0.912,0.558,0.563,0.585
3
+ ,,avg+3D,om4-white_material,0.579,0.787,0.847,0.915,0.547,0.557,0.579
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.593,0.785,0.839,0.903,0.58,0.587,0.593
5
+ ,,avg+3D,om4-3Dwhite-1frame_material,0.595,0.781,0.845,0.906,0.578,0.582,0.595
6
+ ,white+3Dx4,avg,om4-3Dwhite_material,0.59,0.786,0.841,0.905,0.578,0.583,0.59
7
+ ,,avg+3D,om4-3Dwhite_material,0.587,0.786,0.849,0.906,0.573,0.576,0.587
8
+ Yes,white,avg,om4-white_material_bm-pretrn,0.575,0.775,0.846,0.899,0.546,0.558,0.575
9
+ ,,avg+3D,om4-white_material_bm-pretrn,0.576,0.771,0.84,0.908,0.549,0.568,0.576
10
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.596,0.799,0.858,0.916,0.573,0.571,0.596
11
+ ,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.598,0.792,0.859,0.914,0.579,0.582,0.598
12
+ ,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.59,0.793,0.857,0.91,0.567,0.574,0.59
13
+ ,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.598,0.791,0.859,0.92,0.577,0.578,0.598
material_min5.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om5-white_material,0.605,0.774,0.84,0.91,0.571,0.609,0.605
3
+ ,,avg+3D,om5-white_material,0.603,0.777,0.845,0.919,0.564,0.603,0.603
4
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.628,0.806,0.868,0.924,0.6,0.602,0.628
5
+ ,,avg+3D,om5-3Dwhite-1frame_material,0.628,0.809,0.871,0.927,0.597,0.602,0.628
6
+ ,white+3Dx4,avg,om5-3Dwhite_material,0.63,0.816,0.874,0.924,0.598,0.601,0.63
7
+ ,,avg+3D,om5-3Dwhite_material,0.622,0.818,0.877,0.927,0.587,0.588,0.622
8
+ Yes,white,avg,om5-white_material_bm-pretrn,0.605,0.791,0.855,0.912,0.572,0.586,0.605
9
+ ,,avg+3D,om5-white_material_bm-pretrn,0.592,0.784,0.847,0.909,0.557,0.582,0.592
10
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.601,0.812,0.875,0.925,0.57,0.567,0.601
11
+ ,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.622,0.809,0.875,0.926,0.595,0.598,0.622
12
+ ,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.633,0.819,0.88,0.93,0.607,0.607,0.633
13
+ ,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.642,0.824,0.879,0.929,0.627,0.632,0.642
material_min5_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om5-white_material,0.565,0.742,0.813,0.887,0.536,0.569,0.565
3
+ ,,avg+3D,om5-white_material,0.568,0.734,0.822,0.899,0.532,0.576,0.568
4
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.604,0.774,0.84,0.915,0.58,0.581,0.604
5
+ ,,avg+3D,om5-3Dwhite-1frame_material,0.599,0.777,0.846,0.919,0.573,0.574,0.599
6
+ ,white+3Dx4,avg,om5-3Dwhite_material,0.593,0.792,0.849,0.911,0.565,0.563,0.593
7
+ ,,avg+3D,om5-3Dwhite_material,0.591,0.79,0.85,0.917,0.562,0.567,0.591
8
+ Yes,white,avg,om5-white_material_bm-pretrn,0.569,0.767,0.831,0.902,0.54,0.547,0.569
9
+ ,,avg+3D,om5-white_material_bm-pretrn,0.562,0.757,0.827,0.897,0.526,0.542,0.562
10
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.576,0.785,0.853,0.915,0.548,0.542,0.576
11
+ ,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.594,0.779,0.852,0.91,0.57,0.576,0.594
12
+ ,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.6,0.794,0.856,0.923,0.577,0.572,0.6
13
+ ,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.606,0.792,0.852,0.915,0.593,0.597,0.606
material_min6.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om6-white_material,0.632,0.817,0.878,0.927,0.606,0.598,0.632
3
+ ,,avg+3D,om6-white_material,0.632,0.811,0.869,0.924,0.602,0.593,0.632
4
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.615,0.817,0.878,0.929,0.593,0.617,0.615
5
+ ,,avg+3D,om6-3Dwhite-1frame_material,0.62,0.818,0.88,0.928,0.597,0.623,0.62
6
+ ,white+3Dx4,avg,om6-3Dwhite_material,0.628,0.819,0.879,0.933,0.604,0.611,0.628
7
+ ,,avg+3D,om6-3Dwhite_material,0.624,0.819,0.88,0.933,0.597,0.598,0.624
8
+ Yes,white,avg,om6-white_material_bm-pretrn,0.641,0.824,0.886,0.934,0.614,0.61,0.641
9
+ ,,avg+3D,om6-white_material_bm-pretrn,0.644,0.834,0.885,0.938,0.617,0.618,0.644
10
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.622,0.818,0.88,0.935,0.593,0.59,0.622
11
+ ,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.617,0.813,0.879,0.932,0.586,0.584,0.617
12
+ ,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.64,0.824,0.879,0.936,0.615,0.615,0.64
13
+ ,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.648,0.827,0.883,0.94,0.622,0.622,0.648
material_min6_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om6-white_material,0.603,0.793,0.854,0.911,0.58,0.572,0.603
3
+ ,,avg+3D,om6-white_material,0.6,0.79,0.856,0.91,0.574,0.568,0.6
4
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.578,0.786,0.852,0.915,0.556,0.574,0.578
5
+ ,,avg+3D,om6-3Dwhite-1frame_material,0.581,0.793,0.858,0.917,0.558,0.58,0.581
6
+ ,white+3Dx4,avg,om6-3Dwhite_material,0.592,0.795,0.858,0.919,0.57,0.568,0.592
7
+ ,,avg+3D,om6-3Dwhite_material,0.597,0.793,0.858,0.922,0.571,0.567,0.597
8
+ Yes,white,avg,om6-white_material_bm-pretrn,0.612,0.804,0.865,0.917,0.587,0.579,0.612
9
+ ,,avg+3D,om6-white_material_bm-pretrn,0.613,0.812,0.864,0.922,0.589,0.586,0.613
10
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.598,0.791,0.864,0.919,0.573,0.566,0.598
11
+ ,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.599,0.787,0.863,0.924,0.571,0.568,0.599
12
+ ,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.597,0.793,0.859,0.924,0.575,0.575,0.597
13
+ ,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.609,0.799,0.867,0.927,0.585,0.58,0.609
material_x_plus3Ds.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
3
+ ,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
7
+ ,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c9863addf5ea52fec2daa05bb2ecbaa772b5750cfd62532070f4ff77b4269a
3
+ size 43215124
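This is a Git LFS pointer, not the weights themselves; the real file is fetched on checkout. A sketch for verifying a downloaded copy against the pointer's oid (the local filename is assumed):

```python
import hashlib

# Hash the downloaded weights and compare against the sha256 oid above.
with open("model.safetensors", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == "e3c9863addf5ea52fec2daa05bb2ecbaa772b5750cfd62532070f4ff77b4269a"
```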
object_name_min3.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min3_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min4.csv ADDED
@@ -0,0 +1,19 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_name,0.609,0.752,0.805,0.866,0.581,0.58,0.609
3
+ ,,avg+3D,om4-white_name,0.603,0.744,0.8,0.863,0.573,0.57,0.603
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.584,0.743,0.792,0.858,0.548,0.543,0.584
5
+ ,,avg+3D,om4-3Dwhite-1frame_name,0.579,0.741,0.793,0.854,0.541,0.536,0.579
6
+ ,white+3Dx4,avg,om4-3Dwhite_name,0.563,0.716,0.777,0.843,0.54,0.558,0.563
7
+ ,,avg+3D,om4-3Dwhite_name,0.571,0.715,0.779,0.849,0.547,0.563,0.571
8
+ Yes,white,avg,om4-white_name_bm-pretrn,0.58,0.739,0.798,0.867,0.547,0.546,0.58
9
+ ,,avg,om4-white_name_bm-pretrn-b,0.588,0.743,0.798,0.857,0.552,0.553,0.588
10
+ ,,avg+3D,om4-white_name_bm-pretrn,0.575,0.732,0.795,0.859,0.541,0.544,0.575
11
+ ,,avg+3D,om4-white_name_bm-pretrn-b,0.59,0.749,0.803,0.863,0.56,0.564,0.59
12
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.587,0.742,0.789,0.857,0.552,0.548,0.587
13
+ ,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.589,0.74,0.801,0.861,0.557,0.558,0.589
14
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.605,0.75,0.8,0.866,0.58,0.577,0.605
15
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.594,0.749,0.801,0.871,0.573,0.585,0.594
16
+ ,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.558,0.726,0.785,0.845,0.539,0.55,0.558
17
+ ,,avg,om4-3Dwhite_name_bm-pretrn-b,0.566,0.721,0.774,0.85,0.543,0.55,0.566
18
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.55,0.718,0.781,0.839,0.538,0.572,0.55
19
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.562,0.709,0.765,0.839,0.554,0.597,0.562
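The `*_bm-pretrn` and `*_bm-pretrn-b` rows appear to be repeat runs of the same configuration (an inference from the naming, not documented here). A sketch of collapsing the suffix and averaging before comparison:

```python
import pandas as pd

df = pd.read_csv("object_name_min4.csv")
df[["BM pretrain", "Train data"]] = df[["BM pretrain", "Train data"]].ffill()
# Treat the "-b" suffix as a second run of the same config (an assumption).
df["config"] = df["config"].str.removesuffix("-b")
print(df.groupby(["config", "Test time method"])["Acc."].mean())
```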
object_name_min4_max1.csv ADDED
@@ -0,0 +1,19 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_name,0.575,0.722,0.779,0.845,0.551,0.55,0.575
3
+ ,,avg+3D,om4-white_name,0.568,0.721,0.779,0.841,0.542,0.543,0.568
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.54,0.718,0.766,0.836,0.507,0.497,0.54
5
+ ,,avg+3D,om4-3Dwhite-1frame_name,0.544,0.712,0.771,0.834,0.507,0.496,0.544
6
+ ,white+3Dx4,avg,om4-3Dwhite_name,0.53,0.682,0.749,0.821,0.512,0.533,0.53
7
+ ,,avg+3D,om4-3Dwhite_name,0.535,0.686,0.75,0.828,0.515,0.536,0.535
8
+ Yes,white,avg,om4-white_name_bm-pretrn,0.549,0.709,0.776,0.84,0.518,0.514,0.549
9
+ ,,avg,om4-white_name_bm-pretrn-b,0.55,0.722,0.769,0.841,0.518,0.516,0.55
10
+ ,,avg+3D,om4-white_name_bm-pretrn,0.544,0.703,0.771,0.842,0.511,0.511,0.544
11
+ ,,avg+3D,om4-white_name_bm-pretrn-b,0.554,0.721,0.778,0.844,0.529,0.534,0.554
12
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.555,0.712,0.761,0.836,0.523,0.513,0.555
13
+ ,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.562,0.705,0.77,0.833,0.53,0.527,0.562
14
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.568,0.72,0.772,0.843,0.547,0.546,0.568
15
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.564,0.717,0.772,0.848,0.543,0.55,0.564
16
+ ,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.517,0.68,0.752,0.815,0.498,0.507,0.517
17
+ ,,avg,om4-3Dwhite_name_bm-pretrn-b,0.516,0.67,0.733,0.823,0.499,0.514,0.516
18
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.506,0.677,0.743,0.807,0.494,0.528,0.506
19
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.518,0.66,0.726,0.807,0.508,0.544,0.518
object_name_min5.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min5_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "crop_size": {
3
+ "height": 289,
4
+ "width": 289
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.485,
12
+ 0.456,
13
+ 0.406
14
+ ],
15
+ "image_processor_type": "EfficientNetImageProcessor",
16
+ "image_std": [
17
+ 0.47853944,
18
+ 0.4732864,
19
+ 0.47434163
20
+ ],
21
+ "include_top": true,
22
+ "resample": 0,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "rescale_offset": false,
25
+ "size": {
26
+ "height": 300,
27
+ "width": 300
28
+ }
29
+ }
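A sketch of exercising this processor, assuming the JSON above sits in the current directory as preprocessor_config.json: with do_center_crop false and size 300, any input comes out as a 3x300x300 tensor.

```python
import numpy as np
from PIL import Image
from transformers import EfficientNetImageProcessor

processor = EfficientNetImageProcessor.from_pretrained(".")
image = Image.fromarray(np.zeros((640, 480, 3), dtype=np.uint8))  # dummy image
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 300, 300])
```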
results.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc310ec19387e9a791da6356d5d50bd06c606c82ffd0f52ba6fe709f9154ce88
3
+ size 191453
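results.pkl and results_local.pkl are pickled result objects of identical size; their exact structure is not documented in this commit. A minimal, hedged loading sketch:

```python
import pickle

with open("results.pkl", "rb") as f:
    results = pickle.load(f)
print(type(results))  # inspect the object before assuming a schema
```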
results_local.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5f163e18e85fccf040281518e2bb04ed3af6d8542d59d802297affdc23a6e4
3
+ size 191453
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf85df85ac87760d3117c6911e8713dacf4c0df3b705a14f35c5188bbf5c80d0
3
+ size 4856
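training_args.bin is the `TrainingArguments` object that the HF `Trainer` serialises with `torch.save`. A sketch for inspecting it (transformers must be importable for unpickling to succeed):

```python
import torch

training_args = torch.load("training_args.bin")
print(training_args.learning_rate, training_args.num_train_epochs)  # cf. the card above
```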
wandb/debug-cli.james.log ADDED
File without changes
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240214_112422-hfwsgqj3/files/config.yaml ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240214_112422-hfwsgqj3/files/output.log ADDED
@@ -0,0 +1,33 @@
1
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
2
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
3
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
4
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
5
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
6
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
7
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
8
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
9
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
10
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
11
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
12
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
13
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
14
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
15
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
16
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
17
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
18
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
19
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
20
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
21
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
22
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
23
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
24
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
25
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
26
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
27
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
28
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
29
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
30
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
31
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
32
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
33
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
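The log's variable listing names `output`, `top5_probabilities` and `top5_class_indices`; a self-contained sketch of how those are typically produced, with the checkpoint directory "." standing in for the trained files above:

```python
import torch
from transformers import EfficientNetForImageClassification

model = EfficientNetForImageClassification.from_pretrained(".").eval()
pixel_values = torch.zeros(1, 3, 300, 300)  # placeholder batch
with torch.no_grad():
    output = model(pixel_values=pixel_values).logits
top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=-1), k=5)
print(top5_class_indices)
```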
wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt ADDED
@@ -0,0 +1,202 @@
1
+ accelerate==0.27.2
2
+ aiohttp==3.9.3
3
+ aiosignal==1.3.1
4
+ alabaster==0.7.16
5
+ anyio==4.2.0
6
+ appdirs==1.4.4
7
+ argon2-cffi-bindings==21.2.0
8
+ argon2-cffi==23.1.0
9
+ arrow==1.3.0
10
+ artifact-classification==0.0.1
11
+ asttokens==2.4.1
12
+ async-lru==2.0.4
13
+ async-timeout==4.0.3
14
+ attrs==23.2.0
15
+ babel==2.14.0
16
+ beautifulsoup4==4.12.3
17
+ black==24.1.1
18
+ bleach==6.1.0
19
+ certifi==2024.2.2
20
+ cffi==1.16.0
21
+ charset-normalizer==3.3.2
22
+ click==8.1.7
23
+ comm==0.2.1
24
+ contourpy==1.2.0
25
+ coverage==7.4.1
26
+ cycler==0.12.1
27
+ datasets==2.17.0
28
+ debugpy==1.8.0
29
+ decorator==5.1.1
30
+ defusedxml==0.7.1
31
+ dill==0.3.8
32
+ docker-pycreds==0.4.0
33
+ docutils==0.20.1
34
+ easydict==1.11
35
+ et-xmlfile==1.1.0
36
+ evaluate==0.4.1
37
+ exceptiongroup==1.2.0
38
+ executing==2.0.1
39
+ fastjsonschema==2.19.1
40
+ filelock==3.13.1
41
+ flake8==7.0.0
42
+ fonttools==4.48.1
43
+ fqdn==1.5.1
44
+ frozenlist==1.4.1
45
+ fsspec==2023.10.0
46
+ gdown==5.1.0
47
+ gitdb==4.0.11
48
+ gitpython==3.1.41
49
+ h11==0.14.0
50
+ httpcore==1.0.2
51
+ httpx==0.26.0
52
+ huggingface-hub==0.20.3
53
+ idna==3.6
54
+ imagesize==1.4.1
55
+ ipykernel==6.29.2
56
+ ipython==8.21.0
57
+ ipywidgets==8.1.2
58
+ isoduration==20.11.0
59
+ isort==5.13.2
60
+ jedi==0.19.1
61
+ jinja2==3.1.3
62
+ joblib==1.3.2
63
+ json5==0.9.14
64
+ jsonpointer==2.4
65
+ jsonschema-specifications==2023.12.1
66
+ jsonschema==4.21.1
67
+ jupyter-client==8.6.0
68
+ jupyter-console==6.6.3
69
+ jupyter-core==5.7.1
70
+ jupyter-events==0.9.0
71
+ jupyter-lsp==2.2.2
72
+ jupyter-server-terminals==0.5.2
73
+ jupyter-server==2.12.5
74
+ jupyter==1.0.0
75
+ jupyterlab-pygments==0.3.0
76
+ jupyterlab-server==2.25.2
77
+ jupyterlab-widgets==3.0.10
78
+ jupyterlab==4.1.1
79
+ kiwisolver==1.4.5
80
+ kornia==0.7.1
81
+ loguru==0.7.2
82
+ markupsafe==2.1.5
83
+ matplotlib-inline==0.1.6
84
+ matplotlib==3.8.2
85
+ mccabe==0.7.0
86
+ mistune==3.0.2
87
+ mpmath==1.3.0
88
+ multidict==6.0.5
89
+ multiprocess==0.70.16
90
+ mypy-extensions==1.0.0
91
+ nbclient==0.9.0
92
+ nbconvert==7.16.0
93
+ nbformat==5.9.2
94
+ nest-asyncio==1.6.0
95
+ networkx==3.2.1
96
+ notebook-shim==0.2.3
97
+ notebook==7.1.0
98
+ numpy==1.26.4
99
+ nvidia-cublas-cu12==12.1.3.1
100
+ nvidia-cuda-cupti-cu12==12.1.105
101
+ nvidia-cuda-nvrtc-cu12==12.1.105
102
+ nvidia-cuda-runtime-cu12==12.1.105
103
+ nvidia-cudnn-cu12==8.9.2.26
104
+ nvidia-cufft-cu12==11.0.2.54
105
+ nvidia-curand-cu12==10.3.2.106
106
+ nvidia-cusolver-cu12==11.4.5.107
107
+ nvidia-cusparse-cu12==12.1.0.106
108
+ nvidia-nccl-cu12==2.19.3
109
+ nvidia-nvjitlink-cu12==12.3.101
110
+ nvidia-nvtx-cu12==12.1.105
111
+ opencv-python==4.9.0.80
112
+ openpyxl==3.1.2
113
+ overrides==7.7.0
114
+ packaging==23.2
115
+ pandas==2.2.0
116
+ pandocfilters==1.5.1
117
+ parso==0.8.3
118
+ pathspec==0.12.1
119
+ pexpect==4.9.0
120
+ pillow==10.2.0
121
+ pip==24.0
122
+ platformdirs==4.2.0
123
+ prometheus-client==0.19.0
124
+ prompt-toolkit==3.0.43
125
+ protobuf==4.25.2
126
+ psutil==5.9.8
127
+ ptyprocess==0.7.0
128
+ pure-eval==0.2.2
129
+ pyarrow-hotfix==0.6
130
+ pyarrow==15.0.0
131
+ pycodestyle==2.11.1
132
+ pycparser==2.21
133
+ pyflakes==3.2.0
134
+ pygments==2.17.2
135
+ pyparsing==3.1.1
136
+ pysocks==1.7.1
137
+ python-dateutil==2.8.2
138
+ python-dotenv==1.0.1
139
+ python-json-logger==2.0.7
140
+ pytz==2024.1
141
+ pyyaml==6.0.1
142
+ pyzmq==25.1.2
143
+ qtconsole==5.5.1
144
+ qtpy==2.4.1
145
+ referencing==0.33.0
146
+ regex==2023.12.25
147
+ requests==2.31.0
148
+ responses==0.18.0
149
+ rfc3339-validator==0.1.4
150
+ rfc3986-validator==0.1.1
151
+ rpds-py==0.17.1
152
+ safetensors==0.4.2
153
+ scikit-learn==1.4.0
154
+ scipy==1.12.0
155
+ send2trash==1.8.2
156
+ sentry-sdk==1.40.4
157
+ setproctitle==1.3.3
158
+ setuptools==69.1.0
159
+ six==1.16.0
160
+ smmap==5.0.1
161
+ sniffio==1.3.0
162
+ snowballstemmer==2.2.0
163
+ soupsieve==2.5
164
+ sphinx==7.2.6
165
+ sphinxcontrib-applehelp==1.0.8
166
+ sphinxcontrib-devhelp==1.0.6
167
+ sphinxcontrib-htmlhelp==2.0.5
168
+ sphinxcontrib-jsmath==1.0.1
169
+ sphinxcontrib-qthelp==1.0.7
170
+ sphinxcontrib-serializinghtml==1.1.10
171
+ stack-data==0.6.3
172
+ sympy==1.12
173
+ terminado==0.18.0
174
+ threadpoolctl==3.2.0
175
+ timm==0.9.12
176
+ tinycss2==1.2.1
177
+ tokenizers==0.15.2
178
+ tomli==2.0.1
179
+ torch==2.2.0
180
+ torchvision==0.17.0
181
+ tornado==6.4
182
+ tqdm==4.66.1
183
+ traitlets==5.14.1
184
+ transformers==4.37.2
185
+ transparent-background==1.2.12
186
+ triton==2.2.0
187
+ typer==0.9.0
188
+ types-python-dateutil==2.8.19.20240106
189
+ typing-extensions==4.9.0
190
+ tzdata==2023.4
191
+ uri-template==1.3.0
192
+ urllib3==2.2.0
193
+ wandb==0.16.3
194
+ wcwidth==0.2.13
195
+ webcolors==1.13
196
+ webencodings==0.5.1
197
+ websocket-client==1.7.0
198
+ wget==3.2
199
+ wheel==0.42.0
200
+ widgetsnbextension==4.0.10
201
+ xxhash==3.4.1
202
+ yarl==1.9.4
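Note that this snapshot pins transformers==4.37.2 while the model card above reports Transformers 4.38.2; the snapshot reflects the wandb run's environment rather than the final one. A sketch for spot-checking a few pins against the local environment (the subset chosen here is arbitrary):

```python
from importlib.metadata import version

for pkg, want in [("torch", "2.2.0"), ("transformers", "4.37.2"), ("wandb", "0.16.3")]:
    have = version(pkg)
    print(f"{pkg}: installed {have}, pinned {want}", "OK" if have == want else "MISMATCH")
```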