End of training
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitattributes +1 -0
- .gitkeep +0 -0
- 0.1-testing.ipynb +0 -0
- 0.10-rethinking_OM_splits.ipynb +770 -0
- 0.11-testing_bm_split_sizes.ipynb +644 -0
- 0.12-get_wandb_results.ipynb +0 -0
- 0.13-bm_dates_col.ipynb +0 -0
- 0.2-testing_image_scraping.ipynb +140 -0
- 0.3-testing_csv_join.ipynb +973 -0
- 0.4-testing_tif_images.ipynb +71 -0
- 0.5-testing_transparent_background.ipynb +321 -0
- 0.7Mahnaz-efficientnet.ipynb +492 -0
- 0.8-testing_segmented_data.ipynb +0 -0
- 0.9-testing_om_datasets.ipynb +459 -0
- 1.0-checking_dataset_size.ipynb +559 -0
- 1.1-exploring_OM_image_matching.ipynb +0 -0
- 2.0-assessing_OM_dataset.ipynb +1468 -0
- 3.0-efficientnet_example.ipynb +1062 -0
- 4.0-assessing_BM_dataset.ipynb +0 -0
- 5.0-assessing_date_prediction.ipynb +0 -0
- README.md +47 -0
- config.json +88 -0
- material_min3.csv +13 -0
- material_min3_max1.csv +13 -0
- material_min4.csv +13 -0
- material_min4_max1.csv +13 -0
- material_min5.csv +13 -0
- material_min5_max1.csv +13 -0
- material_min6.csv +13 -0
- material_min6_max1.csv +13 -0
- material_x_plus3Ds.csv +13 -0
- model.safetensors +3 -0
- object_name_min3.csv +1 -0
- object_name_min3_max1.csv +1 -0
- object_name_min4.csv +19 -0
- object_name_min4_max1.csv +19 -0
- object_name_min5.csv +1 -0
- object_name_min5_max1.csv +1 -0
- object_name_min6.csv +1 -0
- object_name_min6_max1.csv +1 -0
- preprocessor_config.json +29 -0
- results.pkl +3 -0
- results_local.pkl +3 -0
- training_args.bin +3 -0
- wandb/debug-cli.james.log +0 -0
- wandb/debug-internal.log +0 -0
- wandb/debug.log +0 -0
- wandb/run-20240214_112422-hfwsgqj3/files/config.yaml +0 -0
- wandb/run-20240214_112422-hfwsgqj3/files/output.log +33 -0
- wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt +202 -0
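The upload includes a complete transformers-style image-classification checkpoint (config.json, model.safetensors, preprocessor_config.json, training_args.bin). A minimal loading sketch, assuming the standard transformers layout; the repo id and image path below are placeholders, not part of this commit:

    # Hypothetical inference sketch for the uploaded checkpoint (assumed transformers layout).
    from transformers import AutoImageProcessor, AutoModelForImageClassification
    from PIL import Image

    repo_id = "path/to/this-checkpoint"  # placeholder repo id
    processor = AutoImageProcessor.from_pretrained(repo_id)
    model = AutoModelForImageClassification.from_pretrained(repo_id)

    image = Image.open("artefact.jpg").convert("RGB")  # placeholder image path
    inputs = processor(images=image, return_tensors="pt")
    logits = model(**inputs).logits
    print(model.config.id2label[logits.argmax(-1).item()])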
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20240214_112422-hfwsgqj3/run-hfwsgqj3.wandb filter=lfs diff=lfs merge=lfs -text
.gitkeep
ADDED
File without changes
0.1-testing.ipynb
ADDED
The diff for this file is too large to render; see the raw diff.
0.10-rethinking_OM_splits.ipynb
ADDED
@@ -0,0 +1,770 @@
Notebook contents (cell sources shown; the JSON wrapper and long rendered outputs are abridged):

Cell 1 (code):
    import pandas as pd
    import os
    from sklearn.model_selection import train_test_split

Cell 2 (code):
    file2obj = pd.read_csv("../data/processed/OM_file_to_obj.csv")
    obj2info = pd.read_csv("../data/processed/OM_obj_to_info.csv")

    # Could eventually do something with these columns, but need cleaning first
    obj2info.drop(
        columns=["number_of_parts", "production.date.start", "production.date.end", "obj_num_old"],
        inplace=True,
    )

    file2obj["image"] = file2obj.apply(lambda x: os.path.join(x["root"], x["file"]), axis=1)
    # file2obj.rename(columns={"obj_num": "label"}, inplace=True)

    join_df = file2obj[["obj_num", "file", "image", "root"]].merge(
        obj2info, left_on="obj_num", right_on="obj_num", how="left"
    )

Cell 3 (code):
    file2obj["root"]
Output: a Series named root of length 37305 holding image directory paths, from data/raw/images/fulling_mill/1985 through data/raw/images/egyptian/1963.

Cell 4 (code):
    obj2info.dropna(subset=["material", "description"], inplace=False)
Output: a DataFrame of 11673 rows x 7 columns (obj_num, description, object_name, other_name, material, production.period, production.place).

Cell 5 (code):
    label_col = "material"

    o2i_lim = obj2info.dropna(subset=[label_col, "description"], inplace=False)

    num_counts = o2i_lim[label_col].value_counts()
    for lower_lim in [3]:
        o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]
    train, val_test = train_test_split(
        o2i_lim, stratify=o2i_lim[label_col], test_size=0.4, random_state=42
    )
    val, test = train_test_split(
        val_test, stratify=val_test[label_col], test_size=0.8, random_state=42
    )

Cell 6 (code):
    from datasets import Dataset, DatasetDict

Cell 7 (code):
    ds = Dataset.from_pandas(join_df).to_pandas()

Cell 8 (code):
    ds
Output: a DataFrame of 37305 rows x 10 columns (obj_num, file, image, root, description, object_name, other_name, material, production.period, production.place).

Cell 9 (code):
    index_col = "obj_num"
    text_col = "obj_num"
    label_col = "material"
    lower_lim = 3
    problem_type = "image"


    o2i_lim = (
        ds.drop_duplicates(subset=[index_col, label_col], inplace=False)
        .dropna(subset=[text_col, label_col], inplace=False)
        .drop(columns=["root", "file", "image"], inplace=False)
    )


    num_counts = o2i_lim[label_col].value_counts()
    o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]

    train, val_test = train_test_split(
        o2i_lim, stratify=o2i_lim[label_col], test_size=0.41, random_state=42
    )
    val, test = train_test_split(
        val_test, stratify=val_test[label_col], test_size=0.5, random_state=42
    )
    print(train.shape, val.shape, test.shape, o2i_lim.shape)

    if problem_type == "image":
        train = train.merge(
            ds[["obj_num", "root", "file", "image"]], left_on="obj_num", right_on="obj_num", how="left"
        )
        val = val.merge(
            ds[["obj_num", "root", "file", "image"]], left_on="obj_num", right_on="obj_num", how="left"
        )
        test = test.merge(
            ds[["obj_num", "root", "file", "image"]], left_on="obj_num", right_on="obj_num", how="left"
        )
        print(train.shape, val.shape, test.shape, ds.shape)

    # ds_dict = DatasetDict({"train": Dataset.from_pandas(train), "val": Dataset.from_pandas(val), "test": Dataset.from_pandas(test)})
    # ds_dict

    # if problem_type == "image":

    # o2i_lim_ds = o2i_lim_ds.train_test_split(test_size=0.3, stratify_by_column=label_col, seed=42)
    # o2i_lim_ds_valtest = o2i_lim_ds["test"].train_test_split(test_size=0.5, stratify_by_column=label_col, seed=42)
    # o2i_lim_ds = DatasetDict({"train": o2i_lim_ds["train"], "val": o2i_lim_ds_valtest["train"], "test": o2i_lim_ds_valtest["test"]})

    # if problem_type == "image":
    #     file2obj = ds[["obj_num", "file", "image", "root"]].drop_duplicates(subset=["obj_num"], inplace=False)
    #     train = o2i_lim_ds["train"].merge(file2obj, left_on="obj_num", right_on="obj_num", how="left")
    #     val = o2i_lim_ds["val"].merge(file2obj, left_on="obj_num", right_on="obj_num", how="left")
    #     test = o2i_lim_ds["test"].merge(file2obj, left_on="obj_num", right_on="obj_num", how="left")
    #     o2i_lim_ds = DatasetDict({"train": train, "val": val, "test": test})
    #     o2i_lim_ds
Output (stdout):
    (6819, 7) (2370, 7) (2370, 7) (11559, 7)
    (19246, 10) (6743, 10) (7078, 10) (37305, 10)

Cell 10 (code):
    o2i_lim
Output: an empty DataFrame with columns [obj_num, description, object_name, other_name, material, production.period, production.place].

Cell 11 (code):
    cols_to_drop = ["col1", "col2", "col3"]
    ds = ds.drop(cols_to_drop, axis=1, errors="ignore")

Cell 12 (code):
    ds_lim = ds_all.dropna(subset=["image", args.label_col], inplace=False)
    if "3D" in args.dataset:
        ds_lim = ds_all[ds_all["original"]]

    num_counts = ds_lim[args.label_col].value_counts()
    ds_lim = ds_lim[ds_lim[args.label_col].isin(num_counts[num_counts > args.lower_lim].index)]

    train, val_test = train_test_split(
        ds_lim,
        stratify=ds_lim[args.label_col],
        test_size=2 * args.testset_size,
        random_state=42,
    )
    val, test = train_test_split(
        val_test, stratify=val_test[args.label_col], test_size=0.5, random_state=42
    )

Notebook metadata: kernel "ArtifactClassification", Python 3.10.12.
0.11-testing_bm_split_sizes.ipynb
ADDED
@@ -0,0 +1,644 @@
Notebook contents (cell sources shown; the JSON wrapper and long rendered outputs are abridged):

Cell 1 (code):
    import pandas as pd
    import os
    from sklearn.model_selection import train_test_split
    from datasets import load_dataset
    from artifact_classification.utils import ConfigLoader

Cell 2 (code):
    config = "testing"
    args = ConfigLoader(config, "../configs/train_bm_configs.yaml", "../configs/train_bm_default.yaml")

    ############################## Load dataset ##############################
    # Load dataset, filter out na inputs and labels and encode labels (as label column can change)


    label_cols = ["Object type", "Culture", "Materials", "Production place"]
    split_sizes = [0.1, 0.12, 0.13, 0.15, 0.2]

    ds_lim = load_dataset(args.dataset)["train"].to_pandas()
Output (stdout): "Updating with:" followed by the testing overrides {'config': 'testing', 'fast_dev_run': True, 'dataset': 'james-burton/BritishMuseum', 'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3', 'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'} and the fully merged training arguments (do_train, do_predict, batch_size 128, num_epochs 100, early_stopping_patience 5, seed 42, lr 5e-05, max_length 512, text_column 'Description', fp16, testset_size 0.1, and related defaults), then "Resolving data files" and "Loading dataset shards" progress bars.

Cell 3 (code):
    def test_split_size(split_size, label_col, ds_lim):
        try:
            ds_lim.dropna(subset=[label_col])
            num_counts = ds_lim[label_col].value_counts()
            ds_lim = ds_lim[ds_lim[label_col].isin(num_counts[num_counts > args.lower_lim].index)]

            train, val_test = train_test_split(
                ds_lim,
                stratify=ds_lim[label_col],
                test_size=2 * split_size,
                random_state=42,
            )
            val, test = train_test_split(
                val_test, stratify=val_test[label_col], test_size=0.5, random_state=42
            )
            print(f"Label col {label_col} Split size {split_size} passed")
        except ValueError:
            print(f"Split size {split_size} failed")


    for label_col in label_cols:
        for split_size in split_sizes:
            test_split_size(split_size, label_col, ds_lim)
Output (stdout): for each of Object type, Culture, Materials and Production place, split sizes 0.1 and 0.12 failed while 0.13, 0.15 and 0.2 passed.

Cell 4 (code):
    import yaml

    with open("../configs/train_configs.yaml", "r") as file:
        configs = list(yaml.safe_load_all(file))

Cell 5 (code):
    " ".join(
        [cfg["config"] for cfg in configs if "bm" in cfg["config"] and "num" not in cfg["config"]]
    )
Output: 'om3-white_material_bm-pretrn om3-white_name_bm-pretrn om3-3Dwhite_material_bm-pretrn om3-3Dwhite_name_bm-pretrn om3-3Dwhite-1frame_material_bm-pretrn om3-3Dwhite-1frame_name_bm-pretrn om4-white_material_bm-pretrn om4-white_name_bm-pretrn om4-3Dwhite_material_bm-pretrn om4-3Dwhite_name_bm-pretrn om4-3Dwhite-1frame_material_bm-pretrn om4-3Dwhite-1frame_name_bm-pretrn om5-white_material_bm-pretrn om5-white_name_bm-pretrn om5-3Dwhite_material_bm-pretrn om5-3Dwhite_name_bm-pretrn om5-3Dwhite-1frame_material_bm-pretrn om5-3Dwhite-1frame_name_bm-pretrn om6-white_material_bm-pretrn om6-white_name_bm-pretrn om6-3Dwhite_material_bm-pretrn om6-3Dwhite_name_bm-pretrn om6-3Dwhite-1frame_material_bm-pretrn om6-3Dwhite-1frame_name_bm-pretrn'

Cell 6 (only its stored output is visible; the rendered diff is cut off partway through it): the list of parsed config dicts, from {'config': 'testing', ...} through the bm3*, bm3txt*, bm3-white*, bm3-3Dwhite*, bm3-3Dwhite-1frame* configs and the corresponding bm4*, bm5* and bm6* variants (datasets james-burton/BritishMuseum, -white, -3Dwhite and -3Dwhite-1frame; model_base google/efficientnet-b3 or microsoft/deberta-v3-base; label_col Object type, Materials or Culture; lower_lim 3 to 6; testset_size 0.205 where set), ending within the bm6_culture entry.
527 |
+
" {'config': 'bm6txt_material',\n",
|
528 |
+
" 'dataset': 'james-burton/BritishMuseum',\n",
|
529 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
530 |
+
" 'model_base': 'microsoft/deberta-v3-base',\n",
|
531 |
+
" 'problem_type': 'text',\n",
|
532 |
+
" 'lower_lim': 6,\n",
|
533 |
+
" 'label_col': 'Materials'},\n",
|
534 |
+
" {'config': 'bm6txt_culture',\n",
|
535 |
+
" 'dataset': 'james-burton/BritishMuseum',\n",
|
536 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
537 |
+
" 'model_base': 'microsoft/deberta-v3-base',\n",
|
538 |
+
" 'problem_type': 'text',\n",
|
539 |
+
" 'lower_lim': 6,\n",
|
540 |
+
" 'label_col': 'Culture'},\n",
|
541 |
+
" {'config': 'bm6-white_type',\n",
|
542 |
+
" 'dataset': 'james-burton/BritishMuseum-white',\n",
|
543 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
544 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
545 |
+
" 'problem_type': 'image',\n",
|
546 |
+
" 'lower_lim': 6,\n",
|
547 |
+
" 'label_col': 'Object type'},\n",
|
548 |
+
" {'config': 'bm6-white_material',\n",
|
549 |
+
" 'dataset': 'james-burton/BritishMuseum-white',\n",
|
550 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
551 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
552 |
+
" 'problem_type': 'image',\n",
|
553 |
+
" 'lower_lim': 6,\n",
|
554 |
+
" 'label_col': 'Materials'},\n",
|
555 |
+
" {'config': 'bm6-white_culture',\n",
|
556 |
+
" 'dataset': 'james-burton/BritishMuseum-white',\n",
|
557 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
558 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
559 |
+
" 'problem_type': 'image',\n",
|
560 |
+
" 'lower_lim': 6,\n",
|
561 |
+
" 'label_col': 'Culture'},\n",
|
562 |
+
" {'config': 'bm6-3Dwhite_type',\n",
|
563 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
|
564 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
565 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
566 |
+
" 'problem_type': 'image',\n",
|
567 |
+
" 'lower_lim': 6,\n",
|
568 |
+
" 'label_col': 'Object type'},\n",
|
569 |
+
" {'config': 'bm6-3Dwhite_material',\n",
|
570 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
|
571 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
572 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
573 |
+
" 'problem_type': 'image',\n",
|
574 |
+
" 'lower_lim': 6,\n",
|
575 |
+
" 'label_col': 'Materials'},\n",
|
576 |
+
" {'config': 'bm6-3Dwhite_culture',\n",
|
577 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
|
578 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
579 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
580 |
+
" 'problem_type': 'image',\n",
|
581 |
+
" 'lower_lim': 6,\n",
|
582 |
+
" 'label_col': 'Culture'},\n",
|
583 |
+
" {'config': 'bm6-3Dwhite-1frame_type',\n",
|
584 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
|
585 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
586 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
587 |
+
" 'problem_type': 'image',\n",
|
588 |
+
" 'lower_lim': 6,\n",
|
589 |
+
" 'label_col': 'Object type'},\n",
|
590 |
+
" {'config': 'bm6-3Dwhite-1frame_material',\n",
|
591 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
|
592 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
593 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
594 |
+
" 'problem_type': 'image',\n",
|
595 |
+
" 'lower_lim': 6,\n",
|
596 |
+
" 'label_col': 'Materials'},\n",
|
597 |
+
" {'config': 'bm6-3Dwhite-1frame_culture',\n",
|
598 |
+
" 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
|
599 |
+
" 'wandb_proj_name': 'British Museum',\n",
|
600 |
+
" 'model_base': 'google/efficientnet-b3',\n",
|
601 |
+
" 'problem_type': 'image',\n",
|
602 |
+
" 'lower_lim': 6,\n",
|
603 |
+
" 'label_col': 'Culture'}]"
|
604 |
+
]
|
605 |
+
},
|
606 |
+
"execution_count": 18,
|
607 |
+
"metadata": {},
|
608 |
+
"output_type": "execute_result"
|
609 |
+
}
|
610 |
+
],
|
611 |
+
"source": [
|
612 |
+
"configs"
|
613 |
+
]
|
614 |
+
},
|
615 |
+
{
|
616 |
+
"cell_type": "code",
|
617 |
+
"execution_count": null,
|
618 |
+
"metadata": {},
|
619 |
+
"outputs": [],
|
620 |
+
"source": []
|
621 |
+
}
|
622 |
+
],
|
623 |
+
"metadata": {
|
624 |
+
"kernelspec": {
|
625 |
+
"display_name": "ArtifactClassification",
|
626 |
+
"language": "python",
|
627 |
+
"name": "python3"
|
628 |
+
},
|
629 |
+
"language_info": {
|
630 |
+
"codemirror_mode": {
|
631 |
+
"name": "ipython",
|
632 |
+
"version": 3
|
633 |
+
},
|
634 |
+
"file_extension": ".py",
|
635 |
+
"mimetype": "text/x-python",
|
636 |
+
"name": "python",
|
637 |
+
"nbconvert_exporter": "python",
|
638 |
+
"pygments_lexer": "ipython3",
|
639 |
+
"version": "3.10.12"
|
640 |
+
}
|
641 |
+
},
|
642 |
+
"nbformat": 4,
|
643 |
+
"nbformat_minor": 2
|
644 |
+
}
|
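Note on the configs list in the output above: a minimal sketch (not part of the notebook) of how such a list can be grouped before launching runs. The two entries below are copied verbatim from the output, the remaining entries are elided, and the grouping criterion is purely illustrative.

from collections import defaultdict

configs = [
    {'config': 'bm5_type', 'dataset': 'james-burton/BritishMuseum',
     'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3',
     'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'},
    {'config': 'bm5txt_culture', 'dataset': 'james-burton/BritishMuseum',
     'wandb_proj_name': 'British Museum', 'model_base': 'microsoft/deberta-v3-base',
     'problem_type': 'text', 'lower_lim': 5, 'label_col': 'Culture'},
    # ... remaining entries as listed in the notebook output above ...
]

# Group run names by (problem_type, label_col) to check task coverage at a glance.
by_task = defaultdict(list)
for cfg in configs:
    by_task[(cfg['problem_type'], cfg['label_col'])].append(cfg['config'])

for task, names in sorted(by_task.items()):
    print(task, names)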
0.12-get_wandb_results.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
0.13-bm_dates_col.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
0.2-testing_image_scraping.ipynb
ADDED
@@ -0,0 +1,140 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 9,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"1894,1101.507\n",
|
13 |
+
"https://media.britishmuseum.org/media/Repository/Documents/2014_10/6_14/c5015a41_782e_4eb7_badf_a3bc00f54f2c/preview_00426109_001.jpg\n",
|
14 |
+
"Image downloaded successfully!\n"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"name": "stderr",
|
19 |
+
"output_type": "stream",
|
20 |
+
"text": [
|
21 |
+
"/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
22 |
+
" warnings.warn(\n"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"name": "stdout",
|
27 |
+
"output_type": "stream",
|
28 |
+
"text": [
|
29 |
+
"1903,1215.10\n",
|
30 |
+
"https://media.britishmuseum.org/media/Repository/Documents/2014_10/15_13/532668b9_0af1_4402_8e13_a3c500e1907c/preview_00944260_001.jpg\n",
|
31 |
+
"Image downloaded successfully!\n"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"name": "stderr",
|
36 |
+
"output_type": "stream",
|
37 |
+
"text": [
|
38 |
+
"/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
39 |
+
" warnings.warn(\n"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"ename": "KeyboardInterrupt",
|
44 |
+
"evalue": "",
|
45 |
+
"output_type": "error",
|
46 |
+
"traceback": [
|
47 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
48 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
49 |
+
"Cell \u001b[0;32mIn[9], line 27\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to download image. Status code:\u001b[39m\u001b[38;5;124m\"\u001b[39m, response\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# wait 20 seconds\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# response = requests.get(url, headers=headers)\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \n\u001b[1;32m 30\u001b[0m \u001b[38;5;66;03m# if response.status_code == 200:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# else:\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# print(\"Failed to download image. Status code:\", response.status_code)\u001b[39;00m\n",
|
50 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
51 |
+
]
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"source": [
|
55 |
+
"import requests\n",
|
56 |
+
"import pandas as pd\n",
|
57 |
+
"import time\n",
|
58 |
+
"\n",
|
59 |
+
"url = \"http://media.britishmuseum.org/media/Repository/Documents/2020_2/25_11/8772f2ea_b08f_46cf_8af2_ab6c00c10b84/preview_DSC_0760.jpg\"\n",
|
60 |
+
"headers = {\n",
|
61 |
+
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\"\n",
|
62 |
+
"}\n",
|
63 |
+
"\n",
|
64 |
+
"\n",
|
65 |
+
"df = pd.read_csv(\"../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv\")\n",
|
66 |
+
"df[\"Museum number\"] = df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
|
67 |
+
"\n",
|
68 |
+
"for index, row in df.iterrows():\n",
|
69 |
+
" print(row[\"Museum number\"])\n",
|
70 |
+
" url = row[\"Image\"]\n",
|
71 |
+
" print(url)\n",
|
72 |
+
" response = requests.get(url, verify=False)\n",
|
73 |
+
" if response.status_code == 200:\n",
|
74 |
+
" with open(f\"../data/raw/BM_images/{row['Museum number']}.jpg\", \"wb\") as f:\n",
|
75 |
+
" f.write(response.content)\n",
|
76 |
+
" print(\"Image downloaded successfully!\")\n",
|
77 |
+
" else:\n",
|
78 |
+
" print(\"Failed to download image. Status code:\", response.status_code)\n",
|
79 |
+
" # wait 20 seconds\n",
|
80 |
+
" time.sleep(20)\n",
|
81 |
+
"# response = requests.get(url, headers=headers)\n",
|
82 |
+
"\n",
|
83 |
+
"# if response.status_code == 200:\n",
|
84 |
+
"# with open(\"image.jpg\", \"wb\") as f:\n",
|
85 |
+
"# f.write(response.content)\n",
|
86 |
+
"# print(\"Image downloaded successfully!\")\n",
|
87 |
+
"# else:\n",
|
88 |
+
"# print(\"Failed to download image. Status code:\", response.status_code)"
|
89 |
+
]
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"cell_type": "code",
|
93 |
+
"execution_count": 12,
|
94 |
+
"metadata": {},
|
95 |
+
"outputs": [
|
96 |
+
{
|
97 |
+
"data": {
|
98 |
+
"text/plain": [
|
99 |
+
"460"
|
100 |
+
]
|
101 |
+
},
|
102 |
+
"execution_count": 12,
|
103 |
+
"metadata": {},
|
104 |
+
"output_type": "execute_result"
|
105 |
+
}
|
106 |
+
],
|
107 |
+
"source": [
|
108 |
+
"(df[\"Museum number\"] == \"null\").sum()"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "code",
|
113 |
+
"execution_count": null,
|
114 |
+
"metadata": {},
|
115 |
+
"outputs": [],
|
116 |
+
"source": []
|
117 |
+
}
|
118 |
+
],
|
119 |
+
"metadata": {
|
120 |
+
"kernelspec": {
|
121 |
+
"display_name": "ArtifactClassification",
|
122 |
+
"language": "python",
|
123 |
+
"name": "python3"
|
124 |
+
},
|
125 |
+
"language_info": {
|
126 |
+
"codemirror_mode": {
|
127 |
+
"name": "ipython",
|
128 |
+
"version": 3
|
129 |
+
},
|
130 |
+
"file_extension": ".py",
|
131 |
+
"mimetype": "text/x-python",
|
132 |
+
"name": "python",
|
133 |
+
"nbconvert_exporter": "python",
|
134 |
+
"pygments_lexer": "ipython3",
|
135 |
+
"version": "3.10.12"
|
136 |
+
}
|
137 |
+
},
|
138 |
+
"nbformat": 4,
|
139 |
+
"nbformat_minor": 2
|
140 |
+
}
|
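The download loop in 0.2-testing_image_scraping.ipynb above calls requests.get(url, verify=False), which is what produces the repeated InsecureRequestWarning in the cell output. A minimal sketch of the same loop with certificate verification left on and a configurable delay; the paths and column names follow the notebook, while the timeout and delay values are assumptions.

import time
import requests
import pandas as pd

df = pd.read_csv("../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv")
df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)

for _, row in df.iterrows():
    url = row["Image"]
    # verify=True is the default; only fall back to verify=False if the host's
    # certificate chain genuinely fails to validate.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        with open(f"../data/raw/BM_images/{row['Museum number']}.jpg", "wb") as f:
            f.write(response.content)
    else:
        print("Failed to download image. Status code:", response.status_code)
    time.sleep(5)  # polite delay between requests (assumed value)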
0.3-testing_csv_join.ipynb
ADDED
@@ -0,0 +1,973 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stderr",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"/tmp/ipykernel_751668/3571106454.py:2: DeprecationWarning: \n",
|
13 |
+
"Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
|
14 |
+
"(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
|
15 |
+
"but was not found to be installed on your system.\n",
|
16 |
+
"If this would cause problems for you,\n",
|
17 |
+
"please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
|
18 |
+
" \n",
|
19 |
+
" import pandas as pd\n"
|
20 |
+
]
|
21 |
+
}
|
22 |
+
],
|
23 |
+
"source": [
|
24 |
+
"import os\n",
|
25 |
+
"import pandas as pd"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "code",
|
30 |
+
"execution_count": 3,
|
31 |
+
"metadata": {},
|
32 |
+
"outputs": [],
|
33 |
+
"source": [
|
34 |
+
"# Europe\n",
|
35 |
+
"input_filepath = \"../data/raw\"\n",
|
36 |
+
"csv_files = os.listdir(f\"{input_filepath}/BM_csv_files/3000BC-AD500/\")\n",
|
37 |
+
"europe_csv_files = [file for file in csv_files if \"africa\" in file.lower()]\n",
|
38 |
+
"\n",
|
39 |
+
"if europe_csv_files:\n",
|
40 |
+
" # europe_csv_path = f\"{output_filepath}/BM_images/europe/\"\n",
|
41 |
+
" # if not os.path.exists(europe_csv_path):\n",
|
42 |
+
" # os.makedirs(europe_csv_path)\n",
|
43 |
+
"\n",
|
44 |
+
" europe_df = pd.DataFrame()\n",
|
45 |
+
" for csv_file in europe_csv_files:\n",
|
46 |
+
" csv_path = f\"{input_filepath}/BM_csv_files/3000BC-AD500/{csv_file}\"\n",
|
47 |
+
" df = pd.read_csv(csv_path)\n",
|
48 |
+
" europe_df = pd.concat([europe_df, df], ignore_index=True)\n",
|
49 |
+
"\n",
|
50 |
+
" # europe_df.drop_duplicates(inplace=True)\n",
|
51 |
+
" # europe_df.to_csv(f\"{europe_csv_path}/europe.csv\", index=False)\n",
|
52 |
+
" # print(\"Europe CSV file created successfully!\")\n",
|
53 |
+
"else:\n",
|
54 |
+
" print(\"No Europe CSV files found.\")"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"cell_type": "code",
|
59 |
+
"execution_count": 10,
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [],
|
62 |
+
"source": [
|
63 |
+
"region = \"africa\"\n",
|
64 |
+
"region_csv_files = [file for file in csv_files if region in file.lower()]\n",
|
65 |
+
"region_df = pd.concat(\n",
|
66 |
+
" [\n",
|
67 |
+
" pd.read_csv(f\"{input_filepath}/BM_csv_files/3000BC-AD500/{file}\")\n",
|
68 |
+
" for file in region_csv_files\n",
|
69 |
+
" ]\n",
|
70 |
+
")\n",
|
71 |
+
"region_df[\"Museum number\"] = region_df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
|
72 |
+
"region_df.drop_duplicates(inplace=True)\n",
|
73 |
+
"region_df = region_df[region_df[\"Museum number\"] != \"null\"]"
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": 4,
|
79 |
+
"metadata": {},
|
80 |
+
"outputs": [
|
81 |
+
{
|
82 |
+
"data": {
|
83 |
+
"text/html": [
|
84 |
+
"<div>\n",
|
85 |
+
"<style scoped>\n",
|
86 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
87 |
+
" vertical-align: middle;\n",
|
88 |
+
" }\n",
|
89 |
+
"\n",
|
90 |
+
" .dataframe tbody tr th {\n",
|
91 |
+
" vertical-align: top;\n",
|
92 |
+
" }\n",
|
93 |
+
"\n",
|
94 |
+
" .dataframe thead th {\n",
|
95 |
+
" text-align: right;\n",
|
96 |
+
" }\n",
|
97 |
+
"</style>\n",
|
98 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
99 |
+
" <thead>\n",
|
100 |
+
" <tr style=\"text-align: right;\">\n",
|
101 |
+
" <th></th>\n",
|
102 |
+
" <th>Image</th>\n",
|
103 |
+
" <th>Object type</th>\n",
|
104 |
+
" <th>Museum number</th>\n",
|
105 |
+
" <th>Title</th>\n",
|
106 |
+
" <th>Denomination</th>\n",
|
107 |
+
" <th>Escapement</th>\n",
|
108 |
+
" <th>Description</th>\n",
|
109 |
+
" <th>Producer name</th>\n",
|
110 |
+
" <th>School/style</th>\n",
|
111 |
+
" <th>State</th>\n",
|
112 |
+
" <th>...</th>\n",
|
113 |
+
" <th>Acq date</th>\n",
|
114 |
+
" <th>Acq notes (acq)</th>\n",
|
115 |
+
" <th>Acq notes (exc)</th>\n",
|
116 |
+
" <th>Dept</th>\n",
|
117 |
+
" <th>BM/Big number</th>\n",
|
118 |
+
" <th>Reg number</th>\n",
|
119 |
+
" <th>Add ids</th>\n",
|
120 |
+
" <th>Cat no</th>\n",
|
121 |
+
" <th>Banknote serial number</th>\n",
|
122 |
+
" <th>Joined objects</th>\n",
|
123 |
+
" </tr>\n",
|
124 |
+
" </thead>\n",
|
125 |
+
" <tbody>\n",
|
126 |
+
" <tr>\n",
|
127 |
+
" <th>0</th>\n",
|
128 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
129 |
+
" <td>acorn lekythos</td>\n",
|
130 |
+
" <td>No: 1888,0601.716</td>\n",
|
131 |
+
" <td>NaN</td>\n",
|
132 |
+
" <td>NaN</td>\n",
|
133 |
+
" <td>NaN</td>\n",
|
134 |
+
" <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
|
135 |
+
" <td>NaN</td>\n",
|
136 |
+
" <td>NaN</td>\n",
|
137 |
+
" <td>NaN</td>\n",
|
138 |
+
" <td>...</td>\n",
|
139 |
+
" <td>1888</td>\n",
|
140 |
+
" <td>NaN</td>\n",
|
141 |
+
" <td>Excavated 1885-1886.</td>\n",
|
142 |
+
" <td>Greek and Roman</td>\n",
|
143 |
+
" <td>NaN</td>\n",
|
144 |
+
" <td>1888,0601.716</td>\n",
|
145 |
+
" <td>NaN</td>\n",
|
146 |
+
" <td>NaN</td>\n",
|
147 |
+
" <td>NaN</td>\n",
|
148 |
+
" <td>NaN</td>\n",
|
149 |
+
" </tr>\n",
|
150 |
+
" <tr>\n",
|
151 |
+
" <th>1</th>\n",
|
152 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
153 |
+
" <td>acroterion</td>\n",
|
154 |
+
" <td>No: 1886,0401.45</td>\n",
|
155 |
+
" <td>NaN</td>\n",
|
156 |
+
" <td>NaN</td>\n",
|
157 |
+
" <td>NaN</td>\n",
|
158 |
+
" <td>Fragment of a marble corner palmetto with bird...</td>\n",
|
159 |
+
" <td>NaN</td>\n",
|
160 |
+
" <td>NaN</td>\n",
|
161 |
+
" <td>NaN</td>\n",
|
162 |
+
" <td>...</td>\n",
|
163 |
+
" <td>NaN</td>\n",
|
164 |
+
" <td>NaN</td>\n",
|
165 |
+
" <td>NaN</td>\n",
|
166 |
+
" <td>Greek and Roman</td>\n",
|
167 |
+
" <td>NaN</td>\n",
|
168 |
+
" <td>1886,0401.45</td>\n",
|
169 |
+
" <td>NaN</td>\n",
|
170 |
+
" <td>NaN</td>\n",
|
171 |
+
" <td>NaN</td>\n",
|
172 |
+
" <td>NaN</td>\n",
|
173 |
+
" </tr>\n",
|
174 |
+
" <tr>\n",
|
175 |
+
" <th>2</th>\n",
|
176 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
177 |
+
" <td>acroterion</td>\n",
|
178 |
+
" <td>No: 1886,0401.1215</td>\n",
|
179 |
+
" <td>NaN</td>\n",
|
180 |
+
" <td>NaN</td>\n",
|
181 |
+
" <td>NaN</td>\n",
|
182 |
+
" <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
|
183 |
+
" <td>NaN</td>\n",
|
184 |
+
" <td>NaN</td>\n",
|
185 |
+
" <td>NaN</td>\n",
|
186 |
+
" <td>...</td>\n",
|
187 |
+
" <td>1886</td>\n",
|
188 |
+
" <td>NaN</td>\n",
|
189 |
+
" <td>NaN</td>\n",
|
190 |
+
" <td>Greek and Roman</td>\n",
|
191 |
+
" <td>NaN</td>\n",
|
192 |
+
" <td>1886,0401.1215</td>\n",
|
193 |
+
" <td>Miscellaneous number: 1886,0401.44</td>\n",
|
194 |
+
" <td>NaN</td>\n",
|
195 |
+
" <td>NaN</td>\n",
|
196 |
+
" <td>NaN</td>\n",
|
197 |
+
" </tr>\n",
|
198 |
+
" <tr>\n",
|
199 |
+
" <th>3</th>\n",
|
200 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
201 |
+
" <td>adze; hoe</td>\n",
|
202 |
+
" <td>No: null</td>\n",
|
203 |
+
" <td>NaN</td>\n",
|
204 |
+
" <td>NaN</td>\n",
|
205 |
+
" <td>NaN</td>\n",
|
206 |
+
" <td>Iron adze or hoe.</td>\n",
|
207 |
+
" <td>NaN</td>\n",
|
208 |
+
" <td>NaN</td>\n",
|
209 |
+
" <td>NaN</td>\n",
|
210 |
+
" <td>...</td>\n",
|
211 |
+
" <td>NaN</td>\n",
|
212 |
+
" <td>NaN</td>\n",
|
213 |
+
" <td>Excavated 1885-1886 by Petrie.</td>\n",
|
214 |
+
" <td>External</td>\n",
|
215 |
+
" <td>NaN</td>\n",
|
216 |
+
" <td>NaN</td>\n",
|
217 |
+
" <td>Miscellaneous number: 1886.XI.5 (Publication p...</td>\n",
|
218 |
+
" <td>NaN</td>\n",
|
219 |
+
" <td>NaN</td>\n",
|
220 |
+
" <td>NaN</td>\n",
|
221 |
+
" </tr>\n",
|
222 |
+
" <tr>\n",
|
223 |
+
" <th>4</th>\n",
|
224 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
225 |
+
" <td>aegis; votive offering</td>\n",
|
226 |
+
" <td>No: null</td>\n",
|
227 |
+
" <td>NaN</td>\n",
|
228 |
+
" <td>NaN</td>\n",
|
229 |
+
" <td>NaN</td>\n",
|
230 |
+
" <td>Solid-cast fragmentary Menat-counterweight of ...</td>\n",
|
231 |
+
" <td>NaN</td>\n",
|
232 |
+
" <td>NaN</td>\n",
|
233 |
+
" <td>NaN</td>\n",
|
234 |
+
" <td>...</td>\n",
|
235 |
+
" <td>NaN</td>\n",
|
236 |
+
" <td>NaN</td>\n",
|
237 |
+
" <td>Excavated 1884-1885. 1885: excavated by the Eg...</td>\n",
|
238 |
+
" <td>External</td>\n",
|
239 |
+
" <td>NaN</td>\n",
|
240 |
+
" <td>NaN</td>\n",
|
241 |
+
" <td>Miscellaneous number: 86.339 (Accession Number...</td>\n",
|
242 |
+
" <td>NaN</td>\n",
|
243 |
+
" <td>NaN</td>\n",
|
244 |
+
" <td>NaN</td>\n",
|
245 |
+
" </tr>\n",
|
246 |
+
" <tr>\n",
|
247 |
+
" <th>...</th>\n",
|
248 |
+
" <td>...</td>\n",
|
249 |
+
" <td>...</td>\n",
|
250 |
+
" <td>...</td>\n",
|
251 |
+
" <td>...</td>\n",
|
252 |
+
" <td>...</td>\n",
|
253 |
+
" <td>...</td>\n",
|
254 |
+
" <td>...</td>\n",
|
255 |
+
" <td>...</td>\n",
|
256 |
+
" <td>...</td>\n",
|
257 |
+
" <td>...</td>\n",
|
258 |
+
" <td>...</td>\n",
|
259 |
+
" <td>...</td>\n",
|
260 |
+
" <td>...</td>\n",
|
261 |
+
" <td>...</td>\n",
|
262 |
+
" <td>...</td>\n",
|
263 |
+
" <td>...</td>\n",
|
264 |
+
" <td>...</td>\n",
|
265 |
+
" <td>...</td>\n",
|
266 |
+
" <td>...</td>\n",
|
267 |
+
" <td>...</td>\n",
|
268 |
+
" <td>...</td>\n",
|
269 |
+
" </tr>\n",
|
270 |
+
" <tr>\n",
|
271 |
+
" <th>44921</th>\n",
|
272 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
273 |
+
" <td>whetstone</td>\n",
|
274 |
+
" <td>No: null</td>\n",
|
275 |
+
" <td>NaN</td>\n",
|
276 |
+
" <td>NaN</td>\n",
|
277 |
+
" <td>NaN</td>\n",
|
278 |
+
" <td>Whetstone. Well worn on both sides; dull beige.</td>\n",
|
279 |
+
" <td>NaN</td>\n",
|
280 |
+
" <td>NaN</td>\n",
|
281 |
+
" <td>NaN</td>\n",
|
282 |
+
" <td>...</td>\n",
|
283 |
+
" <td>NaN</td>\n",
|
284 |
+
" <td>NaN</td>\n",
|
285 |
+
" <td>NaN</td>\n",
|
286 |
+
" <td>External</td>\n",
|
287 |
+
" <td>NaN</td>\n",
|
288 |
+
" <td>NaN</td>\n",
|
289 |
+
" <td>Miscellaneous number: 2478 (Accession Number)</td>\n",
|
290 |
+
" <td>NaN</td>\n",
|
291 |
+
" <td>NaN</td>\n",
|
292 |
+
" <td>NaN</td>\n",
|
293 |
+
" </tr>\n",
|
294 |
+
" <tr>\n",
|
295 |
+
" <th>44922</th>\n",
|
296 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
297 |
+
" <td>whetstone</td>\n",
|
298 |
+
" <td>No: null</td>\n",
|
299 |
+
" <td>NaN</td>\n",
|
300 |
+
" <td>NaN</td>\n",
|
301 |
+
" <td>NaN</td>\n",
|
302 |
+
" <td>Whetstone (?), made out of sandstone, in the s...</td>\n",
|
303 |
+
" <td>NaN</td>\n",
|
304 |
+
" <td>NaN</td>\n",
|
305 |
+
" <td>NaN</td>\n",
|
306 |
+
" <td>...</td>\n",
|
307 |
+
" <td>1886</td>\n",
|
308 |
+
" <td>NaN</td>\n",
|
309 |
+
" <td>Excavated 1884-1885. 1885: excavated by Willia...</td>\n",
|
310 |
+
" <td>External</td>\n",
|
311 |
+
" <td>NaN</td>\n",
|
312 |
+
" <td>NaN</td>\n",
|
313 |
+
" <td>Miscellaneous number: 86.185 (Accession Number...</td>\n",
|
314 |
+
" <td>NaN</td>\n",
|
315 |
+
" <td>NaN</td>\n",
|
316 |
+
" <td>NaN</td>\n",
|
317 |
+
" </tr>\n",
|
318 |
+
" <tr>\n",
|
319 |
+
" <th>44923</th>\n",
|
320 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
321 |
+
" <td>whistle</td>\n",
|
322 |
+
" <td>No: EA22513</td>\n",
|
323 |
+
" <td>NaN</td>\n",
|
324 |
+
" <td>NaN</td>\n",
|
325 |
+
" <td>NaN</td>\n",
|
326 |
+
" <td>A terracotta whistle, still working, roughly i...</td>\n",
|
327 |
+
" <td>NaN</td>\n",
|
328 |
+
" <td>NaN</td>\n",
|
329 |
+
" <td>NaN</td>\n",
|
330 |
+
" <td>...</td>\n",
|
331 |
+
" <td>1885</td>\n",
|
332 |
+
" <td>NaN</td>\n",
|
333 |
+
" <td>NaN</td>\n",
|
334 |
+
" <td>Egypt and Sudan</td>\n",
|
335 |
+
" <td>EA22513</td>\n",
|
336 |
+
" <td>1885,0101.361</td>\n",
|
337 |
+
" <td>NaN</td>\n",
|
338 |
+
" <td>NaN</td>\n",
|
339 |
+
" <td>NaN</td>\n",
|
340 |
+
" <td>NaN</td>\n",
|
341 |
+
" </tr>\n",
|
342 |
+
" <tr>\n",
|
343 |
+
" <th>44924</th>\n",
|
344 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
345 |
+
" <td>whistle</td>\n",
|
346 |
+
" <td>No: 1906,0301.7</td>\n",
|
347 |
+
" <td>NaN</td>\n",
|
348 |
+
" <td>NaN</td>\n",
|
349 |
+
" <td>NaN</td>\n",
|
350 |
+
" <td>Hand-modelled terracotta whistle, still workin...</td>\n",
|
351 |
+
" <td>NaN</td>\n",
|
352 |
+
" <td>NaN</td>\n",
|
353 |
+
" <td>NaN</td>\n",
|
354 |
+
" <td>...</td>\n",
|
355 |
+
" <td>1906</td>\n",
|
356 |
+
" <td>NaN</td>\n",
|
357 |
+
" <td>Excavated May 1886.</td>\n",
|
358 |
+
" <td>Greek and Roman</td>\n",
|
359 |
+
" <td>NaN</td>\n",
|
360 |
+
" <td>1906,0301.7</td>\n",
|
361 |
+
" <td>NaN</td>\n",
|
362 |
+
" <td>NaN</td>\n",
|
363 |
+
" <td>NaN</td>\n",
|
364 |
+
" <td>NaN</td>\n",
|
365 |
+
" </tr>\n",
|
366 |
+
" <tr>\n",
|
367 |
+
" <th>44925</th>\n",
|
368 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
369 |
+
" <td>null; plate</td>\n",
|
370 |
+
" <td>No: null</td>\n",
|
371 |
+
" <td>NaN</td>\n",
|
372 |
+
" <td>NaN</td>\n",
|
373 |
+
" <td>NaN</td>\n",
|
374 |
+
" <td>Body of North Ionian Late Wild Goat Style pott...</td>\n",
|
375 |
+
" <td>NaN</td>\n",
|
376 |
+
" <td>NaN</td>\n",
|
377 |
+
" <td>NaN</td>\n",
|
378 |
+
" <td>...</td>\n",
|
379 |
+
" <td>NaN</td>\n",
|
380 |
+
" <td>NaN</td>\n",
|
381 |
+
" <td>NaN</td>\n",
|
382 |
+
" <td>External</td>\n",
|
383 |
+
" <td>NaN</td>\n",
|
384 |
+
" <td>NaN</td>\n",
|
385 |
+
" <td>Miscellaneous number: 26.2.35 (Accession Number)</td>\n",
|
386 |
+
" <td>NaN</td>\n",
|
387 |
+
" <td>NaN</td>\n",
|
388 |
+
" <td>NaN</td>\n",
|
389 |
+
" </tr>\n",
|
390 |
+
" </tbody>\n",
|
391 |
+
"</table>\n",
|
392 |
+
"<p>44926 rows × 47 columns</p>\n",
|
393 |
+
"</div>"
|
394 |
+
],
|
395 |
+
"text/plain": [
|
396 |
+
" Image \\\n",
|
397 |
+
"0 https://media.britishmuseum.org/media/Reposito... \n",
|
398 |
+
"1 https://media.britishmuseum.org/media/Reposito... \n",
|
399 |
+
"2 https://media.britishmuseum.org/media/Reposito... \n",
|
400 |
+
"3 https://media.britishmuseum.org/media/Reposito... \n",
|
401 |
+
"4 https://media.britishmuseum.org/media/Reposito... \n",
|
402 |
+
"... ... \n",
|
403 |
+
"44921 https://media.britishmuseum.org/media/Reposito... \n",
|
404 |
+
"44922 https://media.britishmuseum.org/media/Reposito... \n",
|
405 |
+
"44923 https://media.britishmuseum.org/media/Reposito... \n",
|
406 |
+
"44924 https://media.britishmuseum.org/media/Reposito... \n",
|
407 |
+
"44925 https://media.britishmuseum.org/media/Reposito... \n",
|
408 |
+
"\n",
|
409 |
+
" Object type Museum number Title Denomination \\\n",
|
410 |
+
"0 acorn lekythos No: 1888,0601.716 NaN NaN \n",
|
411 |
+
"1 acroterion No: 1886,0401.45 NaN NaN \n",
|
412 |
+
"2 acroterion No: 1886,0401.1215 NaN NaN \n",
|
413 |
+
"3 adze; hoe No: null NaN NaN \n",
|
414 |
+
"4 aegis; votive offering No: null NaN NaN \n",
|
415 |
+
"... ... ... ... ... \n",
|
416 |
+
"44921 whetstone No: null NaN NaN \n",
|
417 |
+
"44922 whetstone No: null NaN NaN \n",
|
418 |
+
"44923 whistle No: EA22513 NaN NaN \n",
|
419 |
+
"44924 whistle No: 1906,0301.7 NaN NaN \n",
|
420 |
+
"44925 null; plate No: null NaN NaN \n",
|
421 |
+
"\n",
|
422 |
+
" Escapement Description \\\n",
|
423 |
+
"0 NaN Attic red-figured pottery acorn lekythos, rest... \n",
|
424 |
+
"1 NaN Fragment of a marble corner palmetto with bird... \n",
|
425 |
+
"2 NaN Fragment of a marble acroterion palmetto. Two ... \n",
|
426 |
+
"3 NaN Iron adze or hoe. \n",
|
427 |
+
"4 NaN Solid-cast fragmentary Menat-counterweight of ... \n",
|
428 |
+
"... ... ... \n",
|
429 |
+
"44921 NaN Whetstone. Well worn on both sides; dull beige. \n",
|
430 |
+
"44922 NaN Whetstone (?), made out of sandstone, in the s... \n",
|
431 |
+
"44923 NaN A terracotta whistle, still working, roughly i... \n",
|
432 |
+
"44924 NaN Hand-modelled terracotta whistle, still workin... \n",
|
433 |
+
"44925 NaN Body of North Ionian Late Wild Goat Style pott... \n",
|
434 |
+
"\n",
|
435 |
+
" Producer name School/style State ... Acq date Acq notes (acq) \\\n",
|
436 |
+
"0 NaN NaN NaN ... 1888 NaN \n",
|
437 |
+
"1 NaN NaN NaN ... NaN NaN \n",
|
438 |
+
"2 NaN NaN NaN ... 1886 NaN \n",
|
439 |
+
"3 NaN NaN NaN ... NaN NaN \n",
|
440 |
+
"4 NaN NaN NaN ... NaN NaN \n",
|
441 |
+
"... ... ... ... ... ... ... \n",
|
442 |
+
"44921 NaN NaN NaN ... NaN NaN \n",
|
443 |
+
"44922 NaN NaN NaN ... 1886 NaN \n",
|
444 |
+
"44923 NaN NaN NaN ... 1885 NaN \n",
|
445 |
+
"44924 NaN NaN NaN ... 1906 NaN \n",
|
446 |
+
"44925 NaN NaN NaN ... NaN NaN \n",
|
447 |
+
"\n",
|
448 |
+
" Acq notes (exc) Dept \\\n",
|
449 |
+
"0 Excavated 1885-1886. Greek and Roman \n",
|
450 |
+
"1 NaN Greek and Roman \n",
|
451 |
+
"2 NaN Greek and Roman \n",
|
452 |
+
"3 Excavated 1885-1886 by Petrie. External \n",
|
453 |
+
"4 Excavated 1884-1885. 1885: excavated by the Eg... External \n",
|
454 |
+
"... ... ... \n",
|
455 |
+
"44921 NaN External \n",
|
456 |
+
"44922 Excavated 1884-1885. 1885: excavated by Willia... External \n",
|
457 |
+
"44923 NaN Egypt and Sudan \n",
|
458 |
+
"44924 Excavated May 1886. Greek and Roman \n",
|
459 |
+
"44925 NaN External \n",
|
460 |
+
"\n",
|
461 |
+
" BM/Big number Reg number \\\n",
|
462 |
+
"0 NaN 1888,0601.716 \n",
|
463 |
+
"1 NaN 1886,0401.45 \n",
|
464 |
+
"2 NaN 1886,0401.1215 \n",
|
465 |
+
"3 NaN NaN \n",
|
466 |
+
"4 NaN NaN \n",
|
467 |
+
"... ... ... \n",
|
468 |
+
"44921 NaN NaN \n",
|
469 |
+
"44922 NaN NaN \n",
|
470 |
+
"44923 EA22513 1885,0101.361 \n",
|
471 |
+
"44924 NaN 1906,0301.7 \n",
|
472 |
+
"44925 NaN NaN \n",
|
473 |
+
"\n",
|
474 |
+
" Add ids Cat no \\\n",
|
475 |
+
"0 NaN NaN \n",
|
476 |
+
"1 NaN NaN \n",
|
477 |
+
"2 Miscellaneous number: 1886,0401.44 NaN \n",
|
478 |
+
"3 Miscellaneous number: 1886.XI.5 (Publication p... NaN \n",
|
479 |
+
"4 Miscellaneous number: 86.339 (Accession Number... NaN \n",
|
480 |
+
"... ... ... \n",
|
481 |
+
"44921 Miscellaneous number: 2478 (Accession Number) NaN \n",
|
482 |
+
"44922 Miscellaneous number: 86.185 (Accession Number... NaN \n",
|
483 |
+
"44923 NaN NaN \n",
|
484 |
+
"44924 NaN NaN \n",
|
485 |
+
"44925 Miscellaneous number: 26.2.35 (Accession Number) NaN \n",
|
486 |
+
"\n",
|
487 |
+
" Banknote serial number Joined objects \n",
|
488 |
+
"0 NaN NaN \n",
|
489 |
+
"1 NaN NaN \n",
|
490 |
+
"2 NaN NaN \n",
|
491 |
+
"3 NaN NaN \n",
|
492 |
+
"4 NaN NaN \n",
|
493 |
+
"... ... ... \n",
|
494 |
+
"44921 NaN NaN \n",
|
495 |
+
"44922 NaN NaN \n",
|
496 |
+
"44923 NaN NaN \n",
|
497 |
+
"44924 NaN NaN \n",
|
498 |
+
"44925 NaN NaN \n",
|
499 |
+
"\n",
|
500 |
+
"[44926 rows x 47 columns]"
|
501 |
+
]
|
502 |
+
},
|
503 |
+
"execution_count": 4,
|
504 |
+
"metadata": {},
|
505 |
+
"output_type": "execute_result"
|
506 |
+
}
|
507 |
+
],
|
508 |
+
"source": [
|
509 |
+
"europe_df"
|
510 |
+
]
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"cell_type": "code",
|
514 |
+
"execution_count": 8,
|
515 |
+
"metadata": {},
|
516 |
+
"outputs": [],
|
517 |
+
"source": [
|
518 |
+
"region_df.drop_duplicates(inplace=True)"
|
519 |
+
]
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"cell_type": "code",
|
523 |
+
"execution_count": 11,
|
524 |
+
"metadata": {},
|
525 |
+
"outputs": [
|
526 |
+
{
|
527 |
+
"data": {
|
528 |
+
"text/html": [
|
529 |
+
"<div>\n",
|
530 |
+
"<style scoped>\n",
|
531 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
532 |
+
" vertical-align: middle;\n",
|
533 |
+
" }\n",
|
534 |
+
"\n",
|
535 |
+
" .dataframe tbody tr th {\n",
|
536 |
+
" vertical-align: top;\n",
|
537 |
+
" }\n",
|
538 |
+
"\n",
|
539 |
+
" .dataframe thead th {\n",
|
540 |
+
" text-align: right;\n",
|
541 |
+
" }\n",
|
542 |
+
"</style>\n",
|
543 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
544 |
+
" <thead>\n",
|
545 |
+
" <tr style=\"text-align: right;\">\n",
|
546 |
+
" <th></th>\n",
|
547 |
+
" <th>Image</th>\n",
|
548 |
+
" <th>Object type</th>\n",
|
549 |
+
" <th>Museum number</th>\n",
|
550 |
+
" <th>Title</th>\n",
|
551 |
+
" <th>Denomination</th>\n",
|
552 |
+
" <th>Escapement</th>\n",
|
553 |
+
" <th>Description</th>\n",
|
554 |
+
" <th>Producer name</th>\n",
|
555 |
+
" <th>School/style</th>\n",
|
556 |
+
" <th>State</th>\n",
|
557 |
+
" <th>...</th>\n",
|
558 |
+
" <th>Acq date</th>\n",
|
559 |
+
" <th>Acq notes (acq)</th>\n",
|
560 |
+
" <th>Acq notes (exc)</th>\n",
|
561 |
+
" <th>Dept</th>\n",
|
562 |
+
" <th>BM/Big number</th>\n",
|
563 |
+
" <th>Reg number</th>\n",
|
564 |
+
" <th>Add ids</th>\n",
|
565 |
+
" <th>Cat no</th>\n",
|
566 |
+
" <th>Banknote serial number</th>\n",
|
567 |
+
" <th>Joined objects</th>\n",
|
568 |
+
" </tr>\n",
|
569 |
+
" </thead>\n",
|
570 |
+
" <tbody>\n",
|
571 |
+
" <tr>\n",
|
572 |
+
" <th>0</th>\n",
|
573 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
574 |
+
" <td>acorn lekythos</td>\n",
|
575 |
+
" <td>1888,0601.716</td>\n",
|
576 |
+
" <td>NaN</td>\n",
|
577 |
+
" <td>NaN</td>\n",
|
578 |
+
" <td>NaN</td>\n",
|
579 |
+
" <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
|
580 |
+
" <td>NaN</td>\n",
|
581 |
+
" <td>NaN</td>\n",
|
582 |
+
" <td>NaN</td>\n",
|
583 |
+
" <td>...</td>\n",
|
584 |
+
" <td>1888</td>\n",
|
585 |
+
" <td>NaN</td>\n",
|
586 |
+
" <td>Excavated 1885-1886.</td>\n",
|
587 |
+
" <td>Greek and Roman</td>\n",
|
588 |
+
" <td>NaN</td>\n",
|
589 |
+
" <td>1888,0601.716</td>\n",
|
590 |
+
" <td>NaN</td>\n",
|
591 |
+
" <td>NaN</td>\n",
|
592 |
+
" <td>NaN</td>\n",
|
593 |
+
" <td>NaN</td>\n",
|
594 |
+
" </tr>\n",
|
595 |
+
" <tr>\n",
|
596 |
+
" <th>1</th>\n",
|
597 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
598 |
+
" <td>acroterion</td>\n",
|
599 |
+
" <td>1886,0401.45</td>\n",
|
600 |
+
" <td>NaN</td>\n",
|
601 |
+
" <td>NaN</td>\n",
|
602 |
+
" <td>NaN</td>\n",
|
603 |
+
" <td>Fragment of a marble corner palmetto with bird...</td>\n",
|
604 |
+
" <td>NaN</td>\n",
|
605 |
+
" <td>NaN</td>\n",
|
606 |
+
" <td>NaN</td>\n",
|
607 |
+
" <td>...</td>\n",
|
608 |
+
" <td>NaN</td>\n",
|
609 |
+
" <td>NaN</td>\n",
|
610 |
+
" <td>NaN</td>\n",
|
611 |
+
" <td>Greek and Roman</td>\n",
|
612 |
+
" <td>NaN</td>\n",
|
613 |
+
" <td>1886,0401.45</td>\n",
|
614 |
+
" <td>NaN</td>\n",
|
615 |
+
" <td>NaN</td>\n",
|
616 |
+
" <td>NaN</td>\n",
|
617 |
+
" <td>NaN</td>\n",
|
618 |
+
" </tr>\n",
|
619 |
+
" <tr>\n",
|
620 |
+
" <th>2</th>\n",
|
621 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
622 |
+
" <td>acroterion</td>\n",
|
623 |
+
" <td>1886,0401.1215</td>\n",
|
624 |
+
" <td>NaN</td>\n",
|
625 |
+
" <td>NaN</td>\n",
|
626 |
+
" <td>NaN</td>\n",
|
627 |
+
" <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
|
628 |
+
" <td>NaN</td>\n",
|
629 |
+
" <td>NaN</td>\n",
|
630 |
+
" <td>NaN</td>\n",
|
631 |
+
" <td>...</td>\n",
|
632 |
+
" <td>1886</td>\n",
|
633 |
+
" <td>NaN</td>\n",
|
634 |
+
" <td>NaN</td>\n",
|
635 |
+
" <td>Greek and Roman</td>\n",
|
636 |
+
" <td>NaN</td>\n",
|
637 |
+
" <td>1886,0401.1215</td>\n",
|
638 |
+
" <td>Miscellaneous number: 1886,0401.44</td>\n",
|
639 |
+
" <td>NaN</td>\n",
|
640 |
+
" <td>NaN</td>\n",
|
641 |
+
" <td>NaN</td>\n",
|
642 |
+
" </tr>\n",
|
643 |
+
" <tr>\n",
|
644 |
+
" <th>7</th>\n",
|
645 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
646 |
+
" <td>alabastron</td>\n",
|
647 |
+
" <td>1894,1101.213</td>\n",
|
648 |
+
" <td>NaN</td>\n",
|
649 |
+
" <td>NaN</td>\n",
|
650 |
+
" <td>NaN</td>\n",
|
651 |
+
" <td>Core-formed glass alabastron.\\r\\nOpaque orange...</td>\n",
|
652 |
+
" <td>NaN</td>\n",
|
653 |
+
" <td>NaN</td>\n",
|
654 |
+
" <td>NaN</td>\n",
|
655 |
+
" <td>...</td>\n",
|
656 |
+
" <td>1894</td>\n",
|
657 |
+
" <td>NaN</td>\n",
|
658 |
+
" <td>NaN</td>\n",
|
659 |
+
" <td>Greek and Roman</td>\n",
|
660 |
+
" <td>NaN</td>\n",
|
661 |
+
" <td>1894,1101.213</td>\n",
|
662 |
+
" <td>Miscellaneous number: DBH.0056 (Harden number)</td>\n",
|
663 |
+
" <td>NaN</td>\n",
|
664 |
+
" <td>NaN</td>\n",
|
665 |
+
" <td>NaN</td>\n",
|
666 |
+
" </tr>\n",
|
667 |
+
" <tr>\n",
|
668 |
+
" <th>8</th>\n",
|
669 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
670 |
+
" <td>alabastron</td>\n",
|
671 |
+
" <td>132114</td>\n",
|
672 |
+
" <td>NaN</td>\n",
|
673 |
+
" <td>NaN</td>\n",
|
674 |
+
" <td>NaN</td>\n",
|
675 |
+
" <td>Large baggy alabastron of horizontal banded, t...</td>\n",
|
676 |
+
" <td>NaN</td>\n",
|
677 |
+
" <td>NaN</td>\n",
|
678 |
+
" <td>NaN</td>\n",
|
679 |
+
" <td>...</td>\n",
|
680 |
+
" <td>1857</td>\n",
|
681 |
+
" <td>NaN</td>\n",
|
682 |
+
" <td>NaN</td>\n",
|
683 |
+
" <td>Middle East</td>\n",
|
684 |
+
" <td>132114</td>\n",
|
685 |
+
" <td>1857,1220.1</td>\n",
|
686 |
+
" <td>Miscellaneous number: 416 (paper label attache...</td>\n",
|
687 |
+
" <td>NaN</td>\n",
|
688 |
+
" <td>NaN</td>\n",
|
689 |
+
" <td>NaN</td>\n",
|
690 |
+
" </tr>\n",
|
691 |
+
" <tr>\n",
|
692 |
+
" <th>...</th>\n",
|
693 |
+
" <td>...</td>\n",
|
694 |
+
" <td>...</td>\n",
|
695 |
+
" <td>...</td>\n",
|
696 |
+
" <td>...</td>\n",
|
697 |
+
" <td>...</td>\n",
|
698 |
+
" <td>...</td>\n",
|
699 |
+
" <td>...</td>\n",
|
700 |
+
" <td>...</td>\n",
|
701 |
+
" <td>...</td>\n",
|
702 |
+
" <td>...</td>\n",
|
703 |
+
" <td>...</td>\n",
|
704 |
+
" <td>...</td>\n",
|
705 |
+
" <td>...</td>\n",
|
706 |
+
" <td>...</td>\n",
|
707 |
+
" <td>...</td>\n",
|
708 |
+
" <td>...</td>\n",
|
709 |
+
" <td>...</td>\n",
|
710 |
+
" <td>...</td>\n",
|
711 |
+
" <td>...</td>\n",
|
712 |
+
" <td>...</td>\n",
|
713 |
+
" <td>...</td>\n",
|
714 |
+
" </tr>\n",
|
715 |
+
" <tr>\n",
|
716 |
+
" <th>9301</th>\n",
|
717 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
718 |
+
" <td>vessel-fitting; lekane</td>\n",
|
719 |
+
" <td>1886,0401.1218</td>\n",
|
720 |
+
" <td>NaN</td>\n",
|
721 |
+
" <td>NaN</td>\n",
|
722 |
+
" <td>NaN</td>\n",
|
723 |
+
" <td>Chian pottery plastic head, originally attache...</td>\n",
|
724 |
+
" <td>NaN</td>\n",
|
725 |
+
" <td>NaN</td>\n",
|
726 |
+
" <td>NaN</td>\n",
|
727 |
+
" <td>...</td>\n",
|
728 |
+
" <td>1886</td>\n",
|
729 |
+
" <td>NaN</td>\n",
|
730 |
+
" <td>Excavated 1884-1885.</td>\n",
|
731 |
+
" <td>Greek and Roman</td>\n",
|
732 |
+
" <td>NaN</td>\n",
|
733 |
+
" <td>1886,0401.1218</td>\n",
|
734 |
+
" <td>NaN</td>\n",
|
735 |
+
" <td>NaN</td>\n",
|
736 |
+
" <td>NaN</td>\n",
|
737 |
+
" <td>NaN</td>\n",
|
738 |
+
" </tr>\n",
|
739 |
+
" <tr>\n",
|
740 |
+
" <th>9302</th>\n",
|
741 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
742 |
+
" <td>vessel-fitting; lid</td>\n",
|
743 |
+
" <td>1886,0401.1429</td>\n",
|
744 |
+
" <td>NaN</td>\n",
|
745 |
+
" <td>NaN</td>\n",
|
746 |
+
" <td>NaN</td>\n",
|
747 |
+
" <td>Plastic double head of Chian, probably black-f...</td>\n",
|
748 |
+
" <td>NaN</td>\n",
|
749 |
+
" <td>NaN</td>\n",
|
750 |
+
" <td>NaN</td>\n",
|
751 |
+
" <td>...</td>\n",
|
752 |
+
" <td>1886</td>\n",
|
753 |
+
" <td>NaN</td>\n",
|
754 |
+
" <td>Excavated 1884-1885.</td>\n",
|
755 |
+
" <td>Greek and Roman</td>\n",
|
756 |
+
" <td>NaN</td>\n",
|
757 |
+
" <td>1886,0401.1429</td>\n",
|
758 |
+
" <td>NaN</td>\n",
|
759 |
+
" <td>NaN</td>\n",
|
760 |
+
" <td>NaN</td>\n",
|
761 |
+
" <td>NaN</td>\n",
|
762 |
+
" </tr>\n",
|
763 |
+
" <tr>\n",
|
764 |
+
" <th>9304</th>\n",
|
765 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
766 |
+
" <td>volute krater</td>\n",
|
767 |
+
" <td>1924,1201.41</td>\n",
|
768 |
+
" <td>NaN</td>\n",
|
769 |
+
" <td>NaN</td>\n",
|
770 |
+
" <td>NaN</td>\n",
|
771 |
+
" <td>Volute handle and rim sherd (consisting of 3 f...</td>\n",
|
772 |
+
" <td>NaN</td>\n",
|
773 |
+
" <td>NaN</td>\n",
|
774 |
+
" <td>NaN</td>\n",
|
775 |
+
" <td>...</td>\n",
|
776 |
+
" <td>NaN</td>\n",
|
777 |
+
" <td>NaN</td>\n",
|
778 |
+
" <td>NaN</td>\n",
|
779 |
+
" <td>Greek and Roman</td>\n",
|
780 |
+
" <td>NaN</td>\n",
|
781 |
+
" <td>1924,1201.41</td>\n",
|
782 |
+
" <td>NaN</td>\n",
|
783 |
+
" <td>NaN</td>\n",
|
784 |
+
" <td>NaN</td>\n",
|
785 |
+
" <td>NaN</td>\n",
|
786 |
+
" </tr>\n",
|
787 |
+
" <tr>\n",
|
788 |
+
" <th>9310</th>\n",
|
789 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
790 |
+
" <td>volute krater</td>\n",
|
791 |
+
" <td>1924,1201.40</td>\n",
|
792 |
+
" <td>NaN</td>\n",
|
793 |
+
" <td>NaN</td>\n",
|
794 |
+
" <td>NaN</td>\n",
|
795 |
+
" <td>Sherd (mended from two fragments) of Laconian ...</td>\n",
|
796 |
+
" <td>NaN</td>\n",
|
797 |
+
" <td>NaN</td>\n",
|
798 |
+
" <td>NaN</td>\n",
|
799 |
+
" <td>...</td>\n",
|
800 |
+
" <td>NaN</td>\n",
|
801 |
+
" <td>NaN</td>\n",
|
802 |
+
" <td>NaN</td>\n",
|
803 |
+
" <td>Greek and Roman</td>\n",
|
804 |
+
" <td>NaN</td>\n",
|
805 |
+
" <td>1924,1201.40</td>\n",
|
806 |
+
" <td>NaN</td>\n",
|
807 |
+
" <td>NaN</td>\n",
|
808 |
+
" <td>NaN</td>\n",
|
809 |
+
" <td>NaN</td>\n",
|
810 |
+
" </tr>\n",
|
811 |
+
" <tr>\n",
|
812 |
+
" <th>9317</th>\n",
|
813 |
+
" <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
|
814 |
+
" <td>wall-painting</td>\n",
|
815 |
+
" <td>1886,0401.67</td>\n",
|
816 |
+
" <td>NaN</td>\n",
|
817 |
+
" <td>NaN</td>\n",
|
818 |
+
" <td>NaN</td>\n",
|
819 |
+
" <td>Stucco wall fragment, with marks of pointed to...</td>\n",
|
820 |
+
" <td>NaN</td>\n",
|
821 |
+
" <td>NaN</td>\n",
|
822 |
+
" <td>NaN</td>\n",
|
823 |
+
" <td>...</td>\n",
|
824 |
+
" <td>1886</td>\n",
|
825 |
+
" <td>NaN</td>\n",
|
826 |
+
" <td>NaN</td>\n",
|
827 |
+
" <td>Greek and Roman</td>\n",
|
828 |
+
" <td>NaN</td>\n",
|
829 |
+
" <td>1886,0401.67</td>\n",
|
830 |
+
" <td>NaN</td>\n",
|
831 |
+
" <td>NaN</td>\n",
|
832 |
+
" <td>NaN</td>\n",
|
833 |
+
" <td>NaN</td>\n",
|
834 |
+
" </tr>\n",
|
835 |
+
" </tbody>\n",
|
836 |
+
"</table>\n",
|
837 |
+
"<p>19570 rows × 47 columns</p>\n",
|
838 |
+
"</div>"
|
839 |
+
],
|
840 |
+
"text/plain": [
|
841 |
+
" Image \\\n",
|
842 |
+
"0 https://media.britishmuseum.org/media/Reposito... \n",
|
843 |
+
"1 https://media.britishmuseum.org/media/Reposito... \n",
|
844 |
+
"2 https://media.britishmuseum.org/media/Reposito... \n",
|
845 |
+
"7 https://media.britishmuseum.org/media/Reposito... \n",
|
846 |
+
"8 https://media.britishmuseum.org/media/Reposito... \n",
|
847 |
+
"... ... \n",
|
848 |
+
"9301 https://media.britishmuseum.org/media/Reposito... \n",
|
849 |
+
"9302 https://media.britishmuseum.org/media/Reposito... \n",
|
850 |
+
"9304 https://media.britishmuseum.org/media/Reposito... \n",
|
851 |
+
"9310 https://media.britishmuseum.org/media/Reposito... \n",
|
852 |
+
"9317 https://media.britishmuseum.org/media/Reposito... \n",
|
853 |
+
"\n",
|
854 |
+
" Object type Museum number Title Denomination Escapement \\\n",
|
855 |
+
"0 acorn lekythos 1888,0601.716 NaN NaN NaN \n",
|
856 |
+
"1 acroterion 1886,0401.45 NaN NaN NaN \n",
|
857 |
+
"2 acroterion 1886,0401.1215 NaN NaN NaN \n",
|
858 |
+
"7 alabastron 1894,1101.213 NaN NaN NaN \n",
|
859 |
+
"8 alabastron 132114 NaN NaN NaN \n",
|
860 |
+
"... ... ... ... ... ... \n",
|
861 |
+
"9301 vessel-fitting; lekane 1886,0401.1218 NaN NaN NaN \n",
|
862 |
+
"9302 vessel-fitting; lid 1886,0401.1429 NaN NaN NaN \n",
|
863 |
+
"9304 volute krater 1924,1201.41 NaN NaN NaN \n",
|
864 |
+
"9310 volute krater 1924,1201.40 NaN NaN NaN \n",
|
865 |
+
"9317 wall-painting 1886,0401.67 NaN NaN NaN \n",
|
866 |
+
"\n",
|
867 |
+
" Description Producer name \\\n",
|
868 |
+
"0 Attic red-figured pottery acorn lekythos, rest... NaN \n",
|
869 |
+
"1 Fragment of a marble corner palmetto with bird... NaN \n",
|
870 |
+
"2 Fragment of a marble acroterion palmetto. Two ... NaN \n",
|
871 |
+
"7 Core-formed glass alabastron.\\r\\nOpaque orange... NaN \n",
|
872 |
+
"8 Large baggy alabastron of horizontal banded, t... NaN \n",
|
873 |
+
"... ... ... \n",
|
874 |
+
"9301 Chian pottery plastic head, originally attache... NaN \n",
|
875 |
+
"9302 Plastic double head of Chian, probably black-f... NaN \n",
|
876 |
+
"9304 Volute handle and rim sherd (consisting of 3 f... NaN \n",
|
877 |
+
"9310 Sherd (mended from two fragments) of Laconian ... NaN \n",
|
878 |
+
"9317 Stucco wall fragment, with marks of pointed to... NaN \n",
|
879 |
+
"\n",
|
880 |
+
" School/style State ... Acq date Acq notes (acq) Acq notes (exc) \\\n",
|
881 |
+
"0 NaN NaN ... 1888 NaN Excavated 1885-1886. \n",
|
882 |
+
"1 NaN NaN ... NaN NaN NaN \n",
|
883 |
+
"2 NaN NaN ... 1886 NaN NaN \n",
|
884 |
+
"7 NaN NaN ... 1894 NaN NaN \n",
|
885 |
+
"8 NaN NaN ... 1857 NaN NaN \n",
|
886 |
+
"... ... ... ... ... ... ... \n",
|
887 |
+
"9301 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
|
888 |
+
"9302 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
|
889 |
+
"9304 NaN NaN ... NaN NaN NaN \n",
|
890 |
+
"9310 NaN NaN ... NaN NaN NaN \n",
|
891 |
+
"9317 NaN NaN ... 1886 NaN NaN \n",
|
892 |
+
"\n",
|
893 |
+
" Dept BM/Big number Reg number \\\n",
|
894 |
+
"0 Greek and Roman NaN 1888,0601.716 \n",
|
895 |
+
"1 Greek and Roman NaN 1886,0401.45 \n",
|
896 |
+
"2 Greek and Roman NaN 1886,0401.1215 \n",
|
897 |
+
"7 Greek and Roman NaN 1894,1101.213 \n",
|
898 |
+
"8 Middle East 132114 1857,1220.1 \n",
|
899 |
+
"... ... ... ... \n",
|
900 |
+
"9301 Greek and Roman NaN 1886,0401.1218 \n",
|
901 |
+
"9302 Greek and Roman NaN 1886,0401.1429 \n",
|
902 |
+
"9304 Greek and Roman NaN 1924,1201.41 \n",
|
903 |
+
"9310 Greek and Roman NaN 1924,1201.40 \n",
|
904 |
+
"9317 Greek and Roman NaN 1886,0401.67 \n",
|
905 |
+
"\n",
|
906 |
+
" Add ids Cat no \\\n",
|
907 |
+
"0 NaN NaN \n",
|
908 |
+
"1 NaN NaN \n",
|
909 |
+
"2 Miscellaneous number: 1886,0401.44 NaN \n",
|
910 |
+
"7 Miscellaneous number: DBH.0056 (Harden number) NaN \n",
|
911 |
+
"8 Miscellaneous number: 416 (paper label attache... NaN \n",
|
912 |
+
"... ... ... \n",
|
913 |
+
"9301 NaN NaN \n",
|
914 |
+
"9302 NaN NaN \n",
|
915 |
+
"9304 NaN NaN \n",
|
916 |
+
"9310 NaN NaN \n",
|
917 |
+
"9317 NaN NaN \n",
|
918 |
+
"\n",
|
919 |
+
" Banknote serial number Joined objects \n",
|
920 |
+
"0 NaN NaN \n",
|
921 |
+
"1 NaN NaN \n",
|
922 |
+
"2 NaN NaN \n",
|
923 |
+
"7 NaN NaN \n",
|
924 |
+
"8 NaN NaN \n",
|
925 |
+
"... ... ... \n",
|
926 |
+
"9301 NaN NaN \n",
|
927 |
+
"9302 NaN NaN \n",
|
928 |
+
"9304 NaN NaN \n",
|
929 |
+
"9310 NaN NaN \n",
|
930 |
+
"9317 NaN NaN \n",
|
931 |
+
"\n",
|
932 |
+
"[19570 rows x 47 columns]"
|
933 |
+
]
|
934 |
+
},
|
935 |
+
"execution_count": 11,
|
936 |
+
"metadata": {},
|
937 |
+
"output_type": "execute_result"
|
938 |
+
}
|
939 |
+
],
|
940 |
+
"source": [
|
941 |
+
"region_df"
|
942 |
+
]
|
943 |
+
},
|
944 |
+
{
|
945 |
+
"cell_type": "code",
|
946 |
+
"execution_count": null,
|
947 |
+
"metadata": {},
|
948 |
+
"outputs": [],
|
949 |
+
"source": []
|
950 |
+
}
|
951 |
+
],
|
952 |
+
"metadata": {
|
953 |
+
"kernelspec": {
|
954 |
+
"display_name": "ArtifactClassification",
|
955 |
+
"language": "python",
|
956 |
+
"name": "python3"
|
957 |
+
},
|
958 |
+
"language_info": {
|
959 |
+
"codemirror_mode": {
|
960 |
+
"name": "ipython",
|
961 |
+
"version": 3
|
962 |
+
},
|
963 |
+
"file_extension": ".py",
|
964 |
+
"mimetype": "text/x-python",
|
965 |
+
"name": "python",
|
966 |
+
"nbconvert_exporter": "python",
|
967 |
+
"pygments_lexer": "ipython3",
|
968 |
+
"version": "3.10.12"
|
969 |
+
}
|
970 |
+
},
|
971 |
+
"nbformat": 4,
|
972 |
+
"nbformat_minor": 2
|
973 |
+
}
|
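The cells in 0.3-testing_csv_join.ipynb above concatenate the per-region CSV exports, strip the "No: " prefix from "Museum number", drop duplicates, and remove null museum numbers. A minimal sketch that wraps those steps in one reusable helper (the first cell filters for "africa" while naming the result europe_df; a single parameterised function avoids that mix-up). File layout and column names follow the notebook; the function name is hypothetical.

import os
import pandas as pd

def load_region(input_filepath: str, region: str) -> pd.DataFrame:
    csv_dir = f"{input_filepath}/BM_csv_files/3000BC-AD500"
    files = [f for f in os.listdir(csv_dir) if region in f.lower()]
    df = pd.concat([pd.read_csv(f"{csv_dir}/{f}") for f in files], ignore_index=True)
    df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)
    df = df.drop_duplicates()
    return df[df["Museum number"] != "null"]

# region_df = load_region("../data/raw", "africa")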
0.4-testing_tif_images.ipynb
ADDED
@@ -0,0 +1,71 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 4,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from PIL import Image"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 5,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"im = Image.open(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\")\n",
|
19 |
+
"name = str(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\").rstrip(\".tif\")\n",
|
20 |
+
"im.save(\"image\" + \".jpg\", \"JPEG\")"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 6,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [
|
28 |
+
{
|
29 |
+
"name": "stderr",
|
30 |
+
"output_type": "stream",
|
31 |
+
"text": [
|
32 |
+
"/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
|
33 |
+
"Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
|
34 |
+
"eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
|
35 |
+
]
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"source": [
|
39 |
+
"im.show()"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": null,
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [],
|
47 |
+
"source": []
|
48 |
+
}
|
49 |
+
],
|
50 |
+
"metadata": {
|
51 |
+
"kernelspec": {
|
52 |
+
"display_name": "ArtifactClassification",
|
53 |
+
"language": "python",
|
54 |
+
"name": "python3"
|
55 |
+
},
|
56 |
+
"language_info": {
|
57 |
+
"codemirror_mode": {
|
58 |
+
"name": "ipython",
|
59 |
+
"version": 3
|
60 |
+
},
|
61 |
+
"file_extension": ".py",
|
62 |
+
"mimetype": "text/x-python",
|
63 |
+
"name": "python",
|
64 |
+
"nbconvert_exporter": "python",
|
65 |
+
"pygments_lexer": "ipython3",
|
66 |
+
"version": "3.10.12"
|
67 |
+
}
|
68 |
+
},
|
69 |
+
"nbformat": 4,
|
70 |
+
"nbformat_minor": 2
|
71 |
+
}
|
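The TIF test in 0.4-testing_tif_images.ipynb above strips the extension with str.rstrip(".tif"), which removes trailing characters from the set {'.', 't', 'i', 'f'} rather than the literal suffix, so filenames ending in those letters would be over-trimmed. A minimal sketch of the same conversion using os.path.splitext instead; the input path follows the notebook, and saving the JPEG next to the source file is an assumption.

import os
from PIL import Image

src = "../data/raw/images/castle/1924/1924_4_738a_small.tif"
im = Image.open(src).convert("RGB")        # JPEG cannot store alpha or high-bit modes
dst = os.path.splitext(src)[0] + ".jpg"    # strips exactly the ".tif" suffix
im.save(dst, "JPEG")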
0.5-testing_transparent_background.ipynb
ADDED
@@ -0,0 +1,321 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 16,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import cv2\n",
|
10 |
+
"import numpy as np\n",
|
11 |
+
"\n",
|
12 |
+
"from PIL import Image\n",
|
13 |
+
"from transparent_background import Remover\n",
|
14 |
+
"import pandas as pd\n",
|
15 |
+
"from tqdm import tqdm\n",
|
16 |
+
"import os"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"cell_type": "code",
|
21 |
+
"execution_count": 5,
|
22 |
+
"metadata": {},
|
23 |
+
"outputs": [
|
24 |
+
{
|
25 |
+
"name": "stdout",
|
26 |
+
"output_type": "stream",
|
27 |
+
"text": [
|
28 |
+
"Settings -> Mode=base-nightly, Device=cuda:0, Torchscript=disabled\n"
|
29 |
+
]
|
30 |
+
}
|
31 |
+
],
|
32 |
+
"source": [
|
33 |
+
"# Load model\n",
|
34 |
+
"# remover = Remover() # default setting\n",
|
35 |
+
"# remover = Remover(mode='fast', jit=True, device='cuda:0', ckpt='~/latest.pth', url=\"https://drive.google.com/file/d/13oBl5MTVcWER3YU4fSxW3ATlVfueFQPY/view?usp=share_link\", ckpt_name=\"ckpt_base.pth\")\n",
|
36 |
+
"remover = Remover(mode=\"base-nightly\") # nightly release checkpoint"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 7,
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [],
|
44 |
+
"source": [
|
45 |
+
"# Usage for image\n",
|
46 |
+
"img = Image.open(\"../data/raw/images/egyptian/1953/1953.1-tt.jpg\").convert(\"RGB\") # read image\n",
|
47 |
+
"\n",
|
48 |
+
"out = remover.process(img) # default setting - transparent background\n",
|
49 |
+
"# out = remover.process(img, type='rgba') # same as above\n",
|
50 |
+
"# out = remover.process(img, type='map') # object map only\n",
|
51 |
+
"# out = remover.process(img, type='green') # image matting - green screen\n",
|
52 |
+
"# out = remover.process(img, type='white') # change backround with white color\n",
|
53 |
+
"# out = remover.process(img, type=[255, 0, 0]) # change background with color code [255, 0, 0]\n",
|
54 |
+
"# out = remover.process(img, type='blur') # blur background\n",
|
55 |
+
"# out = remover.process(img, type='overlay') # overlay object map onto the image\n",
|
56 |
+
"# out = remover.process(img, type='samples/background.jpg') # use another image as a background\n",
|
57 |
+
"\n",
|
58 |
+
"# out = remover.process(img, threshold=0.5) # use threhold parameter for hard prediction.\n",
|
59 |
+
"\n",
|
60 |
+
"out.save(\"output.png\") # save result"
|
61 |
+
]
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"cell_type": "code",
|
65 |
+
"execution_count": 24,
|
66 |
+
"metadata": {},
|
67 |
+
"outputs": [],
|
68 |
+
"source": [
|
69 |
+
"img_df = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
|
70 |
+
"img_df[\"full_path\"] = img_df.apply(lambda row: os.path.join(row[\"root\"], row[\"file\"]), axis=1)\n",
|
71 |
+
"img_df[\"new_root\"] = img_df[\"root\"].apply(\n",
|
72 |
+
" lambda x: x.replace(\"data/raw/images/\", \"data/processed/OM_images_white/\")\n",
|
73 |
+
")\n",
|
74 |
+
"img_df[\"new_full_path\"] = img_df.apply(lambda row: os.path.join(row[\"new_root\"], row[\"file\"]), axis=1)"
|
75 |
+
]
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"cell_type": "code",
|
79 |
+
"execution_count": 23,
|
80 |
+
"metadata": {},
|
81 |
+
"outputs": [
|
82 |
+
{
|
83 |
+
"data": {
|
84 |
+
"text/html": [
|
85 |
+
"<div>\n",
|
86 |
+
"<style scoped>\n",
|
87 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
88 |
+
" vertical-align: middle;\n",
|
89 |
+
" }\n",
|
90 |
+
"\n",
|
91 |
+
" .dataframe tbody tr th {\n",
|
92 |
+
" vertical-align: top;\n",
|
93 |
+
" }\n",
|
94 |
+
"\n",
|
95 |
+
" .dataframe thead th {\n",
|
96 |
+
" text-align: right;\n",
|
97 |
+
" }\n",
|
98 |
+
"</style>\n",
|
99 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
100 |
+
" <thead>\n",
|
101 |
+
" <tr style=\"text-align: right;\">\n",
|
102 |
+
" <th></th>\n",
|
103 |
+
" <th>file</th>\n",
|
104 |
+
" <th>root</th>\n",
|
105 |
+
" <th>obj_num</th>\n",
|
106 |
+
" <th>full_path</th>\n",
|
107 |
+
" <th>new_root</th>\n",
|
108 |
+
" </tr>\n",
|
109 |
+
" </thead>\n",
|
110 |
+
" <tbody>\n",
|
111 |
+
" <tr>\n",
|
112 |
+
" <th>0</th>\n",
|
113 |
+
" <td>1985.15.68.jpg</td>\n",
|
114 |
+
" <td>data/raw/images/fulling_mill/1985</td>\n",
|
115 |
+
" <td>durma.1985.15.68</td>\n",
|
116 |
+
" <td>data/raw/images/fulling_mill/1985/1985.15.68.jpg</td>\n",
|
117 |
+
" <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
|
118 |
+
" </tr>\n",
|
119 |
+
" <tr>\n",
|
120 |
+
" <th>1</th>\n",
|
121 |
+
" <td>1985.52.37.ff2.jpg</td>\n",
|
122 |
+
" <td>data/raw/images/fulling_mill/1985</td>\n",
|
123 |
+
" <td>durma.1985.52.37</td>\n",
|
124 |
+
" <td>data/raw/images/fulling_mill/1985/1985.52.37.f...</td>\n",
|
125 |
+
" <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
|
126 |
+
" </tr>\n",
|
127 |
+
" <tr>\n",
|
128 |
+
" <th>2</th>\n",
|
129 |
+
" <td>1985.81.4496 d2.jpg</td>\n",
|
130 |
+
" <td>data/raw/images/fulling_mill/1985</td>\n",
|
131 |
+
" <td>durma.1985.81.4496</td>\n",
|
132 |
+
" <td>data/raw/images/fulling_mill/1985/1985.81.4496...</td>\n",
|
133 |
+
" <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
|
134 |
+
" </tr>\n",
|
135 |
+
" <tr>\n",
|
136 |
+
" <th>3</th>\n",
|
137 |
+
" <td>1985.9.1.1-d4.jpg</td>\n",
|
138 |
+
" <td>data/raw/images/fulling_mill/1985</td>\n",
|
139 |
+
" <td>durma.1985.9.1</td>\n",
|
140 |
+
" <td>data/raw/images/fulling_mill/1985/1985.9.1.1-d...</td>\n",
|
141 |
+
" <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
|
142 |
+
" </tr>\n",
|
143 |
+
" <tr>\n",
|
144 |
+
" <th>4</th>\n",
|
145 |
+
" <td>1985.52.37.sf2.jpg</td>\n",
|
146 |
+
" <td>data/raw/images/fulling_mill/1985</td>\n",
|
147 |
+
" <td>durma.1985.52.37</td>\n",
|
148 |
+
" <td>data/raw/images/fulling_mill/1985/1985.52.37.s...</td>\n",
|
149 |
+
" <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
|
150 |
+
" </tr>\n",
|
151 |
+
" <tr>\n",
|
152 |
+
" <th>...</th>\n",
|
153 |
+
" <td>...</td>\n",
|
154 |
+
" <td>...</td>\n",
|
155 |
+
" <td>...</td>\n",
|
156 |
+
" <td>...</td>\n",
|
157 |
+
" <td>...</td>\n",
|
158 |
+
" </tr>\n",
|
159 |
+
" <tr>\n",
|
160 |
+
" <th>39239</th>\n",
|
161 |
+
" <td>2014.1.2 bb.jpg</td>\n",
|
162 |
+
" <td>data/raw/images/egyptian/2014</td>\n",
|
163 |
+
" <td>durom.2014.1.2</td>\n",
|
164 |
+
" <td>data/raw/images/egyptian/2014/2014.1.2 bb.jpg</td>\n",
|
165 |
+
" <td>data/processed/OM_images_white/egyptian/2014</td>\n",
|
166 |
+
" </tr>\n",
|
167 |
+
" <tr>\n",
|
168 |
+
" <th>39240</th>\n",
|
169 |
+
" <td>2014.1.71 ll.jpg</td>\n",
|
170 |
+
" <td>data/raw/images/egyptian/2014</td>\n",
|
171 |
+
" <td>durom.2014.1.71</td>\n",
|
172 |
+
" <td>data/raw/images/egyptian/2014/2014.1.71 ll.jpg</td>\n",
|
173 |
+
" <td>data/processed/OM_images_white/egyptian/2014</td>\n",
|
174 |
+
" </tr>\n",
|
175 |
+
" <tr>\n",
|
176 |
+
" <th>39241</th>\n",
|
177 |
+
" <td>2014.1.2 rr.jpg</td>\n",
|
178 |
+
" <td>data/raw/images/egyptian/2014</td>\n",
|
179 |
+
" <td>durom.2014.1.2</td>\n",
|
180 |
+
" <td>data/raw/images/egyptian/2014/2014.1.2 rr.jpg</td>\n",
|
181 |
+
" <td>data/processed/OM_images_white/egyptian/2014</td>\n",
|
182 |
+
" </tr>\n",
|
183 |
+
" <tr>\n",
|
184 |
+
" <th>39242</th>\n",
|
185 |
+
" <td>1963.4.jpg</td>\n",
|
186 |
+
" <td>data/raw/images/egyptian/1963</td>\n",
|
187 |
+
" <td>durom.1963.4</td>\n",
|
188 |
+
" <td>data/raw/images/egyptian/1963/1963.4.jpg</td>\n",
|
189 |
+
" <td>data/processed/OM_images_white/egyptian/1963</td>\n",
|
190 |
+
" </tr>\n",
|
191 |
+
" <tr>\n",
|
192 |
+
" <th>39243</th>\n",
|
193 |
+
" <td>1963.4.2.jpg</td>\n",
|
194 |
+
" <td>data/raw/images/egyptian/1963</td>\n",
|
195 |
+
" <td>durom.1963.4</td>\n",
|
196 |
+
" <td>data/raw/images/egyptian/1963/1963.4.2.jpg</td>\n",
|
197 |
+
" <td>data/processed/OM_images_white/egyptian/1963</td>\n",
|
198 |
+
" </tr>\n",
|
199 |
+
" </tbody>\n",
|
200 |
+
"</table>\n",
|
201 |
+
"<p>39244 rows × 5 columns</p>\n",
|
202 |
+
"</div>"
|
203 |
+
],
|
204 |
+
"text/plain": [
|
205 |
+
" file root \\\n",
|
206 |
+
"0 1985.15.68.jpg data/raw/images/fulling_mill/1985 \n",
|
207 |
+
"1 1985.52.37.ff2.jpg data/raw/images/fulling_mill/1985 \n",
|
208 |
+
"2 1985.81.4496 d2.jpg data/raw/images/fulling_mill/1985 \n",
|
209 |
+
"3 1985.9.1.1-d4.jpg data/raw/images/fulling_mill/1985 \n",
|
210 |
+
"4 1985.52.37.sf2.jpg data/raw/images/fulling_mill/1985 \n",
|
211 |
+
"... ... ... \n",
|
212 |
+
"39239 2014.1.2 bb.jpg data/raw/images/egyptian/2014 \n",
|
213 |
+
"39240 2014.1.71 ll.jpg data/raw/images/egyptian/2014 \n",
|
214 |
+
"39241 2014.1.2 rr.jpg data/raw/images/egyptian/2014 \n",
|
215 |
+
"39242 1963.4.jpg data/raw/images/egyptian/1963 \n",
|
216 |
+
"39243 1963.4.2.jpg data/raw/images/egyptian/1963 \n",
|
217 |
+
"\n",
|
218 |
+
" obj_num full_path \\\n",
|
219 |
+
"0 durma.1985.15.68 data/raw/images/fulling_mill/1985/1985.15.68.jpg \n",
|
220 |
+
"1 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.f... \n",
|
221 |
+
"2 durma.1985.81.4496 data/raw/images/fulling_mill/1985/1985.81.4496... \n",
|
222 |
+
"3 durma.1985.9.1 data/raw/images/fulling_mill/1985/1985.9.1.1-d... \n",
|
223 |
+
"4 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.s... \n",
|
224 |
+
"... ... ... \n",
|
225 |
+
"39239 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 bb.jpg \n",
|
226 |
+
"39240 durom.2014.1.71 data/raw/images/egyptian/2014/2014.1.71 ll.jpg \n",
|
227 |
+
"39241 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 rr.jpg \n",
|
228 |
+
"39242 durom.1963.4 data/raw/images/egyptian/1963/1963.4.jpg \n",
|
229 |
+
"39243 durom.1963.4 data/raw/images/egyptian/1963/1963.4.2.jpg \n",
|
230 |
+
"\n",
|
231 |
+
" new_root \n",
|
232 |
+
"0 data/processed/OM_images_white/fulling_mill/1985 \n",
|
233 |
+
"1 data/processed/OM_images_white/fulling_mill/1985 \n",
|
234 |
+
"2 data/processed/OM_images_white/fulling_mill/1985 \n",
|
235 |
+
"3 data/processed/OM_images_white/fulling_mill/1985 \n",
|
236 |
+
"4 data/processed/OM_images_white/fulling_mill/1985 \n",
|
237 |
+
"... ... \n",
|
238 |
+
"39239 data/processed/OM_images_white/egyptian/2014 \n",
|
239 |
+
"39240 data/processed/OM_images_white/egyptian/2014 \n",
|
240 |
+
"39241 data/processed/OM_images_white/egyptian/2014 \n",
|
241 |
+
"39242 data/processed/OM_images_white/egyptian/1963 \n",
|
242 |
+
"39243 data/processed/OM_images_white/egyptian/1963 \n",
|
243 |
+
"\n",
|
244 |
+
"[39244 rows x 5 columns]"
|
245 |
+
]
|
246 |
+
},
|
247 |
+
"execution_count": 23,
|
248 |
+
"metadata": {},
|
249 |
+
"output_type": "execute_result"
|
250 |
+
}
|
251 |
+
],
|
252 |
+
"source": [
|
253 |
+
"img_df"
|
254 |
+
]
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"cell_type": "code",
|
258 |
+
"execution_count": 26,
|
259 |
+
"metadata": {},
|
260 |
+
"outputs": [
|
261 |
+
{
|
262 |
+
"name": "stderr",
|
263 |
+
"output_type": "stream",
|
264 |
+
"text": [
|
265 |
+
" 0%| | 84/39244 [00:06<52:59, 12.32it/s] \n"
|
266 |
+
]
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"ename": "KeyboardInterrupt",
|
270 |
+
"evalue": "",
|
271 |
+
"output_type": "error",
|
272 |
+
"traceback": [
|
273 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
274 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
275 |
+
"Cell \u001b[0;32mIn[26], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, row \u001b[38;5;129;01min\u001b[39;00m tqdm(img_df\u001b[38;5;241m.\u001b[39miterrows(), total\u001b[38;5;241m=\u001b[39mimg_df\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]):\n\u001b[1;32m 2\u001b[0m img \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mopen(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfull_path\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;66;03m# read image\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mremover\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwhite\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# change backround with white color\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# make sure the directory exists\u001b[39;00m\n\u001b[1;32m 5\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnew_root\u001b[39m\u001b[38;5;124m'\u001b[39m], exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
|
276 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/Remover.py:154\u001b[0m, in \u001b[0;36mRemover.process\u001b[0;34m(self, img, type, threshold)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;124;03m img (PIL.Image): input image as PIL.Image type\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 151\u001b[0m \n\u001b[1;32m 152\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 153\u001b[0m shape \u001b[38;5;241m=\u001b[39m img\u001b[38;5;241m.\u001b[39msize[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m--> 154\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 155\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 156\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n",
|
277 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/torchvision/transforms/transforms.py:95\u001b[0m, in \u001b[0;36mCompose.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, img):\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransforms:\n\u001b[0;32m---> 95\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
|
278 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/utils.py:105\u001b[0m, in \u001b[0;36mnormalize.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 103\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdiv\n\u001b[1;32m 104\u001b[0m img \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmean\n\u001b[0;32m--> 105\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstd\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
|
279 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
280 |
+
]
|
281 |
+
}
|
282 |
+
],
|
283 |
+
"source": [
|
284 |
+
"for index, row in tqdm(img_df.iterrows(), total=img_df.shape[0]):\n",
|
285 |
+
" img = Image.open('../' + row['full_path']).convert('RGB') # read image\n",
|
286 |
+
" out = remover.process(img, type='white') # change backround with white color\n",
|
287 |
+
" # make sure the directory exists\n",
|
288 |
+
" os.makedirs('../' + row['new_root'], exist_ok=True)\n",
|
289 |
+
" out.save('../' + row['new_full_path']) # save result"
|
290 |
+
]
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"cell_type": "code",
|
294 |
+
"execution_count": null,
|
295 |
+
"metadata": {},
|
296 |
+
"outputs": [],
|
297 |
+
"source": []
|
298 |
+
}
|
299 |
+
],
|
300 |
+
"metadata": {
|
301 |
+
"kernelspec": {
|
302 |
+
"display_name": "ArtifactClassification",
|
303 |
+
"language": "python",
|
304 |
+
"name": "python3"
|
305 |
+
},
|
306 |
+
"language_info": {
|
307 |
+
"codemirror_mode": {
|
308 |
+
"name": "ipython",
|
309 |
+
"version": 3
|
310 |
+
},
|
311 |
+
"file_extension": ".py",
|
312 |
+
"mimetype": "text/x-python",
|
313 |
+
"name": "python",
|
314 |
+
"nbconvert_exporter": "python",
|
315 |
+
"pygments_lexer": "ipython3",
|
316 |
+
"version": "3.10.12"
|
317 |
+
}
|
318 |
+
},
|
319 |
+
"nbformat": 4,
|
320 |
+
"nbformat_minor": 2
|
321 |
+
}
|
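The batch loop in the 0.5 notebook was stopped by hand after 84 of 39244 images, and because it always starts from row 0 a restart would repeat work already done. Below is a resumable variant, assuming the same img_df columns (full_path, new_root, new_full_path) and the transparent_background Remover constructed earlier in the notebook; it is a sketch, not the committed code.

    # Sketch: skip images whose white-background output already exists, so the
    # loop can be interrupted and restarted without redoing earlier rows.
    import os

    from PIL import Image
    from tqdm import tqdm

    for _, row in tqdm(img_df.iterrows(), total=img_df.shape[0]):
        out_path = "../" + row["new_full_path"]
        if os.path.exists(out_path):
            continue  # produced by an earlier (interrupted) run
        os.makedirs("../" + row["new_root"], exist_ok=True)
        img = Image.open("../" + row["full_path"]).convert("RGB")
        out = remover.process(img, type="white")  # white background, as in the notebook
        out.save(out_path)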
0.7Mahnaz-efficientnet.ipynb
ADDED
@@ -0,0 +1,492 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# Finetuning efficientNet"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "markdown",
|
12 |
+
"metadata": {},
|
13 |
+
"source": [
|
14 |
+
"Lets try the model that is trending for image classification on HuggingFace: efficientnet_b2.ra_in1k"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "code",
|
19 |
+
"execution_count": 1,
|
20 |
+
"metadata": {},
|
21 |
+
"outputs": [
|
22 |
+
{
|
23 |
+
"data": {
|
24 |
+
"application/vnd.jupyter.widget-view+json": {
|
25 |
+
"model_id": "0a87f155dc5d480c8b68caf0c69f69cd",
|
26 |
+
"version_major": 2,
|
27 |
+
"version_minor": 0
|
28 |
+
},
|
29 |
+
"text/plain": [
|
30 |
+
"Downloading readme: 0%| | 0.00/5.16k [00:00<?, ?B/s]"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
"metadata": {},
|
34 |
+
"output_type": "display_data"
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"data": {
|
38 |
+
"application/vnd.jupyter.widget-view+json": {
|
39 |
+
"model_id": "8b705267629c4028bc48465ab583337b",
|
40 |
+
"version_major": 2,
|
41 |
+
"version_minor": 0
|
42 |
+
},
|
43 |
+
"text/plain": [
|
44 |
+
"Downloading data: 0%| | 0.00/120M [00:00<?, ?B/s]"
|
45 |
+
]
|
46 |
+
},
|
47 |
+
"metadata": {},
|
48 |
+
"output_type": "display_data"
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"data": {
|
52 |
+
"application/vnd.jupyter.widget-view+json": {
|
53 |
+
"model_id": "c825ef39efb24233acabac80abb960fa",
|
54 |
+
"version_major": 2,
|
55 |
+
"version_minor": 0
|
56 |
+
},
|
57 |
+
"text/plain": [
|
58 |
+
"Downloading data: 0%| | 0.00/23.9M [00:00<?, ?B/s]"
|
59 |
+
]
|
60 |
+
},
|
61 |
+
"metadata": {},
|
62 |
+
"output_type": "display_data"
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"data": {
|
66 |
+
"application/vnd.jupyter.widget-view+json": {
|
67 |
+
"model_id": "d41e4e130864414dba3e419eada3941d",
|
68 |
+
"version_major": 2,
|
69 |
+
"version_minor": 0
|
70 |
+
},
|
71 |
+
"text/plain": [
|
72 |
+
"Generating train split: 0%| | 0/50000 [00:00<?, ? examples/s]"
|
73 |
+
]
|
74 |
+
},
|
75 |
+
"metadata": {},
|
76 |
+
"output_type": "display_data"
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"data": {
|
80 |
+
"application/vnd.jupyter.widget-view+json": {
|
81 |
+
"model_id": "7e5cfe4b7b7244beb6a2b19d24fa9c63",
|
82 |
+
"version_major": 2,
|
83 |
+
"version_minor": 0
|
84 |
+
},
|
85 |
+
"text/plain": [
|
86 |
+
"Generating test split: 0%| | 0/10000 [00:00<?, ? examples/s]"
|
87 |
+
]
|
88 |
+
},
|
89 |
+
"metadata": {},
|
90 |
+
"output_type": "display_data"
|
91 |
+
}
|
92 |
+
],
|
93 |
+
"source": [
|
94 |
+
"from datasets import load_dataset\n",
|
95 |
+
"\n",
|
96 |
+
"cifar10dataset = load_dataset(\"cifar10\", split=\"train\")"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 2,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [],
|
104 |
+
"source": [
|
105 |
+
"cifar10dataset = cifar10dataset.train_test_split(test_size=0.2)"
|
106 |
+
]
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"cell_type": "code",
|
110 |
+
"execution_count": 3,
|
111 |
+
"metadata": {},
|
112 |
+
"outputs": [],
|
113 |
+
"source": [
|
114 |
+
"labels = cifar10dataset[\"train\"].features[\"label\"].names\n",
|
115 |
+
"label2id, id2label = dict(), dict()\n",
|
116 |
+
"for i, label in enumerate(labels):\n",
|
117 |
+
" label2id[label] = str(i)\n",
|
118 |
+
" id2label[str(i)] = label"
|
119 |
+
]
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"cell_type": "code",
|
123 |
+
"execution_count": 4,
|
124 |
+
"metadata": {},
|
125 |
+
"outputs": [],
|
126 |
+
"source": [
|
127 |
+
"from transformers import AutoImageProcessor\n",
|
128 |
+
"\n",
|
129 |
+
"# import timm\n",
|
130 |
+
"# model = timm.create_model(\"hf_hub:timm/efficientnet_b2.ra_in1k\", pretrained=True)\n",
|
131 |
+
"\n",
|
132 |
+
"checkpoint = \"google/efficientnet-b3\"\n",
|
133 |
+
"image_processor = AutoImageProcessor.from_pretrained(checkpoint)"
|
134 |
+
]
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"cell_type": "code",
|
138 |
+
"execution_count": 5,
|
139 |
+
"metadata": {},
|
140 |
+
"outputs": [],
|
141 |
+
"source": [
|
142 |
+
"from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
|
143 |
+
"\n",
|
144 |
+
"normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
|
145 |
+
"size = (\n",
|
146 |
+
" image_processor.size[\"shortest_edge\"]\n",
|
147 |
+
" if \"shortest_edge\" in image_processor.size\n",
|
148 |
+
" else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
|
149 |
+
")\n",
|
150 |
+
"_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])"
|
151 |
+
]
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"cell_type": "code",
|
155 |
+
"execution_count": 6,
|
156 |
+
"metadata": {},
|
157 |
+
"outputs": [],
|
158 |
+
"source": [
|
159 |
+
"def transforms(examples):\n",
|
160 |
+
" examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"img\"]]\n",
|
161 |
+
" del examples[\"img\"]\n",
|
162 |
+
" return examples"
|
163 |
+
]
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"cell_type": "code",
|
167 |
+
"execution_count": 14,
|
168 |
+
"metadata": {},
|
169 |
+
"outputs": [],
|
170 |
+
"source": [
|
171 |
+
"import pandas as pd\n",
|
172 |
+
"import os\n",
|
173 |
+
"\n",
|
174 |
+
"file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
|
175 |
+
"file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
|
176 |
+
"\n",
|
177 |
+
"# Group by 'obj_num' and count occurrences\n",
|
178 |
+
"obj_num_counts = file2obj[\"obj_num\"].value_counts()\n",
|
179 |
+
"\n",
|
180 |
+
"# Filter rows where 'obj_num' appears more than twice\n",
|
181 |
+
"file2obj_3 = file2obj[file2obj[\"obj_num\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
|
182 |
+
]
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"cell_type": "code",
|
186 |
+
"execution_count": 15,
|
187 |
+
"metadata": {},
|
188 |
+
"outputs": [
|
189 |
+
{
|
190 |
+
"data": {
|
191 |
+
"application/vnd.jupyter.widget-view+json": {
|
192 |
+
"model_id": "352630377c4f42adad3b161fd95e7545",
|
193 |
+
"version_major": 2,
|
194 |
+
"version_minor": 0
|
195 |
+
},
|
196 |
+
"text/plain": [
|
197 |
+
"Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
|
198 |
+
]
|
199 |
+
},
|
200 |
+
"metadata": {},
|
201 |
+
"output_type": "display_data"
|
202 |
+
}
|
203 |
+
],
|
204 |
+
"source": [
|
205 |
+
"from datasets import Dataset, Image, DatasetDict\n",
|
206 |
+
"\n",
|
207 |
+
"\n",
|
208 |
+
"ds = Dataset.from_pandas(file2obj_3[[\"image\", \"obj_num\"]], preserve_index=False).cast_column(\n",
|
209 |
+
" \"image\", Image()\n",
|
210 |
+
")\n",
|
211 |
+
"ds = ds.class_encode_column(\"obj_num\")\n",
|
212 |
+
"trainval_test = ds.train_test_split(stratify_by_column=\"obj_num\", test_size=0.16)\n",
|
213 |
+
"train_val = trainval_test[\"train\"].train_test_split(\n",
|
214 |
+
" stratify_by_column=\"obj_num\", test_size=16 / 84\n",
|
215 |
+
")\n",
|
216 |
+
"ds = DatasetDict(\n",
|
217 |
+
" {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
|
218 |
+
")"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"cell_type": "code",
|
223 |
+
"execution_count": 17,
|
224 |
+
"metadata": {},
|
225 |
+
"outputs": [],
|
226 |
+
"source": [
|
227 |
+
"cifar10dataset = cifar10dataset.with_transform(transforms)\n",
|
228 |
+
"# cifar10dataset = ds.map(transforms)"
|
229 |
+
]
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"cell_type": "code",
|
233 |
+
"execution_count": null,
|
234 |
+
"metadata": {},
|
235 |
+
"outputs": [],
|
236 |
+
"source": [
|
237 |
+
"from transformers import DefaultDataCollator\n",
|
238 |
+
"\n",
|
239 |
+
"data_collator = DefaultDataCollator()"
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": null,
|
245 |
+
"metadata": {},
|
246 |
+
"outputs": [],
|
247 |
+
"source": [
|
248 |
+
"import evaluate\n",
|
249 |
+
"\n",
|
250 |
+
"accuracy = evaluate.load(\"accuracy\")"
|
251 |
+
]
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"cell_type": "code",
|
255 |
+
"execution_count": null,
|
256 |
+
"metadata": {},
|
257 |
+
"outputs": [],
|
258 |
+
"source": [
|
259 |
+
"import numpy as np\n",
|
260 |
+
"\n",
|
261 |
+
"\n",
|
262 |
+
"def compute_metrics(eval_pred):\n",
|
263 |
+
" predictions, labels = eval_pred\n",
|
264 |
+
" predictions = np.argmax(predictions, axis=1)\n",
|
265 |
+
" return accuracy.compute(predictions=predictions, references=labels)"
|
266 |
+
]
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"cell_type": "code",
|
270 |
+
"execution_count": null,
|
271 |
+
"metadata": {},
|
272 |
+
"outputs": [
|
273 |
+
{
|
274 |
+
"name": "stderr",
|
275 |
+
"output_type": "stream",
|
276 |
+
"text": [
|
277 |
+
"Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
|
278 |
+
"- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([10, 1536]) in the model instantiated\n",
|
279 |
+
"- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated\n",
|
280 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
281 |
+
]
|
282 |
+
}
|
283 |
+
],
|
284 |
+
"source": [
|
285 |
+
"from transformers import AutoModelForImageClassification, TrainingArguments, Trainer\n",
|
286 |
+
"\n",
|
287 |
+
"model = AutoModelForImageClassification.from_pretrained(\n",
|
288 |
+
" checkpoint,\n",
|
289 |
+
" num_labels=len(labels),\n",
|
290 |
+
" ignore_mismatched_sizes=True,\n",
|
291 |
+
" id2label=id2label,\n",
|
292 |
+
" label2id=label2id,\n",
|
293 |
+
")"
|
294 |
+
]
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"cell_type": "code",
|
298 |
+
"execution_count": null,
|
299 |
+
"metadata": {},
|
300 |
+
"outputs": [],
|
301 |
+
"source": [
|
302 |
+
"import torch\n",
|
303 |
+
"\n",
|
304 |
+
"torch.cuda.empty_cache()"
|
305 |
+
]
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"cell_type": "code",
|
309 |
+
"execution_count": null,
|
310 |
+
"metadata": {},
|
311 |
+
"outputs": [
|
312 |
+
{
|
313 |
+
"name": "stderr",
|
314 |
+
"output_type": "stream",
|
315 |
+
"text": [
|
316 |
+
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
|
317 |
+
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjameswburton18\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
|
318 |
+
]
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"data": {
|
322 |
+
"text/html": [
|
323 |
+
"Tracking run with wandb version 0.16.3"
|
324 |
+
],
|
325 |
+
"text/plain": [
|
326 |
+
"<IPython.core.display.HTML object>"
|
327 |
+
]
|
328 |
+
},
|
329 |
+
"metadata": {},
|
330 |
+
"output_type": "display_data"
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"data": {
|
334 |
+
"text/html": [
|
335 |
+
"Run data is saved locally in <code>/home/james/CodingProjects/ArcPostDoc/ArtifactClassification/notebooks/wandb/run-20240214_115817-fyblqcba</code>"
|
336 |
+
],
|
337 |
+
"text/plain": [
|
338 |
+
"<IPython.core.display.HTML object>"
|
339 |
+
]
|
340 |
+
},
|
341 |
+
"metadata": {},
|
342 |
+
"output_type": "display_data"
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"data": {
|
346 |
+
"text/html": [
|
347 |
+
"Syncing run <strong><a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">passionate-lovebird-214</a></strong> to <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
348 |
+
],
|
349 |
+
"text/plain": [
|
350 |
+
"<IPython.core.display.HTML object>"
|
351 |
+
]
|
352 |
+
},
|
353 |
+
"metadata": {},
|
354 |
+
"output_type": "display_data"
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"data": {
|
358 |
+
"text/html": [
|
359 |
+
" View project at <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface</a>"
|
360 |
+
],
|
361 |
+
"text/plain": [
|
362 |
+
"<IPython.core.display.HTML object>"
|
363 |
+
]
|
364 |
+
},
|
365 |
+
"metadata": {},
|
366 |
+
"output_type": "display_data"
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"data": {
|
370 |
+
"text/html": [
|
371 |
+
" View run at <a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba</a>"
|
372 |
+
],
|
373 |
+
"text/plain": [
|
374 |
+
"<IPython.core.display.HTML object>"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
"metadata": {},
|
378 |
+
"output_type": "display_data"
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"data": {
|
382 |
+
"application/vnd.jupyter.widget-view+json": {
|
383 |
+
"model_id": "a8012d9b2c7b47c5aa2533983016d3a2",
|
384 |
+
"version_major": 2,
|
385 |
+
"version_minor": 0
|
386 |
+
},
|
387 |
+
"text/plain": [
|
388 |
+
" 0%| | 0/3750 [00:00<?, ?it/s]"
|
389 |
+
]
|
390 |
+
},
|
391 |
+
"metadata": {},
|
392 |
+
"output_type": "display_data"
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"name": "stdout",
|
396 |
+
"output_type": "stream",
|
397 |
+
"text": [
|
398 |
+
"{'loss': 2.3286, 'learning_rate': 1.3333333333333334e-06, 'epoch': 0.01}\n"
|
399 |
+
]
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"ename": "KeyboardInterrupt",
|
403 |
+
"evalue": "",
|
404 |
+
"output_type": "error",
|
405 |
+
"traceback": [
|
406 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
407 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
408 |
+
"Cell \u001b[0;32mIn[13], line 28\u001b[0m\n\u001b[1;32m 1\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 2\u001b[0m output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcifar10_efficientnet\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m remove_unused_columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m push_to_hub\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 18\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 19\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 20\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 25\u001b[0m compute_metrics\u001b[38;5;241m=\u001b[39mcompute_metrics,\n\u001b[1;32m 26\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
409 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1539\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1537\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
410 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1881\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1879\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss_step\n\u001b[0;32m-> 1881\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_flos \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1883\u001b[0m is_last_step_and_steps_less_than_grad_acc \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1884\u001b[0m steps_in_epoch \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;129;01mand\u001b[39;00m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m==\u001b[39m steps_in_epoch\n\u001b[1;32m 1885\u001b[0m )\n\u001b[1;32m 1887\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1888\u001b[0m total_batched_samples \u001b[38;5;241m%\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1889\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1893\u001b[0m \u001b[38;5;66;03m# the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered\u001b[39;00m\n\u001b[1;32m 1894\u001b[0m \u001b[38;5;66;03m# in accelerate. So, explicitly enable sync gradients to True in that case.\u001b[39;00m\n",
|
411 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:3543\u001b[0m, in \u001b[0;36mTrainer.floating_point_ops\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 3530\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3531\u001b[0m \u001b[38;5;124;03mFor models that inherit from [`PreTrainedModel`], uses that method to compute the number of floating point\u001b[39;00m\n\u001b[1;32m 3532\u001b[0m \u001b[38;5;124;03moperations for every backward + forward pass. If using another model, either implement such a method in the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 3540\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 3541\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3542\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloating_point_ops\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 3543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3544\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3545\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
|
412 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1154\u001b[0m, in \u001b[0;36mModuleUtilsMixin.floating_point_ops\u001b[0;34m(self, input_dict, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfloating_point_ops\u001b[39m(\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28mself\u001b[39m, input_dict: Dict[\u001b[38;5;28mstr\u001b[39m, Union[torch\u001b[38;5;241m.\u001b[39mTensor, Any]], exclude_embeddings: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 1132\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;124;03m Get number of (optionally, non-embeddings) floating-point operations for the forward and backward passes of a\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;124;03m batch with this transformer model. Default approximation neglects the quadratic dependency on the number of\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1151\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 1152\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_tokens(input_dict) \u001b[38;5;241m*\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_parameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexclude_embeddings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude_embeddings\u001b[49m\u001b[43m)\u001b[49m\n",
|
413 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1078\u001b[0m, in \u001b[0;36mModuleUtilsMixin.num_parameters\u001b[0;34m(self, only_trainable, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[0;32m-> 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(module_type, nn\u001b[38;5;241m.\u001b[39mEmbedding)\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
414 |
+
"File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1079\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[1;32m 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m-> 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEmbedding\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
415 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
416 |
+
]
|
417 |
+
}
|
418 |
+
],
|
419 |
+
"source": [
|
420 |
+
"training_args = TrainingArguments(\n",
|
421 |
+
" output_dir=\"cifar10_efficientnet\",\n",
|
422 |
+
" remove_unused_columns=False,\n",
|
423 |
+
" evaluation_strategy=\"epoch\",\n",
|
424 |
+
" save_strategy=\"epoch\",\n",
|
425 |
+
" learning_rate=5e-5,\n",
|
426 |
+
" per_device_train_batch_size=8, # memory error with 16\n",
|
427 |
+
" gradient_accumulation_steps=4,\n",
|
428 |
+
" per_device_eval_batch_size=8,\n",
|
429 |
+
" num_train_epochs=3,\n",
|
430 |
+
" warmup_ratio=0.1,\n",
|
431 |
+
" logging_steps=10,\n",
|
432 |
+
" load_best_model_at_end=True,\n",
|
433 |
+
" metric_for_best_model=\"accuracy\",\n",
|
434 |
+
" push_to_hub=False,\n",
|
435 |
+
")\n",
|
436 |
+
"\n",
|
437 |
+
"trainer = Trainer(\n",
|
438 |
+
" model=model,\n",
|
439 |
+
" args=training_args,\n",
|
440 |
+
" data_collator=data_collator,\n",
|
441 |
+
" train_dataset=cifar10dataset[\"train\"],\n",
|
442 |
+
" eval_dataset=cifar10dataset[\"test\"],\n",
|
443 |
+
" tokenizer=image_processor,\n",
|
444 |
+
" compute_metrics=compute_metrics,\n",
|
445 |
+
")\n",
|
446 |
+
"\n",
|
447 |
+
"trainer.train()"
|
448 |
+
]
|
449 |
+
},
|
450 |
+
{
|
451 |
+
"cell_type": "markdown",
|
452 |
+
"metadata": {},
|
453 |
+
"source": [
|
454 |
+
"### Evaluation"
|
455 |
+
]
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"cell_type": "code",
|
459 |
+
"execution_count": null,
|
460 |
+
"metadata": {},
|
461 |
+
"outputs": [],
|
462 |
+
"source": [
|
463 |
+
"results = trainer.evaluate()\n",
|
464 |
+
"print(results)\n",
|
465 |
+
"\n",
|
466 |
+
"test_results = trainer.predict("
|
467 |
+
]
|
468 |
+
}
|
469 |
+
],
|
470 |
+
"metadata": {
|
471 |
+
"kernelspec": {
|
472 |
+
"display_name": "venv_bloom-classifier",
|
473 |
+
"language": "python",
|
474 |
+
"name": "python3"
|
475 |
+
},
|
476 |
+
"language_info": {
|
477 |
+
"codemirror_mode": {
|
478 |
+
"name": "ipython",
|
479 |
+
"version": 3
|
480 |
+
},
|
481 |
+
"file_extension": ".py",
|
482 |
+
"mimetype": "text/x-python",
|
483 |
+
"name": "python",
|
484 |
+
"nbconvert_exporter": "python",
|
485 |
+
"pygments_lexer": "ipython3",
|
486 |
+
"version": "3.10.12"
|
487 |
+
},
|
488 |
+
"orig_nbformat": 4
|
489 |
+
},
|
490 |
+
"nbformat": 4,
|
491 |
+
"nbformat_minor": 2
|
492 |
+
}
|
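The final cell of 0.7Mahnaz-efficientnet.ipynb is committed mid-call ("test_results = trainer.predict("). One way the evaluation could be completed, assuming the same cifar10dataset splits the Trainer was built with; this is an illustrative completion, not the author's committed code.

    # Sketch: evaluate on the held-out split and run prediction with the Trainer.
    results = trainer.evaluate()
    print(results)

    test_results = trainer.predict(cifar10dataset["test"])
    print(test_results.metrics)  # metrics from compute_metrics, e.g. test accuracy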
0.8-testing_segmented_data.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
0.9-testing_om_datasets.ipynb
ADDED
@@ -0,0 +1,459 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import logging\n",
|
10 |
+
"import os\n",
|
11 |
+
"from pathlib import Path\n",
|
12 |
+
"import click\n",
|
13 |
+
"from dotenv import find_dotenv, load_dotenv\n",
|
14 |
+
"\n",
|
15 |
+
"from datasets import load_dataset, ClassLabel\n",
|
16 |
+
"import numpy as np\n",
|
17 |
+
"import wandb\n",
|
18 |
+
"import yaml\n",
|
19 |
+
"from transformers.trainer_callback import EarlyStoppingCallback\n",
|
20 |
+
"from artifact_classification.utils import ConfigLoader\n",
|
21 |
+
"from torchvision.transforms import (\n",
|
22 |
+
" Compose,\n",
|
23 |
+
" Normalize,\n",
|
24 |
+
" ToTensor,\n",
|
25 |
+
" CenterCrop,\n",
|
26 |
+
" Resize,\n",
|
27 |
+
")\n",
|
28 |
+
"from transformers import (\n",
|
29 |
+
" AutoImageProcessor,\n",
|
30 |
+
" AutoModelForImageClassification,\n",
|
31 |
+
" TrainingArguments,\n",
|
32 |
+
" Trainer,\n",
|
33 |
+
" DefaultDataCollator,\n",
|
34 |
+
" AutoModelForSequenceClassification,\n",
|
35 |
+
" DataCollatorWithPadding,\n",
|
36 |
+
" AutoTokenizer,\n",
|
37 |
+
")\n",
|
38 |
+
"from sklearn.metrics import top_k_accuracy_score\n",
|
39 |
+
"import evaluate"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": 2,
|
45 |
+
"metadata": {},
|
46 |
+
"outputs": [
|
47 |
+
{
|
48 |
+
"name": "stdout",
|
49 |
+
"output_type": "stream",
|
50 |
+
"text": [
|
51 |
+
"Updating with:\n",
|
52 |
+
"{'config': 'om3txt_name', 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'model_base': 'microsoft/deberta-v3-base', 'problem_type': 'text'}\n",
|
53 |
+
"\n",
|
54 |
+
"\n",
|
55 |
+
"{'config': 'om3txt_name', 'fast_dev_run': False, 'do_train': True, 'do_predict': True, 'batch_size': 16, 'model_base': 'microsoft/deberta-v3-base', 'output_root': 'models/', 'num_epochs': 100, 'early_stopping_patience': 5, 'grad_accumulation_steps': 1, 'seed': 42, 'logging_steps': 10, 'lr_scheduler': 'linear', 'warmup_ratio': 0, 'weight_decay': 0, 'device': 'cuda', 'num_workers': 1, 'resume_from_checkpoint': False, 'predict_batch_size': 16, 'save_total_limit': 1, 'lr': 5e-05, 'pytorch2_0': False, 'max_length': 512, 'text_column': 'description', 'fp16': True, 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'problem_type': 'text'}\n",
|
56 |
+
"\n"
|
57 |
+
]
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"source": [
|
61 |
+
"config = \"om3txt_name\"\n",
|
62 |
+
"\n",
|
63 |
+
"# Training args\n",
|
64 |
+
"args = ConfigLoader(config, \"../configs/train_configs.yaml\", \"../configs/train_default.yaml\")\n",
|
65 |
+
"\n",
|
66 |
+
"# # Load dataset, filter out na inputs and labels and encode labels (as label column can change)\n",
|
67 |
+
"# dataset = load_dataset(args.dataset) # , download_mode=\"force_redownload\")\n",
|
68 |
+
"# dataset = dataset.filter(lambda example: example[args.label_column] is not None)\n",
|
69 |
+
"# if args.problem_type == \"text\":\n",
|
70 |
+
"# dataset = dataset.filter(lambda example: example[args.text_column] is not None)\n",
|
71 |
+
"# dataset = dataset.rename_column(args.label_column, \"label\")\n",
|
72 |
+
"# if not isinstance(dataset[\"train\"].features[\"label\"], ClassLabel):\n",
|
73 |
+
"# dataset = dataset.class_encode_column(\"label\")"
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": 2,
|
79 |
+
"metadata": {},
|
80 |
+
"outputs": [
|
81 |
+
{
|
82 |
+
"name": "stdout",
|
83 |
+
"output_type": "stream",
|
84 |
+
"text": [
|
85 |
+
"testing om3_num om3_material om3_name om3txt_material om3txt_name om3-white_num om3-white_material om3-white_name om3-3Dwhite_num om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_num om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_num om4_material om4_name om4txt_material om4txt_name om4-white_num om4-white_material om4-white_name om4-3Dwhite_num om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_num om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_num om5_material om5_name om5txt_material om5txt_name om5-white_num om5-white_material om5-white_name om5-3Dwhite_num om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_num om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_num om6_material om6_name om6txt_material om6txt_name om6-white_num om6-white_material om6-white_name om6-3Dwhite_num om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_num om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_num om3-3DwhiteTVT_material om3-3DwhiteTVT_name\n"
|
86 |
+
]
|
87 |
+
}
|
88 |
+
],
|
89 |
+
"source": [
|
90 |
+
"import yaml\n",
|
91 |
+
"\n",
|
92 |
+
"with open(\"../configs/train_configs.yaml\", \"r\") as file:\n",
|
93 |
+
" configs = list(yaml.safe_load_all(file))\n",
|
94 |
+
"\n",
|
95 |
+
"config_names = \" \".join([cfg[\"config\"] for cfg in configs])\n",
|
96 |
+
"print(config_names)"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 4,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [
|
104 |
+
{
|
105 |
+
"data": {
|
106 |
+
"text/plain": [
|
107 |
+
"'testing om3_material om3_name om3-white_material om3-white_name om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_material om4_name om4-white_material om4-white_name om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_material om5_name om5-white_material om5-white_name om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_material om6_name om6-white_material om6-white_name om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_material om3-3DwhiteTVT_name'"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
"execution_count": 4,
|
111 |
+
"metadata": {},
|
112 |
+
"output_type": "execute_result"
|
113 |
+
}
|
114 |
+
],
|
115 |
+
"source": [
|
116 |
+
"\" \".join(\n",
|
117 |
+
" [cfg[\"config\"] for cfg in configs if not (\"txt\" in cfg[\"config\"] or \"num\" in cfg[\"config\"])]\n",
|
118 |
+
")\n",
|
119 |
+
"# \" \".join([cfg[\"config\"] for cfg in configs if \"1frame\" in cfg[\"config\"]])"
|
120 |
+
]
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"cell_type": "code",
|
124 |
+
"execution_count": 37,
|
125 |
+
"metadata": {},
|
126 |
+
"outputs": [],
|
127 |
+
"source": [
|
128 |
+
"l2i = {\n",
|
129 |
+
" \"Album Painting\": 0,\n",
|
130 |
+
" \"Animal Figurine\": 1,\n",
|
131 |
+
" \"Animal Mummy\": 2,\n",
|
132 |
+
" \"Animal bone\": 3,\n",
|
133 |
+
" \"Belt Hook\": 4,\n",
|
134 |
+
" \"Blouse\": 5,\n",
|
135 |
+
" \"Bolt\": 6,\n",
|
136 |
+
" \"Box\": 7,\n",
|
137 |
+
" \"Brush Pot\": 8,\n",
|
138 |
+
" \"Cap\": 9,\n",
|
139 |
+
" \"Case\": 10,\n",
|
140 |
+
" \"Clay pipe (smoking)\": 11,\n",
|
141 |
+
" \"Cosmetic and Medical Equipment and Implements\": 12,\n",
|
142 |
+
" \"Cup And Saucer\": 13,\n",
|
143 |
+
" \"DVDs\": 14,\n",
|
144 |
+
" \"Dagger\": 15,\n",
|
145 |
+
" \"Disc\": 16,\n",
|
146 |
+
" \"Domestic Equipment and Utensils\": 17,\n",
|
147 |
+
" \"Earring\": 18,\n",
|
148 |
+
" \"Finger Ring\": 19,\n",
|
149 |
+
" \"Funerary Cone\": 20,\n",
|
150 |
+
" \"Funerary goods\": 21,\n",
|
151 |
+
" \"Funerary money\": 22,\n",
|
152 |
+
" \"Hanging\": 23,\n",
|
153 |
+
" \"Heart Scarab\": 24,\n",
|
154 |
+
" \"Human Figurine\": 25,\n",
|
155 |
+
" \"Inkstick\": 26,\n",
|
156 |
+
" \"Kite\": 27,\n",
|
157 |
+
" \"Kohl Pot\": 28,\n",
|
158 |
+
" \"Letter\": 29,\n",
|
159 |
+
" \"Manuscript Page\": 30,\n",
|
160 |
+
" \"Mat\": 31,\n",
|
161 |
+
" \"Mica Painting\": 32,\n",
|
162 |
+
" \"Miniature Painting\": 33,\n",
|
163 |
+
" \"Mortar\": 34,\n",
|
164 |
+
" \"Mummy Label\": 35,\n",
|
165 |
+
" \"Oracle Bone\": 36,\n",
|
166 |
+
" \"Ostraka\": 37,\n",
|
167 |
+
" \"Palette\": 38,\n",
|
168 |
+
" \"Panel\": 39,\n",
|
169 |
+
" \"Part\": 40,\n",
|
170 |
+
" \"Pendant\": 41,\n",
|
171 |
+
" \"Pipe\": 42,\n",
|
172 |
+
" \"Pith Painting\": 43,\n",
|
173 |
+
" \"Plaque\": 44,\n",
|
174 |
+
" \"Plate\": 45,\n",
|
175 |
+
" \"Scarab Seal\": 46,\n",
|
176 |
+
" \"Scarf\": 47,\n",
|
177 |
+
" \"Screen\": 48,\n",
|
178 |
+
" \"Seal\": 49,\n",
|
179 |
+
" \"Slide\": 50,\n",
|
180 |
+
" \"Stand\": 51,\n",
|
181 |
+
" \"Thangka\": 52,\n",
|
182 |
+
" \"Water Dropper\": 53,\n",
|
183 |
+
" \"Water Pot\": 54,\n",
|
184 |
+
" \"Woodblock Print\": 55,\n",
|
185 |
+
" \"accessories\": 56,\n",
|
186 |
+
" \"albums\": 57,\n",
|
187 |
+
" \"amulets\": 58,\n",
|
188 |
+
" \"animation cels\": 59,\n",
|
189 |
+
" \"animation drawings\": 60,\n",
|
190 |
+
" \"armor\": 61,\n",
|
191 |
+
" \"arrowheads\": 62,\n",
|
192 |
+
" \"axes: woodworking tools\": 63,\n",
|
193 |
+
" \"badges\": 64,\n",
|
194 |
+
" \"bags\": 65,\n",
|
195 |
+
" \"bandages\": 66,\n",
|
196 |
+
" \"baskets\": 67,\n",
|
197 |
+
" \"beads\": 68,\n",
|
198 |
+
" \"bells\": 69,\n",
|
199 |
+
" \"belts\": 70,\n",
|
200 |
+
" \"blades\": 71,\n",
|
201 |
+
" \"books\": 72,\n",
|
202 |
+
" \"bottles\": 73,\n",
|
203 |
+
" \"bowls\": 74,\n",
|
204 |
+
" \"boxes\": 75,\n",
|
205 |
+
" \"bracelets\": 76,\n",
|
206 |
+
" \"brick\": 77,\n",
|
207 |
+
" \"brooches\": 78,\n",
|
208 |
+
" \"brush washers\": 79,\n",
|
209 |
+
" \"buckets\": 80,\n",
|
210 |
+
" \"buckles\": 81,\n",
|
211 |
+
" \"calligraphy\": 82,\n",
|
212 |
+
" \"canopic jars\": 83,\n",
|
213 |
+
" \"cards\": 84,\n",
|
214 |
+
" \"carvings\": 85,\n",
|
215 |
+
" \"chains\": 86,\n",
|
216 |
+
" \"chessmen\": 87,\n",
|
217 |
+
" \"chopsticks\": 88,\n",
|
218 |
+
" \"claypipe\": 89,\n",
|
219 |
+
" \"cloth\": 90,\n",
|
220 |
+
" \"clothing\": 91,\n",
|
221 |
+
" \"coats\": 92,\n",
|
222 |
+
" \"coins\": 93,\n",
|
223 |
+
" \"collar\": 94,\n",
|
224 |
+
" \"compact discs\": 95,\n",
|
225 |
+
" \"containers\": 96,\n",
|
226 |
+
" \"coverings\": 97,\n",
|
227 |
+
" \"covers\": 98,\n",
|
228 |
+
" \"cups\": 99,\n",
|
229 |
+
" \"deity figurine\": 100,\n",
|
230 |
+
" \"diagrams\": 101,\n",
|
231 |
+
" \"dishes\": 102,\n",
|
232 |
+
" \"dolls\": 103,\n",
|
233 |
+
" \"drawings\": 104,\n",
|
234 |
+
" \"dresses\": 105,\n",
|
235 |
+
" \"drums\": 106,\n",
|
236 |
+
" \"earrings\": 107,\n",
|
237 |
+
" \"embroidery\": 108,\n",
|
238 |
+
" \"ensembles\": 109,\n",
|
239 |
+
" \"envelopes\": 110,\n",
|
240 |
+
" \"equipment for personal use: grooming, hygiene and health care\": 111,\n",
|
241 |
+
" \"ewers\": 112,\n",
|
242 |
+
" \"fans\": 113,\n",
|
243 |
+
" \"figures\": 114,\n",
|
244 |
+
" \"figurines\": 115,\n",
|
245 |
+
" \"flags\": 116,\n",
|
246 |
+
" \"flasks\": 117,\n",
|
247 |
+
" \"furniture components\": 118,\n",
|
248 |
+
" \"gaming counters\": 119,\n",
|
249 |
+
" \"glassware\": 120,\n",
|
250 |
+
" \"hairpins\": 121,\n",
|
251 |
+
" \"handles\": 122,\n",
|
252 |
+
" \"harnesses\": 123,\n",
|
253 |
+
" \"hats\": 124,\n",
|
254 |
+
" \"headdresses\": 125,\n",
|
255 |
+
" \"heads\": 126,\n",
|
256 |
+
" \"incense burners\": 127,\n",
|
257 |
+
" \"inlays\": 128,\n",
|
258 |
+
" \"jackets\": 129,\n",
|
259 |
+
" \"jars\": 130,\n",
|
260 |
+
" \"jewelry\": 131,\n",
|
261 |
+
" \"juglets\": 132,\n",
|
262 |
+
" \"jugs\": 133,\n",
|
263 |
+
" \"keys\": 134,\n",
|
264 |
+
" \"kimonos\": 135,\n",
|
265 |
+
" \"knives\": 136,\n",
|
266 |
+
" \"lamps\": 137,\n",
|
267 |
+
" \"lanterns\": 138,\n",
|
268 |
+
" \"lids\": 139,\n",
|
269 |
+
" \"maces\": 140,\n",
|
270 |
+
" \"masks\": 141,\n",
|
271 |
+
" \"medals\": 142,\n",
|
272 |
+
" \"mirrors\": 143,\n",
|
273 |
+
" \"models\": 144,\n",
|
274 |
+
" \"mounts\": 145,\n",
|
275 |
+
" \"nails\": 146,\n",
|
276 |
+
" \"necklaces\": 147,\n",
|
277 |
+
" \"needles\": 148,\n",
|
278 |
+
" \"netsukes\": 149,\n",
|
279 |
+
" \"ornaments\": 150,\n",
|
280 |
+
" \"pages\": 151,\n",
|
281 |
+
" \"paintings\": 152,\n",
|
282 |
+
" \"paper money\": 153,\n",
|
283 |
+
" \"pendants\": 154,\n",
|
284 |
+
" \"petticoats\": 155,\n",
|
285 |
+
" \"photographs\": 156,\n",
|
286 |
+
" \"pictures\": 157,\n",
|
287 |
+
" \"pins\": 158,\n",
|
288 |
+
" \"playing cards\": 159,\n",
|
289 |
+
" \"poker\": 160,\n",
|
290 |
+
" \"postage stamps\": 161,\n",
|
291 |
+
" \"postcards\": 162,\n",
|
292 |
+
" \"posters\": 163,\n",
|
293 |
+
" \"pots\": 164,\n",
|
294 |
+
" \"pottery\": 165,\n",
|
295 |
+
" \"prints\": 166,\n",
|
296 |
+
" \"puppets\": 167,\n",
|
297 |
+
" \"purses\": 168,\n",
|
298 |
+
" \"reliefs\": 169,\n",
|
299 |
+
" \"rings\": 170,\n",
|
300 |
+
" \"robes\": 171,\n",
|
301 |
+
" \"rubbings\": 172,\n",
|
302 |
+
" \"rugs\": 173,\n",
|
303 |
+
" \"sandals\": 174,\n",
|
304 |
+
" \"saris\": 175,\n",
|
305 |
+
" \"sarongs\": 176,\n",
|
306 |
+
" \"scabbards\": 177,\n",
|
307 |
+
" \"scaraboids\": 178,\n",
|
308 |
+
" \"scarabs\": 179,\n",
|
309 |
+
" \"scrolls\": 180,\n",
|
310 |
+
" \"seed\": 181,\n",
|
311 |
+
" \"seppa\": 182,\n",
|
312 |
+
" \"shadow puppets\": 183,\n",
|
313 |
+
" \"shawls\": 184,\n",
|
314 |
+
" \"shell\": 185,\n",
|
315 |
+
" \"sherds\": 186,\n",
|
316 |
+
" \"shields\": 187,\n",
|
317 |
+
" \"shoes\": 188,\n",
|
318 |
+
" \"sketches\": 189,\n",
|
319 |
+
" \"skirts\": 190,\n",
|
320 |
+
" \"snuff bottles\": 191,\n",
|
321 |
+
" \"socks\": 192,\n",
|
322 |
+
" \"spatulas\": 193,\n",
|
323 |
+
" \"spoons\": 194,\n",
|
324 |
+
" \"statues\": 195,\n",
|
325 |
+
" \"statuettes\": 196,\n",
|
326 |
+
" \"stelae\": 197,\n",
|
327 |
+
" \"straps\": 198,\n",
|
328 |
+
" \"studs\": 199,\n",
|
329 |
+
" \"swords\": 200,\n",
|
330 |
+
" \"tablets\": 201,\n",
|
331 |
+
" \"tacks\": 202,\n",
|
332 |
+
" \"tea bowls\": 203,\n",
|
333 |
+
" \"teapots\": 204,\n",
|
334 |
+
" \"tiles\": 205,\n",
|
335 |
+
" \"tools\": 206,\n",
|
336 |
+
" \"toys\": 207,\n",
|
337 |
+
" \"trays\": 208,\n",
|
338 |
+
" \"tubes\": 209,\n",
|
339 |
+
" \"tweezers\": 210,\n",
|
340 |
+
" \"underwear\": 211,\n",
|
341 |
+
" \"unidentified\": 212,\n",
|
342 |
+
" \"ushabti\": 213,\n",
|
343 |
+
" \"utensils\": 214,\n",
|
344 |
+
" \"vases\": 215,\n",
|
345 |
+
" \"vessels\": 216,\n",
|
346 |
+
" \"weight\": 217,\n",
|
347 |
+
" \"weights\": 218,\n",
|
348 |
+
" \"whorls\": 219,\n",
|
349 |
+
" \"wood blocks\": 220,\n",
|
350 |
+
"}"
|
351 |
+
]
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"cell_type": "code",
|
355 |
+
"execution_count": 38,
|
356 |
+
"metadata": {},
|
357 |
+
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"import json"
|
360 |
+
]
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"cell_type": "code",
|
364 |
+
"execution_count": 39,
|
365 |
+
"metadata": {},
|
366 |
+
"outputs": [],
|
367 |
+
"source": [
|
368 |
+
"# json dump\n",
|
369 |
+
"with open(\"l2i.json\", \"w\") as f:\n",
|
370 |
+
" json.dump({str(v): k for k, v in l2i.items()}, f)\n",
|
371 |
+
"# {str(v): k for k, v in l2i.items()}"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"cell_type": "code",
|
376 |
+
"execution_count": 7,
|
377 |
+
"metadata": {},
|
378 |
+
"outputs": [],
|
379 |
+
"source": [
|
380 |
+
"from transformers import AutoConfig"
|
381 |
+
]
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"cell_type": "code",
|
385 |
+
"execution_count": 45,
|
386 |
+
"metadata": {},
|
387 |
+
"outputs": [
|
388 |
+
{
|
389 |
+
"data": {
|
390 |
+
"application/vnd.jupyter.widget-view+json": {
|
391 |
+
"model_id": "005c080fdcf141acaa30ba191a8c8f3c",
|
392 |
+
"version_major": 2,
|
393 |
+
"version_minor": 0
|
394 |
+
},
|
395 |
+
"text/plain": [
|
396 |
+
"config.json: 0%| | 0.00/10.9k [00:00<?, ?B/s]"
|
397 |
+
]
|
398 |
+
},
|
399 |
+
"metadata": {},
|
400 |
+
"output_type": "display_data"
|
401 |
+
}
|
402 |
+
],
|
403 |
+
"source": [
|
404 |
+
"config = AutoConfig.from_pretrained(\"james-burton/om6txt_name\")"
|
405 |
+
]
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"cell_type": "code",
|
409 |
+
"execution_count": 46,
|
410 |
+
"metadata": {},
|
411 |
+
"outputs": [
|
412 |
+
{
|
413 |
+
"name": "stderr",
|
414 |
+
"output_type": "stream",
|
415 |
+
"text": [
|
416 |
+
"/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
|
417 |
+
"Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
|
418 |
+
"eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
|
419 |
+
]
|
420 |
+
}
|
421 |
+
],
|
422 |
+
"source": [
|
423 |
+
"from PIL import Image\n",
|
424 |
+
"\n",
|
425 |
+
"image_path = \"../data/processed/OM_3Dimages_white/egyptian/1951/1951.42-tt_2.png\"\n",
|
426 |
+
"image = Image.open(image_path)\n",
|
427 |
+
"image.show()"
|
428 |
+
]
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"cell_type": "code",
|
432 |
+
"execution_count": null,
|
433 |
+
"metadata": {},
|
434 |
+
"outputs": [],
|
435 |
+
"source": []
|
436 |
+
}
|
437 |
+
],
|
438 |
+
"metadata": {
|
439 |
+
"kernelspec": {
|
440 |
+
"display_name": "ArtifactClassification",
|
441 |
+
"language": "python",
|
442 |
+
"name": "python3"
|
443 |
+
},
|
444 |
+
"language_info": {
|
445 |
+
"codemirror_mode": {
|
446 |
+
"name": "ipython",
|
447 |
+
"version": 3
|
448 |
+
},
|
449 |
+
"file_extension": ".py",
|
450 |
+
"mimetype": "text/x-python",
|
451 |
+
"name": "python",
|
452 |
+
"nbconvert_exporter": "python",
|
453 |
+
"pygments_lexer": "ipython3",
|
454 |
+
"version": "3.10.12"
|
455 |
+
}
|
456 |
+
},
|
457 |
+
"nbformat": 4,
|
458 |
+
"nbformat_minor": 2
|
459 |
+
}
|
1.0-checking_dataset_size.ipynb
ADDED
@@ -0,0 +1,559 @@
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import os\n",
|
11 |
+
"import re"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 4,
|
17 |
+
"metadata": {},
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"# Loaded variable 'df' from URI: /home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\n",
|
21 |
+
"df = pd.read_excel(\n",
|
22 |
+
" r\"/home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\"\n",
|
23 |
+
")"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "code",
|
28 |
+
"execution_count": 5,
|
29 |
+
"metadata": {},
|
30 |
+
"outputs": [
|
31 |
+
{
|
32 |
+
"data": {
|
33 |
+
"text/html": [
|
34 |
+
"<div>\n",
|
35 |
+
"<style scoped>\n",
|
36 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
37 |
+
" vertical-align: middle;\n",
|
38 |
+
" }\n",
|
39 |
+
"\n",
|
40 |
+
" .dataframe tbody tr th {\n",
|
41 |
+
" vertical-align: top;\n",
|
42 |
+
" }\n",
|
43 |
+
"\n",
|
44 |
+
" .dataframe thead th {\n",
|
45 |
+
" text-align: right;\n",
|
46 |
+
" }\n",
|
47 |
+
"</style>\n",
|
48 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
49 |
+
" <thead>\n",
|
50 |
+
" <tr style=\"text-align: right;\">\n",
|
51 |
+
" <th></th>\n",
|
52 |
+
" <th>object_number</th>\n",
|
53 |
+
" <th>object_name</th>\n",
|
54 |
+
" <th>other_name</th>\n",
|
55 |
+
" <th>reproduction.reference</th>\n",
|
56 |
+
" <th>description</th>\n",
|
57 |
+
" <th>label.text</th>\n",
|
58 |
+
" <th>material</th>\n",
|
59 |
+
" <th>technique</th>\n",
|
60 |
+
" <th>physical_description</th>\n",
|
61 |
+
" <th>number_of_parts</th>\n",
|
62 |
+
" <th>...</th>\n",
|
63 |
+
" <th>Unnamed: 25</th>\n",
|
64 |
+
" <th>Unnamed: 26</th>\n",
|
65 |
+
" <th>Unnamed: 27</th>\n",
|
66 |
+
" <th>Unnamed: 28</th>\n",
|
67 |
+
" <th>Unnamed: 29</th>\n",
|
68 |
+
" <th>Unnamed: 30</th>\n",
|
69 |
+
" <th>Unnamed: 31</th>\n",
|
70 |
+
" <th>Unnamed: 32</th>\n",
|
71 |
+
" <th>Unnamed: 33</th>\n",
|
72 |
+
" <th>Unnamed: 34</th>\n",
|
73 |
+
" </tr>\n",
|
74 |
+
" </thead>\n",
|
75 |
+
" <tbody>\n",
|
76 |
+
" <tr>\n",
|
77 |
+
" <th>0</th>\n",
|
78 |
+
" <td>EG1</td>\n",
|
79 |
+
" <td>Kohl Pot</td>\n",
|
80 |
+
" <td>jar</td>\n",
|
81 |
+
" <td>NaN</td>\n",
|
82 |
+
" <td>Shouldered, squat, incised kohl jar, blackened...</td>\n",
|
83 |
+
" <td>NaN</td>\n",
|
84 |
+
" <td>limestone</td>\n",
|
85 |
+
" <td>NaN</td>\n",
|
86 |
+
" <td>NaN</td>\n",
|
87 |
+
" <td>NaN</td>\n",
|
88 |
+
" <td>...</td>\n",
|
89 |
+
" <td>NaN</td>\n",
|
90 |
+
" <td>NaN</td>\n",
|
91 |
+
" <td>NaN</td>\n",
|
92 |
+
" <td>NaN</td>\n",
|
93 |
+
" <td>NaN</td>\n",
|
94 |
+
" <td>NaN</td>\n",
|
95 |
+
" <td>NaN</td>\n",
|
96 |
+
" <td>NaN</td>\n",
|
97 |
+
" <td>NaN</td>\n",
|
98 |
+
" <td>NaN</td>\n",
|
99 |
+
" </tr>\n",
|
100 |
+
" <tr>\n",
|
101 |
+
" <th>1</th>\n",
|
102 |
+
" <td>EG2</td>\n",
|
103 |
+
" <td>cups</td>\n",
|
104 |
+
" <td>beaker</td>\n",
|
105 |
+
" <td>NaN</td>\n",
|
106 |
+
" <td>slightly concave beaker with flaring rim and c...</td>\n",
|
107 |
+
" <td>NaN</td>\n",
|
108 |
+
" <td>travertine</td>\n",
|
109 |
+
" <td>NaN</td>\n",
|
110 |
+
" <td>NaN</td>\n",
|
111 |
+
" <td>NaN</td>\n",
|
112 |
+
" <td>...</td>\n",
|
113 |
+
" <td>NaN</td>\n",
|
114 |
+
" <td>NaN</td>\n",
|
115 |
+
" <td>NaN</td>\n",
|
116 |
+
" <td>NaN</td>\n",
|
117 |
+
" <td>NaN</td>\n",
|
118 |
+
" <td>NaN</td>\n",
|
119 |
+
" <td>NaN</td>\n",
|
120 |
+
" <td>NaN</td>\n",
|
121 |
+
" <td>NaN</td>\n",
|
122 |
+
" <td>NaN</td>\n",
|
123 |
+
" </tr>\n",
|
124 |
+
" <tr>\n",
|
125 |
+
" <th>2</th>\n",
|
126 |
+
" <td>EG3</td>\n",
|
127 |
+
" <td>bowls</td>\n",
|
128 |
+
" <td>bowl</td>\n",
|
129 |
+
" <td>../images/egyptian/eg/eg3-409-d1.jpg</td>\n",
|
130 |
+
" <td>squat shouldered jar, no rim</td>\n",
|
131 |
+
" <td><SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-...</td>\n",
|
132 |
+
" <td>limestone</td>\n",
|
133 |
+
" <td>NaN</td>\n",
|
134 |
+
" <td>NaN</td>\n",
|
135 |
+
" <td>NaN</td>\n",
|
136 |
+
" <td>...</td>\n",
|
137 |
+
" <td>NaN</td>\n",
|
138 |
+
" <td>NaN</td>\n",
|
139 |
+
" <td>NaN</td>\n",
|
140 |
+
" <td>NaN</td>\n",
|
141 |
+
" <td>NaN</td>\n",
|
142 |
+
" <td>NaN</td>\n",
|
143 |
+
" <td>NaN</td>\n",
|
144 |
+
" <td>NaN</td>\n",
|
145 |
+
" <td>NaN</td>\n",
|
146 |
+
" <td>NaN</td>\n",
|
147 |
+
" </tr>\n",
|
148 |
+
" <tr>\n",
|
149 |
+
" <th>3</th>\n",
|
150 |
+
" <td>EG4</td>\n",
|
151 |
+
" <td>bottles</td>\n",
|
152 |
+
" <td>jar</td>\n",
|
153 |
+
" <td>NaN</td>\n",
|
154 |
+
" <td>necked, globular jar with flared rim</td>\n",
|
155 |
+
" <td>NaN</td>\n",
|
156 |
+
" <td>travertine</td>\n",
|
157 |
+
" <td>NaN</td>\n",
|
158 |
+
" <td>NaN</td>\n",
|
159 |
+
" <td>NaN</td>\n",
|
160 |
+
" <td>...</td>\n",
|
161 |
+
" <td>NaN</td>\n",
|
162 |
+
" <td>NaN</td>\n",
|
163 |
+
" <td>NaN</td>\n",
|
164 |
+
" <td>NaN</td>\n",
|
165 |
+
" <td>NaN</td>\n",
|
166 |
+
" <td>NaN</td>\n",
|
167 |
+
" <td>NaN</td>\n",
|
168 |
+
" <td>NaN</td>\n",
|
169 |
+
" <td>NaN</td>\n",
|
170 |
+
" <td>NaN</td>\n",
|
171 |
+
" </tr>\n",
|
172 |
+
" <tr>\n",
|
173 |
+
" <th>4</th>\n",
|
174 |
+
" <td>EG5</td>\n",
|
175 |
+
" <td>bottles</td>\n",
|
176 |
+
" <td>jar</td>\n",
|
177 |
+
" <td>NaN</td>\n",
|
178 |
+
" <td>necked, globular jar with narrow rim, plus sto...</td>\n",
|
179 |
+
" <td>NaN</td>\n",
|
180 |
+
" <td>travertine</td>\n",
|
181 |
+
" <td>NaN</td>\n",
|
182 |
+
" <td>NaN</td>\n",
|
183 |
+
" <td>NaN</td>\n",
|
184 |
+
" <td>...</td>\n",
|
185 |
+
" <td>NaN</td>\n",
|
186 |
+
" <td>NaN</td>\n",
|
187 |
+
" <td>NaN</td>\n",
|
188 |
+
" <td>NaN</td>\n",
|
189 |
+
" <td>NaN</td>\n",
|
190 |
+
" <td>NaN</td>\n",
|
191 |
+
" <td>NaN</td>\n",
|
192 |
+
" <td>NaN</td>\n",
|
193 |
+
" <td>NaN</td>\n",
|
194 |
+
" <td>NaN</td>\n",
|
195 |
+
" </tr>\n",
|
196 |
+
" <tr>\n",
|
197 |
+
" <th>...</th>\n",
|
198 |
+
" <td>...</td>\n",
|
199 |
+
" <td>...</td>\n",
|
200 |
+
" <td>...</td>\n",
|
201 |
+
" <td>...</td>\n",
|
202 |
+
" <td>...</td>\n",
|
203 |
+
" <td>...</td>\n",
|
204 |
+
" <td>...</td>\n",
|
205 |
+
" <td>...</td>\n",
|
206 |
+
" <td>...</td>\n",
|
207 |
+
" <td>...</td>\n",
|
208 |
+
" <td>...</td>\n",
|
209 |
+
" <td>...</td>\n",
|
210 |
+
" <td>...</td>\n",
|
211 |
+
" <td>...</td>\n",
|
212 |
+
" <td>...</td>\n",
|
213 |
+
" <td>...</td>\n",
|
214 |
+
" <td>...</td>\n",
|
215 |
+
" <td>...</td>\n",
|
216 |
+
" <td>...</td>\n",
|
217 |
+
" <td>...</td>\n",
|
218 |
+
" <td>...</td>\n",
|
219 |
+
" </tr>\n",
|
220 |
+
" <tr>\n",
|
221 |
+
" <th>60081</th>\n",
|
222 |
+
" <td>DURMA.2020.3.2072</td>\n",
|
223 |
+
" <td>coins</td>\n",
|
224 |
+
" <td>NaN</td>\n",
|
225 |
+
" <td>../images/fulling_mill/2020/DURMA.2020.3.2072-...</td>\n",
|
226 |
+
" <td>A silver Roman coin which is a part of the Pie...</td>\n",
|
227 |
+
" <td>NaN</td>\n",
|
228 |
+
" <td>metal</td>\n",
|
229 |
+
" <td>hammering</td>\n",
|
230 |
+
" <td>A silver denarius of Elagabalus dating to the ...</td>\n",
|
231 |
+
" <td>1</td>\n",
|
232 |
+
" <td>...</td>\n",
|
233 |
+
" <td>NaN</td>\n",
|
234 |
+
" <td>NaN</td>\n",
|
235 |
+
" <td>NaN</td>\n",
|
236 |
+
" <td>NaN</td>\n",
|
237 |
+
" <td>NaN</td>\n",
|
238 |
+
" <td>NaN</td>\n",
|
239 |
+
" <td>NaN</td>\n",
|
240 |
+
" <td>NaN</td>\n",
|
241 |
+
" <td>NaN</td>\n",
|
242 |
+
" <td>NaN</td>\n",
|
243 |
+
" </tr>\n",
|
244 |
+
" <tr>\n",
|
245 |
+
" <th>60082</th>\n",
|
246 |
+
" <td>DUROM.2021.286</td>\n",
|
247 |
+
" <td>postcards</td>\n",
|
248 |
+
" <td>NaN</td>\n",
|
249 |
+
" <td>NaN</td>\n",
|
250 |
+
" <td>Portrait orientation postcard for the 1996 Ind...</td>\n",
|
251 |
+
" <td>NaN</td>\n",
|
252 |
+
" <td>paper</td>\n",
|
253 |
+
" <td>printing</td>\n",
|
254 |
+
" <td>Digital printed onto paper/card</td>\n",
|
255 |
+
" <td>1</td>\n",
|
256 |
+
" <td>...</td>\n",
|
257 |
+
" <td>NaN</td>\n",
|
258 |
+
" <td>NaN</td>\n",
|
259 |
+
" <td>NaN</td>\n",
|
260 |
+
" <td>NaN</td>\n",
|
261 |
+
" <td>NaN</td>\n",
|
262 |
+
" <td>NaN</td>\n",
|
263 |
+
" <td>NaN</td>\n",
|
264 |
+
" <td>NaN</td>\n",
|
265 |
+
" <td>NaN</td>\n",
|
266 |
+
" <td>NaN</td>\n",
|
267 |
+
" </tr>\n",
|
268 |
+
" <tr>\n",
|
269 |
+
" <th>60083</th>\n",
|
270 |
+
" <td>DUROM.2021.287</td>\n",
|
271 |
+
" <td>postcards</td>\n",
|
272 |
+
" <td>NaN</td>\n",
|
273 |
+
" <td>NaN</td>\n",
|
274 |
+
" <td>Pair of landscape orientation postcard sized p...</td>\n",
|
275 |
+
" <td>NaN</td>\n",
|
276 |
+
" <td>paper</td>\n",
|
277 |
+
" <td>printing</td>\n",
|
278 |
+
" <td>Digital print on card</td>\n",
|
279 |
+
" <td>2</td>\n",
|
280 |
+
" <td>...</td>\n",
|
281 |
+
" <td>NaN</td>\n",
|
282 |
+
" <td>NaN</td>\n",
|
283 |
+
" <td>NaN</td>\n",
|
284 |
+
" <td>NaN</td>\n",
|
285 |
+
" <td>NaN</td>\n",
|
286 |
+
" <td>NaN</td>\n",
|
287 |
+
" <td>NaN</td>\n",
|
288 |
+
" <td>NaN</td>\n",
|
289 |
+
" <td>NaN</td>\n",
|
290 |
+
" <td>NaN</td>\n",
|
291 |
+
" </tr>\n",
|
292 |
+
" <tr>\n",
|
293 |
+
" <th>60084</th>\n",
|
294 |
+
" <td>DUROM.2021.289</td>\n",
|
295 |
+
" <td>posters</td>\n",
|
296 |
+
" <td>NaN</td>\n",
|
297 |
+
" <td>NaN</td>\n",
|
298 |
+
" <td>Portrait orientation poster for the 1996 India...</td>\n",
|
299 |
+
" <td>NaN</td>\n",
|
300 |
+
" <td>paper</td>\n",
|
301 |
+
" <td>printing</td>\n",
|
302 |
+
" <td>digital print on gloss paper</td>\n",
|
303 |
+
" <td>1</td>\n",
|
304 |
+
" <td>...</td>\n",
|
305 |
+
" <td>NaN</td>\n",
|
306 |
+
" <td>NaN</td>\n",
|
307 |
+
" <td>NaN</td>\n",
|
308 |
+
" <td>NaN</td>\n",
|
309 |
+
" <td>NaN</td>\n",
|
310 |
+
" <td>NaN</td>\n",
|
311 |
+
" <td>NaN</td>\n",
|
312 |
+
" <td>NaN</td>\n",
|
313 |
+
" <td>NaN</td>\n",
|
314 |
+
" <td>NaN</td>\n",
|
315 |
+
" </tr>\n",
|
316 |
+
" <tr>\n",
|
317 |
+
" <th>60085</th>\n",
|
318 |
+
" <td>DUROM.2021.288</td>\n",
|
319 |
+
" <td>posters</td>\n",
|
320 |
+
" <td>NaN</td>\n",
|
321 |
+
" <td>NaN</td>\n",
|
322 |
+
" <td>Portrait orientation poster for the 1996 India...</td>\n",
|
323 |
+
" <td>NaN</td>\n",
|
324 |
+
" <td>paper</td>\n",
|
325 |
+
" <td>printing</td>\n",
|
326 |
+
" <td>Digital print on paper</td>\n",
|
327 |
+
" <td>1</td>\n",
|
328 |
+
" <td>...</td>\n",
|
329 |
+
" <td>NaN</td>\n",
|
330 |
+
" <td>NaN</td>\n",
|
331 |
+
" <td>NaN</td>\n",
|
332 |
+
" <td>NaN</td>\n",
|
333 |
+
" <td>NaN</td>\n",
|
334 |
+
" <td>NaN</td>\n",
|
335 |
+
" <td>NaN</td>\n",
|
336 |
+
" <td>NaN</td>\n",
|
337 |
+
" <td>NaN</td>\n",
|
338 |
+
" <td>NaN</td>\n",
|
339 |
+
" </tr>\n",
|
340 |
+
" </tbody>\n",
|
341 |
+
"</table>\n",
|
342 |
+
"<p>60086 rows × 35 columns</p>\n",
|
343 |
+
"</div>"
|
344 |
+
],
|
345 |
+
"text/plain": [
|
346 |
+
" object_number object_name other_name \\\n",
|
347 |
+
"0 EG1 Kohl Pot jar \n",
|
348 |
+
"1 EG2 cups beaker \n",
|
349 |
+
"2 EG3 bowls bowl \n",
|
350 |
+
"3 EG4 bottles jar \n",
|
351 |
+
"4 EG5 bottles jar \n",
|
352 |
+
"... ... ... ... \n",
|
353 |
+
"60081 DURMA.2020.3.2072 coins NaN \n",
|
354 |
+
"60082 DUROM.2021.286 postcards NaN \n",
|
355 |
+
"60083 DUROM.2021.287 postcards NaN \n",
|
356 |
+
"60084 DUROM.2021.289 posters NaN \n",
|
357 |
+
"60085 DUROM.2021.288 posters NaN \n",
|
358 |
+
"\n",
|
359 |
+
" reproduction.reference \\\n",
|
360 |
+
"0 NaN \n",
|
361 |
+
"1 NaN \n",
|
362 |
+
"2 ../images/egyptian/eg/eg3-409-d1.jpg \n",
|
363 |
+
"3 NaN \n",
|
364 |
+
"4 NaN \n",
|
365 |
+
"... ... \n",
|
366 |
+
"60081 ../images/fulling_mill/2020/DURMA.2020.3.2072-... \n",
|
367 |
+
"60082 NaN \n",
|
368 |
+
"60083 NaN \n",
|
369 |
+
"60084 NaN \n",
|
370 |
+
"60085 NaN \n",
|
371 |
+
"\n",
|
372 |
+
" description \\\n",
|
373 |
+
"0 Shouldered, squat, incised kohl jar, blackened... \n",
|
374 |
+
"1 slightly concave beaker with flaring rim and c... \n",
|
375 |
+
"2 squat shouldered jar, no rim \n",
|
376 |
+
"3 necked, globular jar with flared rim \n",
|
377 |
+
"4 necked, globular jar with narrow rim, plus sto... \n",
|
378 |
+
"... ... \n",
|
379 |
+
"60081 A silver Roman coin which is a part of the Pie... \n",
|
380 |
+
"60082 Portrait orientation postcard for the 1996 Ind... \n",
|
381 |
+
"60083 Pair of landscape orientation postcard sized p... \n",
|
382 |
+
"60084 Portrait orientation poster for the 1996 India... \n",
|
383 |
+
"60085 Portrait orientation poster for the 1996 India... \n",
|
384 |
+
"\n",
|
385 |
+
" label.text material \\\n",
|
386 |
+
"0 NaN limestone \n",
|
387 |
+
"1 NaN travertine \n",
|
388 |
+
"2 <SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-... limestone \n",
|
389 |
+
"3 NaN travertine \n",
|
390 |
+
"4 NaN travertine \n",
|
391 |
+
"... ... ... \n",
|
392 |
+
"60081 NaN metal \n",
|
393 |
+
"60082 NaN paper \n",
|
394 |
+
"60083 NaN paper \n",
|
395 |
+
"60084 NaN paper \n",
|
396 |
+
"60085 NaN paper \n",
|
397 |
+
"\n",
|
398 |
+
" technique physical_description \\\n",
|
399 |
+
"0 NaN NaN \n",
|
400 |
+
"1 NaN NaN \n",
|
401 |
+
"2 NaN NaN \n",
|
402 |
+
"3 NaN NaN \n",
|
403 |
+
"4 NaN NaN \n",
|
404 |
+
"... ... ... \n",
|
405 |
+
"60081 hammering A silver denarius of Elagabalus dating to the ... \n",
|
406 |
+
"60082 printing Digital printed onto paper/card \n",
|
407 |
+
"60083 printing Digital print on card \n",
|
408 |
+
"60084 printing digital print on gloss paper \n",
|
409 |
+
"60085 printing Digital print on paper \n",
|
410 |
+
"\n",
|
411 |
+
" number_of_parts ... Unnamed: 25 Unnamed: 26 Unnamed: 27 Unnamed: 28 \\\n",
|
412 |
+
"0 NaN ... NaN NaN NaN NaN \n",
|
413 |
+
"1 NaN ... NaN NaN NaN NaN \n",
|
414 |
+
"2 NaN ... NaN NaN NaN NaN \n",
|
415 |
+
"3 NaN ... NaN NaN NaN NaN \n",
|
416 |
+
"4 NaN ... NaN NaN NaN NaN \n",
|
417 |
+
"... ... ... ... ... ... ... \n",
|
418 |
+
"60081 1 ... NaN NaN NaN NaN \n",
|
419 |
+
"60082 1 ... NaN NaN NaN NaN \n",
|
420 |
+
"60083 2 ... NaN NaN NaN NaN \n",
|
421 |
+
"60084 1 ... NaN NaN NaN NaN \n",
|
422 |
+
"60085 1 ... NaN NaN NaN NaN \n",
|
423 |
+
"\n",
|
424 |
+
" Unnamed: 29 Unnamed: 30 Unnamed: 31 Unnamed: 32 Unnamed: 33 Unnamed: 34 \n",
|
425 |
+
"0 NaN NaN NaN NaN NaN NaN \n",
|
426 |
+
"1 NaN NaN NaN NaN NaN NaN \n",
|
427 |
+
"2 NaN NaN NaN NaN NaN NaN \n",
|
428 |
+
"3 NaN NaN NaN NaN NaN NaN \n",
|
429 |
+
"4 NaN NaN NaN NaN NaN NaN \n",
|
430 |
+
"... ... ... ... ... ... ... \n",
|
431 |
+
"60081 NaN NaN NaN NaN NaN NaN \n",
|
432 |
+
"60082 NaN NaN NaN NaN NaN NaN \n",
|
433 |
+
"60083 NaN NaN NaN NaN NaN NaN \n",
|
434 |
+
"60084 NaN NaN NaN NaN NaN NaN \n",
|
435 |
+
"60085 NaN NaN NaN NaN NaN NaN \n",
|
436 |
+
"\n",
|
437 |
+
"[60086 rows x 35 columns]"
|
438 |
+
]
|
439 |
+
},
|
440 |
+
"execution_count": 5,
|
441 |
+
"metadata": {},
|
442 |
+
"output_type": "execute_result"
|
443 |
+
}
|
444 |
+
],
|
445 |
+
"source": [
|
446 |
+
"df"
|
447 |
+
]
|
448 |
+
},
|
449 |
+
{
|
450 |
+
"cell_type": "code",
|
451 |
+
"execution_count": 6,
|
452 |
+
"metadata": {},
|
453 |
+
"outputs": [
|
454 |
+
{
|
455 |
+
"data": {
|
456 |
+
"text/plain": [
|
457 |
+
"6625"
|
458 |
+
]
|
459 |
+
},
|
460 |
+
"execution_count": 6,
|
461 |
+
"metadata": {},
|
462 |
+
"output_type": "execute_result"
|
463 |
+
}
|
464 |
+
],
|
465 |
+
"source": [
|
466 |
+
"df[\"description\"].isna().sum()"
|
467 |
+
]
|
468 |
+
},
|
469 |
+
{
|
470 |
+
"cell_type": "code",
|
471 |
+
"execution_count": 7,
|
472 |
+
"metadata": {},
|
473 |
+
"outputs": [
|
474 |
+
{
|
475 |
+
"name": "stdout",
|
476 |
+
"output_type": "stream",
|
477 |
+
"text": [
|
478 |
+
"There are 60086 records in the dataset\n",
|
479 |
+
"26809 records have an image location\n",
|
480 |
+
"53461 records have an description\n",
|
481 |
+
"There are 60067 unique museum numbers\n",
|
482 |
+
"There are 46166 unique descriptions\n"
|
483 |
+
]
|
484 |
+
}
|
485 |
+
],
|
486 |
+
"source": [
|
487 |
+
"print(f\"There are {len(df)} records in the dataset\")\n",
|
488 |
+
"print(f\"{df['reproduction.reference'].notna().sum()} records have an image location\")\n",
|
489 |
+
"print(f\"{df['description'].notna().sum()} records have an description\")\n",
|
490 |
+
"print(f\"There are {len(df['object_number'].unique())} unique museum numbers\")\n",
|
491 |
+
"print(f\"There are {len(df['description'].unique())} unique descriptions\")"
|
492 |
+
]
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"cell_type": "code",
|
496 |
+
"execution_count": 8,
|
497 |
+
"metadata": {},
|
498 |
+
"outputs": [
|
499 |
+
{
|
500 |
+
"name": "stdout",
|
501 |
+
"output_type": "stream",
|
502 |
+
"text": [
|
503 |
+
"Total number of images in ../data/raw/images/: 39200\n",
|
504 |
+
"Total number of files in ../data/raw/images/: 39244\n"
|
505 |
+
]
|
506 |
+
}
|
507 |
+
],
|
508 |
+
"source": [
|
509 |
+
"import os\n",
|
510 |
+
"\n",
|
511 |
+
"image_count = 0\n",
|
512 |
+
"file_count = 0\n",
|
513 |
+
"\n",
|
514 |
+
"# Define the root directory\n",
|
515 |
+
"root_dir = \"../data/raw/images/\"\n",
|
516 |
+
"\n",
|
517 |
+
"# Iterate through all subdirectories and files\n",
|
518 |
+
"for root, dirs, files in os.walk(root_dir):\n",
|
519 |
+
" for file in files:\n",
|
520 |
+
" file_count += 1\n",
|
521 |
+
" # Check if the file is an image file\n",
|
522 |
+
" if file.endswith((\".jpg\", \".jpeg\", \".png\", \".gif\", \".JPG\", \".JPEG\", \".PNG\", \".GIF\")):\n",
|
523 |
+
" # Increment the image count\n",
|
524 |
+
" image_count += 1\n",
|
525 |
+
"\n",
|
526 |
+
"print(f\"Total number of images in {root_dir}: {image_count}\")\n",
|
527 |
+
"print(f\"Total number of files in {root_dir}: {file_count}\")"
|
528 |
+
]
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"cell_type": "code",
|
532 |
+
"execution_count": null,
|
533 |
+
"metadata": {},
|
534 |
+
"outputs": [],
|
535 |
+
"source": []
|
536 |
+
}
|
537 |
+
],
|
538 |
+
"metadata": {
|
539 |
+
"kernelspec": {
|
540 |
+
"display_name": "env",
|
541 |
+
"language": "python",
|
542 |
+
"name": "python3"
|
543 |
+
},
|
544 |
+
"language_info": {
|
545 |
+
"codemirror_mode": {
|
546 |
+
"name": "ipython",
|
547 |
+
"version": 3
|
548 |
+
},
|
549 |
+
"file_extension": ".py",
|
550 |
+
"mimetype": "text/x-python",
|
551 |
+
"name": "python",
|
552 |
+
"nbconvert_exporter": "python",
|
553 |
+
"pygments_lexer": "ipython3",
|
554 |
+
"version": "3.10.12"
|
555 |
+
}
|
556 |
+
},
|
557 |
+
"nbformat": 4,
|
558 |
+
"nbformat_minor": 2
|
559 |
+
}
|
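
Editorial note on the image-counting cell above: it reports 39244 files but only 39200 with image extensions under ../data/raw/images/. A minimal sketch (standard library only, same root directory assumed) that breaks the file count down by extension, which would show what the 44-file gap consists of:

    import os
    from collections import Counter

    root_dir = "../data/raw/images/"  # same root directory as in the cell above
    ext_counts = Counter()

    for _, _, files in os.walk(root_dir):
        for name in files:
            # Normalise extensions so ".JPG" and ".jpg" are counted together.
            ext_counts[os.path.splitext(name)[1].lower()] += 1

    # Anything that is not an image extension accounts for the gap
    # between the file total and the image total.
    for ext, count in ext_counts.most_common():
        print(f"{ext or '<no extension>'}: {count}")
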
1.1-exploring_OM_image_matching.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
2.0-assessing_OM_dataset.ipynb
ADDED
@@ -0,0 +1,1468 @@
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 89,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd\n",
|
10 |
+
"import matplotlib.pyplot as plt\n",
|
11 |
+
"import numpy as np"
|
12 |
+
]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"cell_type": "code",
|
16 |
+
"execution_count": 2,
|
17 |
+
"metadata": {},
|
18 |
+
"outputs": [],
|
19 |
+
"source": [
|
20 |
+
"obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\")\n",
|
21 |
+
"file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 3,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"file_counts = file2obj[\"obj_num\"].value_counts()\n",
|
31 |
+
"# file2obj"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"cell_type": "code",
|
36 |
+
"execution_count": 4,
|
37 |
+
"metadata": {},
|
38 |
+
"outputs": [
|
39 |
+
{
|
40 |
+
"data": {
|
41 |
+
"text/plain": [
|
42 |
+
"obj_num\n",
|
43 |
+
"durom.1969.406 249\n",
|
44 |
+
"durom.1973.47 191\n",
|
45 |
+
"DUROM.1954.Spalding29.W 112\n",
|
46 |
+
"durom.1960.2332 101\n",
|
47 |
+
"durom.2014.1 76\n",
|
48 |
+
" ... \n",
|
49 |
+
"durom.2006.46.32 1\n",
|
50 |
+
"durom.2006.44.16 1\n",
|
51 |
+
"durom.2006.45.194 1\n",
|
52 |
+
"durom.2006.46.13 1\n",
|
53 |
+
"durom.1964.183 1\n",
|
54 |
+
"Name: count, Length: 12642, dtype: int64"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
"execution_count": 4,
|
58 |
+
"metadata": {},
|
59 |
+
"output_type": "execute_result"
|
60 |
+
}
|
61 |
+
],
|
62 |
+
"source": [
|
63 |
+
"file_counts"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"cell_type": "code",
|
68 |
+
"execution_count": 5,
|
69 |
+
"metadata": {},
|
70 |
+
"outputs": [
|
71 |
+
{
|
72 |
+
"data": {
|
73 |
+
"text/html": [
|
74 |
+
"<div>\n",
|
75 |
+
"<style scoped>\n",
|
76 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
77 |
+
" vertical-align: middle;\n",
|
78 |
+
" }\n",
|
79 |
+
"\n",
|
80 |
+
" .dataframe tbody tr th {\n",
|
81 |
+
" vertical-align: top;\n",
|
82 |
+
" }\n",
|
83 |
+
"\n",
|
84 |
+
" .dataframe thead th {\n",
|
85 |
+
" text-align: right;\n",
|
86 |
+
" }\n",
|
87 |
+
"</style>\n",
|
88 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
89 |
+
" <thead>\n",
|
90 |
+
" <tr style=\"text-align: right;\">\n",
|
91 |
+
" <th></th>\n",
|
92 |
+
" <th>Images per instance</th>\n",
|
93 |
+
" <th>Number of instances</th>\n",
|
94 |
+
" <th>Number of images</th>\n",
|
95 |
+
" </tr>\n",
|
96 |
+
" </thead>\n",
|
97 |
+
" <tbody>\n",
|
98 |
+
" <tr>\n",
|
99 |
+
" <th>0</th>\n",
|
100 |
+
" <td>3</td>\n",
|
101 |
+
" <td>696</td>\n",
|
102 |
+
" <td>2088</td>\n",
|
103 |
+
" </tr>\n",
|
104 |
+
" <tr>\n",
|
105 |
+
" <th>1</th>\n",
|
106 |
+
" <td>4</td>\n",
|
107 |
+
" <td>703</td>\n",
|
108 |
+
" <td>2812</td>\n",
|
109 |
+
" </tr>\n",
|
110 |
+
" <tr>\n",
|
111 |
+
" <th>2</th>\n",
|
112 |
+
" <td>5</td>\n",
|
113 |
+
" <td>360</td>\n",
|
114 |
+
" <td>1800</td>\n",
|
115 |
+
" </tr>\n",
|
116 |
+
" <tr>\n",
|
117 |
+
" <th>3</th>\n",
|
118 |
+
" <td>6</td>\n",
|
119 |
+
" <td>853</td>\n",
|
120 |
+
" <td>5118</td>\n",
|
121 |
+
" </tr>\n",
|
122 |
+
" <tr>\n",
|
123 |
+
" <th>4</th>\n",
|
124 |
+
" <td>7</td>\n",
|
125 |
+
" <td>471</td>\n",
|
126 |
+
" <td>3297</td>\n",
|
127 |
+
" </tr>\n",
|
128 |
+
" <tr>\n",
|
129 |
+
" <th>5</th>\n",
|
130 |
+
" <td>8</td>\n",
|
131 |
+
" <td>223</td>\n",
|
132 |
+
" <td>1784</td>\n",
|
133 |
+
" </tr>\n",
|
134 |
+
" <tr>\n",
|
135 |
+
" <th>6</th>\n",
|
136 |
+
" <td>9</td>\n",
|
137 |
+
" <td>110</td>\n",
|
138 |
+
" <td>990</td>\n",
|
139 |
+
" </tr>\n",
|
140 |
+
" <tr>\n",
|
141 |
+
" <th>7</th>\n",
|
142 |
+
" <td>10+</td>\n",
|
143 |
+
" <td>456</td>\n",
|
144 |
+
" <td>7836</td>\n",
|
145 |
+
" </tr>\n",
|
146 |
+
" <tr>\n",
|
147 |
+
" <th>8</th>\n",
|
148 |
+
" <td>Total</td>\n",
|
149 |
+
" <td>3872</td>\n",
|
150 |
+
" <td>25725</td>\n",
|
151 |
+
" </tr>\n",
|
152 |
+
" </tbody>\n",
|
153 |
+
"</table>\n",
|
154 |
+
"</div>"
|
155 |
+
],
|
156 |
+
"text/plain": [
|
157 |
+
" Images per instance Number of instances Number of images\n",
|
158 |
+
"0 3 696 2088\n",
|
159 |
+
"1 4 703 2812\n",
|
160 |
+
"2 5 360 1800\n",
|
161 |
+
"3 6 853 5118\n",
|
162 |
+
"4 7 471 3297\n",
|
163 |
+
"5 8 223 1784\n",
|
164 |
+
"6 9 110 990\n",
|
165 |
+
"7 10+ 456 7836\n",
|
166 |
+
"8 Total 3872 25725"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
"execution_count": 5,
|
170 |
+
"metadata": {},
|
171 |
+
"output_type": "execute_result"
|
172 |
+
}
|
173 |
+
],
|
174 |
+
"source": [
|
175 |
+
"distribution_df = pd.DataFrame()\n",
|
176 |
+
"distribution_df[\"Images per instance\"] = file_counts.value_counts().sort_index().index\n",
|
177 |
+
"distribution_df[\"Number of instances\"] = file_counts.value_counts().sort_index().values\n",
|
178 |
+
"distribution_df[\"Number of images\"] = (\n",
|
179 |
+
" distribution_df[\"Images per instance\"] * distribution_df[\"Number of instances\"]\n",
|
180 |
+
")\n",
|
181 |
+
"num_instances_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
|
182 |
+
" \"Number of instances\"\n",
|
183 |
+
"].sum()\n",
|
184 |
+
"num_images_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
|
185 |
+
" \"Number of images\"\n",
|
186 |
+
"].sum()\n",
|
187 |
+
"distribution_df = distribution_df[\n",
|
188 |
+
" (distribution_df[\"Images per instance\"] < 10) & (distribution_df[\"Images per instance\"] > 2)\n",
|
189 |
+
"]\n",
|
190 |
+
"\n",
|
191 |
+
"distribution_df = pd.concat(\n",
|
192 |
+
" [\n",
|
193 |
+
" distribution_df,\n",
|
194 |
+
" pd.DataFrame(\n",
|
195 |
+
" {\n",
|
196 |
+
" \"Images per instance\": [\"10+\"],\n",
|
197 |
+
" \"Number of instances\": [num_instances_10plus],\n",
|
198 |
+
" \"Number of images\": [num_images_10plus],\n",
|
199 |
+
" }\n",
|
200 |
+
" ),\n",
|
201 |
+
" ],\n",
|
202 |
+
" ignore_index=True,\n",
|
203 |
+
")\n",
|
204 |
+
"\n",
|
205 |
+
"# append total\n",
|
206 |
+
"distribution_df = pd.concat(\n",
|
207 |
+
" [\n",
|
208 |
+
" distribution_df,\n",
|
209 |
+
" pd.DataFrame(\n",
|
210 |
+
" {\n",
|
211 |
+
" \"Images per instance\": [\"Total\"],\n",
|
212 |
+
" \"Number of instances\": [distribution_df[\"Number of instances\"].sum()],\n",
|
213 |
+
" \"Number of images\": [distribution_df[\"Number of images\"].sum()],\n",
|
214 |
+
" }\n",
|
215 |
+
" ),\n",
|
216 |
+
" ],\n",
|
217 |
+
" ignore_index=True,\n",
|
218 |
+
")\n",
|
219 |
+
"# distribution_df = distribution_df[['Images per instance', 'Number of images', 'Number of instances']]\n",
|
220 |
+
"distribution_df"
|
221 |
+
]
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"cell_type": "markdown",
|
225 |
+
"metadata": {},
|
226 |
+
"source": [
|
227 |
+
"This distribution broadly follows that from Winterbottom's paper, with a few minor differences. \n",
|
228 |
+
"\n",
|
229 |
+
"I am not expecting it to be exactly the same, as winterbottom did not use the database at all, instead just looked at the images"
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"cell_type": "markdown",
|
234 |
+
"metadata": {},
|
235 |
+
"source": [
|
236 |
+
"## Assessing for alternative text labels"
|
237 |
+
]
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "code",
|
241 |
+
"execution_count": 61,
|
242 |
+
"metadata": {},
|
243 |
+
"outputs": [],
|
244 |
+
"source": [
|
245 |
+
"full_df = pd.read_excel(\"../data/raw/Durham_University_Museums_data.xlsx\")\n",
|
246 |
+
"full_df = full_df.filter(regex=r\"^(?!Unnamed).*$\")\n",
|
247 |
+
"full_df = full_df.dropna(subset=[\"description\"])"
|
248 |
+
]
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"cell_type": "code",
|
252 |
+
"execution_count": 69,
|
253 |
+
"metadata": {},
|
254 |
+
"outputs": [
|
255 |
+
{
|
256 |
+
"data": {
|
257 |
+
"text/html": [
|
258 |
+
"<div>\n",
|
259 |
+
"<style scoped>\n",
|
260 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
261 |
+
" vertical-align: middle;\n",
|
262 |
+
" }\n",
|
263 |
+
"\n",
|
264 |
+
" .dataframe tbody tr th {\n",
|
265 |
+
" vertical-align: top;\n",
|
266 |
+
" }\n",
|
267 |
+
"\n",
|
268 |
+
" .dataframe thead th {\n",
|
269 |
+
" text-align: right;\n",
|
270 |
+
" }\n",
|
271 |
+
"</style>\n",
|
272 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
273 |
+
" <thead>\n",
|
274 |
+
" <tr style=\"text-align: right;\">\n",
|
275 |
+
" <th></th>\n",
|
276 |
+
" <th>Column</th>\n",
|
277 |
+
" <th>Null Percentage</th>\n",
|
278 |
+
" <th>unique_values</th>\n",
|
279 |
+
" </tr>\n",
|
280 |
+
" </thead>\n",
|
281 |
+
" <tbody>\n",
|
282 |
+
" <tr>\n",
|
283 |
+
" <th>0</th>\n",
|
284 |
+
" <td>object_number</td>\n",
|
285 |
+
" <td>0.00</td>\n",
|
286 |
+
" <td>53460</td>\n",
|
287 |
+
" </tr>\n",
|
288 |
+
" <tr>\n",
|
289 |
+
" <th>4</th>\n",
|
290 |
+
" <td>description</td>\n",
|
291 |
+
" <td>0.00</td>\n",
|
292 |
+
" <td>1191</td>\n",
|
293 |
+
" </tr>\n",
|
294 |
+
" <tr>\n",
|
295 |
+
" <th>6</th>\n",
|
296 |
+
" <td>material</td>\n",
|
297 |
+
" <td>4.43</td>\n",
|
298 |
+
" <td>6442</td>\n",
|
299 |
+
" </tr>\n",
|
300 |
+
" <tr>\n",
|
301 |
+
" <th>1</th>\n",
|
302 |
+
" <td>object_name</td>\n",
|
303 |
+
" <td>8.96</td>\n",
|
304 |
+
" <td>26163</td>\n",
|
305 |
+
" </tr>\n",
|
306 |
+
" <tr>\n",
|
307 |
+
" <th>22</th>\n",
|
308 |
+
" <td>alternative_number</td>\n",
|
309 |
+
" <td>18.30</td>\n",
|
310 |
+
" <td>46165</td>\n",
|
311 |
+
" </tr>\n",
|
312 |
+
" <tr>\n",
|
313 |
+
" <th>13</th>\n",
|
314 |
+
" <td>production.place</td>\n",
|
315 |
+
" <td>34.42</td>\n",
|
316 |
+
" <td>3234</td>\n",
|
317 |
+
" </tr>\n",
|
318 |
+
" <tr>\n",
|
319 |
+
" <th>12</th>\n",
|
320 |
+
" <td>production.period</td>\n",
|
321 |
+
" <td>40.90</td>\n",
|
322 |
+
" <td>414</td>\n",
|
323 |
+
" </tr>\n",
|
324 |
+
" <tr>\n",
|
325 |
+
" <th>3</th>\n",
|
326 |
+
" <td>reproduction.reference</td>\n",
|
327 |
+
" <td>50.23</td>\n",
|
328 |
+
" <td>76</td>\n",
|
329 |
+
" </tr>\n",
|
330 |
+
" <tr>\n",
|
331 |
+
" <th>11</th>\n",
|
332 |
+
" <td>production.date.end</td>\n",
|
333 |
+
" <td>50.90</td>\n",
|
334 |
+
" <td>6923</td>\n",
|
335 |
+
" </tr>\n",
|
336 |
+
" <tr>\n",
|
337 |
+
" <th>10</th>\n",
|
338 |
+
" <td>production.date.start</td>\n",
|
339 |
+
" <td>51.04</td>\n",
|
340 |
+
" <td>127</td>\n",
|
341 |
+
" </tr>\n",
|
342 |
+
" <tr>\n",
|
343 |
+
" <th>2</th>\n",
|
344 |
+
" <td>other_name</td>\n",
|
345 |
+
" <td>58.72</td>\n",
|
346 |
+
" <td>968</td>\n",
|
347 |
+
" </tr>\n",
|
348 |
+
" <tr>\n",
|
349 |
+
" <th>9</th>\n",
|
350 |
+
" <td>number_of_parts</td>\n",
|
351 |
+
" <td>62.08</td>\n",
|
352 |
+
" <td>949</td>\n",
|
353 |
+
" </tr>\n",
|
354 |
+
" <tr>\n",
|
355 |
+
" <th>8</th>\n",
|
356 |
+
" <td>physical_description</td>\n",
|
357 |
+
" <td>73.54</td>\n",
|
358 |
+
" <td>485</td>\n",
|
359 |
+
" </tr>\n",
|
360 |
+
" <tr>\n",
|
361 |
+
" <th>14</th>\n",
|
362 |
+
" <td>field_coll.place</td>\n",
|
363 |
+
" <td>77.88</td>\n",
|
364 |
+
" <td>812</td>\n",
|
365 |
+
" </tr>\n",
|
366 |
+
" <tr>\n",
|
367 |
+
" <th>16</th>\n",
|
368 |
+
" <td>field_coll.method</td>\n",
|
369 |
+
" <td>83.38</td>\n",
|
370 |
+
" <td>546</td>\n",
|
371 |
+
" </tr>\n",
|
372 |
+
" <tr>\n",
|
373 |
+
" <th>18</th>\n",
|
374 |
+
" <td>content.subject</td>\n",
|
375 |
+
" <td>87.25</td>\n",
|
376 |
+
" <td>1449</td>\n",
|
377 |
+
" </tr>\n",
|
378 |
+
" <tr>\n",
|
379 |
+
" <th>7</th>\n",
|
380 |
+
" <td>technique</td>\n",
|
381 |
+
" <td>87.58</td>\n",
|
382 |
+
" <td>22</td>\n",
|
383 |
+
" </tr>\n",
|
384 |
+
" <tr>\n",
|
385 |
+
" <th>21</th>\n",
|
386 |
+
" <td>association.subject</td>\n",
|
387 |
+
" <td>88.35</td>\n",
|
388 |
+
" <td>516</td>\n",
|
389 |
+
" </tr>\n",
|
390 |
+
" <tr>\n",
|
391 |
+
" <th>15</th>\n",
|
392 |
+
" <td>field_coll.notes</td>\n",
|
393 |
+
" <td>91.09</td>\n",
|
394 |
+
" <td>773</td>\n",
|
395 |
+
" </tr>\n",
|
396 |
+
" <tr>\n",
|
397 |
+
" <th>5</th>\n",
|
398 |
+
" <td>label.text</td>\n",
|
399 |
+
" <td>91.69</td>\n",
|
400 |
+
" <td>78</td>\n",
|
401 |
+
" </tr>\n",
|
402 |
+
" <tr>\n",
|
403 |
+
" <th>20</th>\n",
|
404 |
+
" <td>association.person</td>\n",
|
405 |
+
" <td>95.54</td>\n",
|
406 |
+
" <td>289</td>\n",
|
407 |
+
" </tr>\n",
|
408 |
+
" <tr>\n",
|
409 |
+
" <th>17</th>\n",
|
410 |
+
" <td>content.person.name</td>\n",
|
411 |
+
" <td>95.89</td>\n",
|
412 |
+
" <td>247</td>\n",
|
413 |
+
" </tr>\n",
|
414 |
+
" <tr>\n",
|
415 |
+
" <th>19</th>\n",
|
416 |
+
" <td>association.period</td>\n",
|
417 |
+
" <td>97.70</td>\n",
|
418 |
+
" <td>36718</td>\n",
|
419 |
+
" </tr>\n",
|
420 |
+
" </tbody>\n",
|
421 |
+
"</table>\n",
|
422 |
+
"</div>"
|
423 |
+
],
|
424 |
+
"text/plain": [
|
425 |
+
" Column Null Percentage unique_values\n",
|
426 |
+
"0 object_number 0.00 53460\n",
|
427 |
+
"4 description 0.00 1191\n",
|
428 |
+
"6 material 4.43 6442\n",
|
429 |
+
"1 object_name 8.96 26163\n",
|
430 |
+
"22 alternative_number 18.30 46165\n",
|
431 |
+
"13 production.place 34.42 3234\n",
|
432 |
+
"12 production.period 40.90 414\n",
|
433 |
+
"3 reproduction.reference 50.23 76\n",
|
434 |
+
"11 production.date.end 50.90 6923\n",
|
435 |
+
"10 production.date.start 51.04 127\n",
|
436 |
+
"2 other_name 58.72 968\n",
|
437 |
+
"9 number_of_parts 62.08 949\n",
|
438 |
+
"8 physical_description 73.54 485\n",
|
439 |
+
"14 field_coll.place 77.88 812\n",
|
440 |
+
"16 field_coll.method 83.38 546\n",
|
441 |
+
"18 content.subject 87.25 1449\n",
|
442 |
+
"7 technique 87.58 22\n",
|
443 |
+
"21 association.subject 88.35 516\n",
|
444 |
+
"15 field_coll.notes 91.09 773\n",
|
445 |
+
"5 label.text 91.69 78\n",
|
446 |
+
"20 association.person 95.54 289\n",
|
447 |
+
"17 content.person.name 95.89 247\n",
|
448 |
+
"19 association.period 97.70 36718"
|
449 |
+
]
|
450 |
+
},
|
451 |
+
"execution_count": 69,
|
452 |
+
"metadata": {},
|
453 |
+
"output_type": "execute_result"
|
454 |
+
}
|
455 |
+
],
|
456 |
+
"source": [
|
457 |
+
"null_percentage = (full_df.isnull().sum() / len(full_df)) * 100\n",
|
458 |
+
"desc_df = pd.DataFrame(\n",
|
459 |
+
" {\"Column\": null_percentage.index, \"Null Percentage\": null_percentage.values}\n",
|
460 |
+
")\n",
|
461 |
+
"desc_df[\"Null Percentage\"] = desc_df[\"Null Percentage\"].round(2)\n",
|
462 |
+
"desc_df = desc_df.sort_values(by=\"Null Percentage\")\n",
|
463 |
+
"desc_df[\"unique_values\"] = full_df.nunique().values\n",
|
464 |
+
"desc_df"
|
465 |
+
]
|
466 |
+
},
|
467 |
+
{
|
468 |
+
"cell_type": "code",
|
469 |
+
"execution_count": 97,
|
470 |
+
"metadata": {},
|
471 |
+
"outputs": [],
|
472 |
+
"source": [
|
473 |
+
"def get_distribution(df, column, lower_bound=2):\n",
|
474 |
+
" distribution = pd.DataFrame()\n",
|
475 |
+
" col_counts = df[column].value_counts()\n",
|
476 |
+
" distribution[f\"{column}s per instance\"] = col_counts.value_counts().sort_index().index\n",
|
477 |
+
" distribution[\"Number of instances\"] = col_counts.value_counts().sort_index().values\n",
|
478 |
+
" distribution[f\"Number of {column}s\"] = (\n",
|
479 |
+
" distribution[f\"{column}s per instance\"] * distribution[\"Number of instances\"]\n",
|
480 |
+
" )\n",
|
481 |
+
" num_instances_10_50 = distribution[\n",
|
482 |
+
" (distribution[f\"{column}s per instance\"] >= 10)\n",
|
483 |
+
" & (distribution[f\"{column}s per instance\"] < 50)\n",
|
484 |
+
" ][\"Number of instances\"].sum()\n",
|
485 |
+
" num_images_10_50 = distribution[\n",
|
486 |
+
" (distribution[f\"{column}s per instance\"] >= 10)\n",
|
487 |
+
" & (distribution[f\"{column}s per instance\"] < 50)\n",
|
488 |
+
" ][f\"Number of {column}s\"].sum()\n",
|
489 |
+
" num_instances_50_100 = distribution[\n",
|
490 |
+
" (distribution[f\"{column}s per instance\"] >= 50)\n",
|
491 |
+
" & (distribution[f\"{column}s per instance\"] < 100)\n",
|
492 |
+
" ][\"Number of instances\"].sum()\n",
|
493 |
+
" num_images_50_100 = distribution[\n",
|
494 |
+
" (distribution[f\"{column}s per instance\"] >= 50)\n",
|
495 |
+
" & (distribution[f\"{column}s per instance\"] < 100)\n",
|
496 |
+
" ][f\"Number of {column}s\"].sum()\n",
|
497 |
+
" num_instances_100_1000 = distribution[\n",
|
498 |
+
" (distribution[f\"{column}s per instance\"] >= 100)\n",
|
499 |
+
" & (distribution[f\"{column}s per instance\"] < 1000)\n",
|
500 |
+
" ][\"Number of instances\"].sum()\n",
|
501 |
+
" num_images_100_1000 = distribution[\n",
|
502 |
+
" (distribution[f\"{column}s per instance\"] >= 100)\n",
|
503 |
+
" & (distribution[f\"{column}s per instance\"] < 1000)\n",
|
504 |
+
" ][f\"Number of {column}s\"].sum()\n",
|
505 |
+
" num_instances_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
|
506 |
+
" \"Number of instances\"\n",
|
507 |
+
" ].sum()\n",
|
508 |
+
" num_images_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
|
509 |
+
" f\"Number of {column}s\"\n",
|
510 |
+
" ].sum()\n",
|
511 |
+
"\n",
|
512 |
+
" distribution = distribution[\n",
|
513 |
+
" (distribution[f\"{column}s per instance\"] < 10)\n",
|
514 |
+
" & (distribution[f\"{column}s per instance\"] > lower_bound)\n",
|
515 |
+
" ]\n",
|
516 |
+
"\n",
|
517 |
+
" distribution = pd.concat(\n",
|
518 |
+
" [\n",
|
519 |
+
" distribution,\n",
|
520 |
+
" pd.DataFrame(\n",
|
521 |
+
" {\n",
|
522 |
+
" f\"{column}s per instance\": [\"10-50\"],\n",
|
523 |
+
" \"Number of instances\": [num_instances_10_50],\n",
|
524 |
+
" f\"Number of {column}s\": [num_images_10_50],\n",
|
525 |
+
" }\n",
|
526 |
+
" ),\n",
|
527 |
+
" pd.DataFrame(\n",
|
528 |
+
" {\n",
|
529 |
+
" f\"{column}s per instance\": [\"50-100\"],\n",
|
530 |
+
" \"Number of instances\": [num_instances_50_100],\n",
|
531 |
+
" f\"Number of {column}s\": [num_images_50_100],\n",
|
532 |
+
" }\n",
|
533 |
+
" ),\n",
|
534 |
+
" pd.DataFrame(\n",
|
535 |
+
" {\n",
|
536 |
+
" f\"{column}s per instance\": [\"100-1000\"],\n",
|
537 |
+
" \"Number of instances\": [num_instances_100_1000],\n",
|
538 |
+
" f\"Number of {column}s\": [num_images_100_1000],\n",
|
539 |
+
" }\n",
|
540 |
+
" ),\n",
|
541 |
+
" pd.DataFrame(\n",
|
542 |
+
" {\n",
|
543 |
+
" f\"{column}s per instance\": [\"1000+\"],\n",
|
544 |
+
" \"Number of instances\": [num_instances_1000plus],\n",
|
545 |
+
" f\"Number of {column}s\": [num_images_1000plus],\n",
|
546 |
+
" }\n",
|
547 |
+
" ),\n",
|
548 |
+
" ],\n",
|
549 |
+
" ignore_index=True,\n",
|
550 |
+
" )\n",
|
551 |
+
"\n",
|
552 |
+
" distribution = pd.concat(\n",
|
553 |
+
" [\n",
|
554 |
+
" distribution,\n",
|
555 |
+
" pd.DataFrame(\n",
|
556 |
+
" {\n",
|
557 |
+
" f\"{column}s per instance\": [\"Total\"],\n",
|
558 |
+
" \"Number of instances\": [distribution[\"Number of instances\"].sum()],\n",
|
559 |
+
" f\"Number of {column}s\": [distribution[f\"Number of {column}s\"].sum()],\n",
|
560 |
+
" }\n",
|
561 |
+
" ),\n",
|
562 |
+
" ],\n",
|
563 |
+
" ignore_index=True,\n",
|
564 |
+
" )\n",
|
565 |
+
" # rename columns\n",
|
566 |
+
" return distribution"
|
567 |
+
]
|
568 |
+
},
|
569 |
+
{
|
570 |
+
"cell_type": "code",
|
571 |
+
"execution_count": 107,
|
572 |
+
"metadata": {},
|
573 |
+
"outputs": [
|
574 |
+
{
|
575 |
+
"data": {
|
576 |
+
"text/html": [
|
577 |
+
"<div>\n",
|
578 |
+
"<style scoped>\n",
|
579 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
580 |
+
" vertical-align: middle;\n",
|
581 |
+
" }\n",
|
582 |
+
"\n",
|
583 |
+
" .dataframe tbody tr th {\n",
|
584 |
+
" vertical-align: top;\n",
|
585 |
+
" }\n",
|
586 |
+
"\n",
|
587 |
+
" .dataframe thead th {\n",
|
588 |
+
" text-align: right;\n",
|
589 |
+
" }\n",
|
590 |
+
"</style>\n",
|
591 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
592 |
+
" <thead>\n",
|
593 |
+
" <tr style=\"text-align: right;\">\n",
|
594 |
+
" <th></th>\n",
|
595 |
+
" <th>object_names per instance</th>\n",
|
596 |
+
" <th>Number of instances</th>\n",
|
597 |
+
" <th>Number of object_names</th>\n",
|
598 |
+
" </tr>\n",
|
599 |
+
" </thead>\n",
|
600 |
+
" <tbody>\n",
|
601 |
+
" <tr>\n",
|
602 |
+
" <th>0</th>\n",
|
603 |
+
" <td>3</td>\n",
|
604 |
+
" <td>93</td>\n",
|
605 |
+
" <td>279</td>\n",
|
606 |
+
" </tr>\n",
|
607 |
+
" <tr>\n",
|
608 |
+
" <th>1</th>\n",
|
609 |
+
" <td>4</td>\n",
|
610 |
+
" <td>57</td>\n",
|
611 |
+
" <td>228</td>\n",
|
612 |
+
" </tr>\n",
|
613 |
+
" <tr>\n",
|
614 |
+
" <th>2</th>\n",
|
615 |
+
" <td>5</td>\n",
|
616 |
+
" <td>53</td>\n",
|
617 |
+
" <td>265</td>\n",
|
618 |
+
" </tr>\n",
|
619 |
+
" <tr>\n",
|
620 |
+
" <th>3</th>\n",
|
621 |
+
" <td>6</td>\n",
|
622 |
+
" <td>32</td>\n",
|
623 |
+
" <td>192</td>\n",
|
624 |
+
" </tr>\n",
|
625 |
+
" <tr>\n",
|
626 |
+
" <th>4</th>\n",
|
627 |
+
" <td>7</td>\n",
|
628 |
+
" <td>27</td>\n",
|
629 |
+
" <td>189</td>\n",
|
630 |
+
" </tr>\n",
|
631 |
+
" <tr>\n",
|
632 |
+
" <th>5</th>\n",
|
633 |
+
" <td>8</td>\n",
|
634 |
+
" <td>24</td>\n",
|
635 |
+
" <td>192</td>\n",
|
636 |
+
" </tr>\n",
|
637 |
+
" <tr>\n",
|
638 |
+
" <th>6</th>\n",
|
639 |
+
" <td>9</td>\n",
|
640 |
+
" <td>27</td>\n",
|
641 |
+
" <td>243</td>\n",
|
642 |
+
" </tr>\n",
|
643 |
+
" <tr>\n",
|
644 |
+
" <th>7</th>\n",
|
645 |
+
" <td>10-50</td>\n",
|
646 |
+
" <td>227</td>\n",
|
647 |
+
" <td>4921</td>\n",
|
648 |
+
" </tr>\n",
|
649 |
+
" <tr>\n",
|
650 |
+
" <th>8</th>\n",
|
651 |
+
" <td>50-100</td>\n",
|
652 |
+
" <td>51</td>\n",
|
653 |
+
" <td>3683</td>\n",
|
654 |
+
" </tr>\n",
|
655 |
+
" <tr>\n",
|
656 |
+
" <th>9</th>\n",
|
657 |
+
" <td>100-1000</td>\n",
|
658 |
+
" <td>65</td>\n",
|
659 |
+
" <td>17027</td>\n",
|
660 |
+
" </tr>\n",
|
661 |
+
" <tr>\n",
|
662 |
+
" <th>10</th>\n",
|
663 |
+
" <td>1000+</td>\n",
|
664 |
+
" <td>7</td>\n",
|
665 |
+
" <td>20758</td>\n",
|
666 |
+
" </tr>\n",
|
667 |
+
" <tr>\n",
|
668 |
+
" <th>11</th>\n",
|
669 |
+
" <td>Total</td>\n",
|
670 |
+
" <td>663</td>\n",
|
671 |
+
" <td>47977</td>\n",
|
672 |
+
" </tr>\n",
|
673 |
+
" </tbody>\n",
|
674 |
+
"</table>\n",
|
675 |
+
"</div>"
|
676 |
+
],
|
677 |
+
"text/plain": [
|
678 |
+
" object_names per instance Number of instances Number of object_names\n",
|
679 |
+
"0 3 93 279\n",
|
680 |
+
"1 4 57 228\n",
|
681 |
+
"2 5 53 265\n",
|
682 |
+
"3 6 32 192\n",
|
683 |
+
"4 7 27 189\n",
|
684 |
+
"5 8 24 192\n",
|
685 |
+
"6 9 27 243\n",
|
686 |
+
"7 10-50 227 4921\n",
|
687 |
+
"8 50-100 51 3683\n",
|
688 |
+
"9 100-1000 65 17027\n",
|
689 |
+
"10 1000+ 7 20758\n",
|
690 |
+
"11 Total 663 47977"
|
691 |
+
]
|
692 |
+
},
|
693 |
+
"execution_count": 107,
|
694 |
+
"metadata": {},
|
695 |
+
"output_type": "execute_result"
|
696 |
+
}
|
697 |
+
],
|
698 |
+
"source": [
|
699 |
+
"get_distribution(full_df, \"object_name\")"
|
700 |
+
]
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"cell_type": "code",
|
704 |
+
"execution_count": 100,
|
705 |
+
"metadata": {},
|
706 |
+
"outputs": [
|
707 |
+
{
|
708 |
+
"data": {
|
709 |
+
"text/html": [
|
710 |
+
"<div>\n",
|
711 |
+
"<style scoped>\n",
|
712 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
713 |
+
" vertical-align: middle;\n",
|
714 |
+
" }\n",
|
715 |
+
"\n",
|
716 |
+
" .dataframe tbody tr th {\n",
|
717 |
+
" vertical-align: top;\n",
|
718 |
+
" }\n",
|
719 |
+
"\n",
|
720 |
+
" .dataframe thead th {\n",
|
721 |
+
" text-align: right;\n",
|
722 |
+
" }\n",
|
723 |
+
"</style>\n",
|
724 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
725 |
+
" <thead>\n",
|
726 |
+
" <tr style=\"text-align: right;\">\n",
|
727 |
+
" <th></th>\n",
|
728 |
+
" <th>materials per instance</th>\n",
|
729 |
+
" <th>Number of instances</th>\n",
|
730 |
+
" <th>Number of materials</th>\n",
|
731 |
+
" </tr>\n",
|
732 |
+
" </thead>\n",
|
733 |
+
" <tbody>\n",
|
734 |
+
" <tr>\n",
|
735 |
+
" <th>0</th>\n",
|
736 |
+
" <td>3</td>\n",
|
737 |
+
" <td>30</td>\n",
|
738 |
+
" <td>90</td>\n",
|
739 |
+
" </tr>\n",
|
740 |
+
" <tr>\n",
|
741 |
+
" <th>1</th>\n",
|
742 |
+
" <td>4</td>\n",
|
743 |
+
" <td>16</td>\n",
|
744 |
+
" <td>64</td>\n",
|
745 |
+
" </tr>\n",
|
746 |
+
" <tr>\n",
|
747 |
+
" <th>2</th>\n",
|
748 |
+
" <td>5</td>\n",
|
749 |
+
" <td>14</td>\n",
|
750 |
+
" <td>70</td>\n",
|
751 |
+
" </tr>\n",
|
752 |
+
" <tr>\n",
|
753 |
+
" <th>3</th>\n",
|
754 |
+
" <td>6</td>\n",
|
755 |
+
" <td>9</td>\n",
|
756 |
+
" <td>54</td>\n",
|
757 |
+
" </tr>\n",
|
758 |
+
" <tr>\n",
|
759 |
+
" <th>4</th>\n",
|
760 |
+
" <td>7</td>\n",
|
761 |
+
" <td>10</td>\n",
|
762 |
+
" <td>70</td>\n",
|
763 |
+
" </tr>\n",
|
764 |
+
" <tr>\n",
|
765 |
+
" <th>5</th>\n",
|
766 |
+
" <td>8</td>\n",
|
767 |
+
" <td>6</td>\n",
|
768 |
+
" <td>48</td>\n",
|
769 |
+
" </tr>\n",
|
770 |
+
" <tr>\n",
|
771 |
+
" <th>6</th>\n",
|
772 |
+
" <td>9</td>\n",
|
773 |
+
" <td>5</td>\n",
|
774 |
+
" <td>45</td>\n",
|
775 |
+
" </tr>\n",
|
776 |
+
" <tr>\n",
|
777 |
+
" <th>7</th>\n",
|
778 |
+
" <td>10-50</td>\n",
|
779 |
+
" <td>88</td>\n",
|
780 |
+
" <td>1975</td>\n",
|
781 |
+
" </tr>\n",
|
782 |
+
" <tr>\n",
|
783 |
+
" <th>8</th>\n",
|
784 |
+
" <td>50-100</td>\n",
|
785 |
+
" <td>21</td>\n",
|
786 |
+
" <td>1409</td>\n",
|
787 |
+
" </tr>\n",
|
788 |
+
" <tr>\n",
|
789 |
+
" <th>9</th>\n",
|
790 |
+
" <td>100-1000</td>\n",
|
791 |
+
" <td>43</td>\n",
|
792 |
+
" <td>13030</td>\n",
|
793 |
+
" </tr>\n",
|
794 |
+
" <tr>\n",
|
795 |
+
" <th>10</th>\n",
|
796 |
+
" <td>1000+</td>\n",
|
797 |
+
" <td>12</td>\n",
|
798 |
+
" <td>34036</td>\n",
|
799 |
+
" </tr>\n",
|
800 |
+
" <tr>\n",
|
801 |
+
" <th>11</th>\n",
|
802 |
+
" <td>Total</td>\n",
|
803 |
+
" <td>254</td>\n",
|
804 |
+
" <td>50891</td>\n",
|
805 |
+
" </tr>\n",
|
806 |
+
" </tbody>\n",
|
807 |
+
"</table>\n",
|
808 |
+
"</div>"
|
809 |
+
],
|
810 |
+
"text/plain": [
|
811 |
+
" materials per instance Number of instances Number of materials\n",
|
812 |
+
"0 3 30 90\n",
|
813 |
+
"1 4 16 64\n",
|
814 |
+
"2 5 14 70\n",
|
815 |
+
"3 6 9 54\n",
|
816 |
+
"4 7 10 70\n",
|
817 |
+
"5 8 6 48\n",
|
818 |
+
"6 9 5 45\n",
|
819 |
+
"7 10-50 88 1975\n",
|
820 |
+
"8 50-100 21 1409\n",
|
821 |
+
"9 100-1000 43 13030\n",
|
822 |
+
"10 1000+ 12 34036\n",
|
823 |
+
"11 Total 254 50891"
|
824 |
+
]
|
825 |
+
},
|
826 |
+
"execution_count": 100,
|
827 |
+
"metadata": {},
|
828 |
+
"output_type": "execute_result"
|
829 |
+
}
|
830 |
+
],
|
831 |
+
"source": [
|
832 |
+
"get_distribution(full_df, \"material\")"
|
833 |
+
]
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"cell_type": "markdown",
|
837 |
+
"metadata": {},
|
838 |
+
"source": [
|
839 |
+
"Production date could be used for a regression task, and the other fields could be used for a classification task."
|
840 |
+
]
|
841 |
+
},
|
842 |
+
{
|
843 |
+
"cell_type": "markdown",
|
844 |
+
"metadata": {},
|
845 |
+
"source": [
|
846 |
+
"### Year"
|
847 |
+
]
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"cell_type": "code",
|
851 |
+
"execution_count": 101,
|
852 |
+
"metadata": {},
|
853 |
+
"outputs": [
|
854 |
+
{
|
855 |
+
"data": {
|
856 |
+
"text/html": [
|
857 |
+
"<div>\n",
|
858 |
+
"<style scoped>\n",
|
859 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
860 |
+
" vertical-align: middle;\n",
|
861 |
+
" }\n",
|
862 |
+
"\n",
|
863 |
+
" .dataframe tbody tr th {\n",
|
864 |
+
" vertical-align: top;\n",
|
865 |
+
" }\n",
|
866 |
+
"\n",
|
867 |
+
" .dataframe thead th {\n",
|
868 |
+
" text-align: right;\n",
|
869 |
+
" }\n",
|
870 |
+
"</style>\n",
|
871 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
872 |
+
" <thead>\n",
|
873 |
+
" <tr style=\"text-align: right;\">\n",
|
874 |
+
" <th></th>\n",
|
875 |
+
" <th>production.date.starts per instance</th>\n",
|
876 |
+
" <th>Number of instances</th>\n",
|
877 |
+
" <th>Number of production.date.starts</th>\n",
|
878 |
+
" </tr>\n",
|
879 |
+
" </thead>\n",
|
880 |
+
" <tbody>\n",
|
881 |
+
" <tr>\n",
|
882 |
+
" <th>0</th>\n",
|
883 |
+
" <td>1</td>\n",
|
884 |
+
" <td>275</td>\n",
|
885 |
+
" <td>275</td>\n",
|
886 |
+
" </tr>\n",
|
887 |
+
" <tr>\n",
|
888 |
+
" <th>1</th>\n",
|
889 |
+
" <td>2</td>\n",
|
890 |
+
" <td>129</td>\n",
|
891 |
+
" <td>258</td>\n",
|
892 |
+
" </tr>\n",
|
893 |
+
" <tr>\n",
|
894 |
+
" <th>2</th>\n",
|
895 |
+
" <td>3</td>\n",
|
896 |
+
" <td>75</td>\n",
|
897 |
+
" <td>225</td>\n",
|
898 |
+
" </tr>\n",
|
899 |
+
" <tr>\n",
|
900 |
+
" <th>3</th>\n",
|
901 |
+
" <td>4</td>\n",
|
902 |
+
" <td>72</td>\n",
|
903 |
+
" <td>288</td>\n",
|
904 |
+
" </tr>\n",
|
905 |
+
" <tr>\n",
|
906 |
+
" <th>4</th>\n",
|
907 |
+
" <td>5</td>\n",
|
908 |
+
" <td>45</td>\n",
|
909 |
+
" <td>225</td>\n",
|
910 |
+
" </tr>\n",
|
911 |
+
" <tr>\n",
|
912 |
+
" <th>5</th>\n",
|
913 |
+
" <td>6</td>\n",
|
914 |
+
" <td>32</td>\n",
|
915 |
+
" <td>192</td>\n",
|
916 |
+
" </tr>\n",
|
917 |
+
" <tr>\n",
|
918 |
+
" <th>6</th>\n",
|
919 |
+
" <td>7</td>\n",
|
920 |
+
" <td>20</td>\n",
|
921 |
+
" <td>140</td>\n",
|
922 |
+
" </tr>\n",
|
923 |
+
" <tr>\n",
|
924 |
+
" <th>7</th>\n",
|
925 |
+
" <td>8</td>\n",
|
926 |
+
" <td>16</td>\n",
|
927 |
+
" <td>128</td>\n",
|
928 |
+
" </tr>\n",
|
929 |
+
" <tr>\n",
|
930 |
+
" <th>8</th>\n",
|
931 |
+
" <td>9</td>\n",
|
932 |
+
" <td>21</td>\n",
|
933 |
+
" <td>189</td>\n",
|
934 |
+
" </tr>\n",
|
935 |
+
" <tr>\n",
|
936 |
+
" <th>9</th>\n",
|
937 |
+
" <td>10-50</td>\n",
|
938 |
+
" <td>199</td>\n",
|
939 |
+
" <td>4226</td>\n",
|
940 |
+
" </tr>\n",
|
941 |
+
" <tr>\n",
|
942 |
+
" <th>10</th>\n",
|
943 |
+
" <td>50-100</td>\n",
|
944 |
+
" <td>39</td>\n",
|
945 |
+
" <td>2661</td>\n",
|
946 |
+
" </tr>\n",
|
947 |
+
" <tr>\n",
|
948 |
+
" <th>11</th>\n",
|
949 |
+
" <td>100-1000</td>\n",
|
950 |
+
" <td>41</td>\n",
|
951 |
+
" <td>10259</td>\n",
|
952 |
+
" </tr>\n",
|
953 |
+
" <tr>\n",
|
954 |
+
" <th>12</th>\n",
|
955 |
+
" <td>1000+</td>\n",
|
956 |
+
" <td>4</td>\n",
|
957 |
+
" <td>7110</td>\n",
|
958 |
+
" </tr>\n",
|
959 |
+
" <tr>\n",
|
960 |
+
" <th>13</th>\n",
|
961 |
+
" <td>Total</td>\n",
|
962 |
+
" <td>968</td>\n",
|
963 |
+
" <td>26176</td>\n",
|
964 |
+
" </tr>\n",
|
965 |
+
" </tbody>\n",
|
966 |
+
"</table>\n",
|
967 |
+
"</div>"
|
968 |
+
],
|
969 |
+
"text/plain": [
|
970 |
+
" production.date.starts per instance Number of instances \\\n",
|
971 |
+
"0 1 275 \n",
|
972 |
+
"1 2 129 \n",
|
973 |
+
"2 3 75 \n",
|
974 |
+
"3 4 72 \n",
|
975 |
+
"4 5 45 \n",
|
976 |
+
"5 6 32 \n",
|
977 |
+
"6 7 20 \n",
|
978 |
+
"7 8 16 \n",
|
979 |
+
"8 9 21 \n",
|
980 |
+
"9 10-50 199 \n",
|
981 |
+
"10 50-100 39 \n",
|
982 |
+
"11 100-1000 41 \n",
|
983 |
+
"12 1000+ 4 \n",
|
984 |
+
"13 Total 968 \n",
|
985 |
+
"\n",
|
986 |
+
" Number of production.date.starts \n",
|
987 |
+
"0 275 \n",
|
988 |
+
"1 258 \n",
|
989 |
+
"2 225 \n",
|
990 |
+
"3 288 \n",
|
991 |
+
"4 225 \n",
|
992 |
+
"5 192 \n",
|
993 |
+
"6 140 \n",
|
994 |
+
"7 128 \n",
|
995 |
+
"8 189 \n",
|
996 |
+
"9 4226 \n",
|
997 |
+
"10 2661 \n",
|
998 |
+
"11 10259 \n",
|
999 |
+
"12 7110 \n",
|
1000 |
+
"13 26176 "
|
1001 |
+
]
|
1002 |
+
},
|
1003 |
+
"execution_count": 101,
|
1004 |
+
"metadata": {},
|
1005 |
+
"output_type": "execute_result"
|
1006 |
+
}
|
1007 |
+
],
|
1008 |
+
"source": [
|
1009 |
+
"get_distribution(full_df, \"production.date.start\", lower_bound=0)"
|
1010 |
+
]
|
1011 |
+
},
|
1012 |
+
{
|
1013 |
+
"cell_type": "code",
|
1014 |
+
"execution_count": 102,
|
1015 |
+
"metadata": {},
|
1016 |
+
"outputs": [
|
1017 |
+
{
|
1018 |
+
"data": {
|
1019 |
+
"text/html": [
|
1020 |
+
"<div>\n",
|
1021 |
+
"<style scoped>\n",
|
1022 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1023 |
+
" vertical-align: middle;\n",
|
1024 |
+
" }\n",
|
1025 |
+
"\n",
|
1026 |
+
" .dataframe tbody tr th {\n",
|
1027 |
+
" vertical-align: top;\n",
|
1028 |
+
" }\n",
|
1029 |
+
"\n",
|
1030 |
+
" .dataframe thead th {\n",
|
1031 |
+
" text-align: right;\n",
|
1032 |
+
" }\n",
|
1033 |
+
"</style>\n",
|
1034 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1035 |
+
" <thead>\n",
|
1036 |
+
" <tr style=\"text-align: right;\">\n",
|
1037 |
+
" <th></th>\n",
|
1038 |
+
" <th>production.date.ends per instance</th>\n",
|
1039 |
+
" <th>Number of instances</th>\n",
|
1040 |
+
" <th>Number of production.date.ends</th>\n",
|
1041 |
+
" </tr>\n",
|
1042 |
+
" </thead>\n",
|
1043 |
+
" <tbody>\n",
|
1044 |
+
" <tr>\n",
|
1045 |
+
" <th>0</th>\n",
|
1046 |
+
" <td>1</td>\n",
|
1047 |
+
" <td>285</td>\n",
|
1048 |
+
" <td>285</td>\n",
|
1049 |
+
" </tr>\n",
|
1050 |
+
" <tr>\n",
|
1051 |
+
" <th>1</th>\n",
|
1052 |
+
" <td>2</td>\n",
|
1053 |
+
" <td>120</td>\n",
|
1054 |
+
" <td>240</td>\n",
|
1055 |
+
" </tr>\n",
|
1056 |
+
" <tr>\n",
|
1057 |
+
" <th>2</th>\n",
|
1058 |
+
" <td>3</td>\n",
|
1059 |
+
" <td>63</td>\n",
|
1060 |
+
" <td>189</td>\n",
|
1061 |
+
" </tr>\n",
|
1062 |
+
" <tr>\n",
|
1063 |
+
" <th>3</th>\n",
|
1064 |
+
" <td>4</td>\n",
|
1065 |
+
" <td>46</td>\n",
|
1066 |
+
" <td>184</td>\n",
|
1067 |
+
" </tr>\n",
|
1068 |
+
" <tr>\n",
|
1069 |
+
" <th>4</th>\n",
|
1070 |
+
" <td>5</td>\n",
|
1071 |
+
" <td>32</td>\n",
|
1072 |
+
" <td>160</td>\n",
|
1073 |
+
" </tr>\n",
|
1074 |
+
" <tr>\n",
|
1075 |
+
" <th>5</th>\n",
|
1076 |
+
" <td>6</td>\n",
|
1077 |
+
" <td>37</td>\n",
|
1078 |
+
" <td>222</td>\n",
|
1079 |
+
" </tr>\n",
|
1080 |
+
" <tr>\n",
|
1081 |
+
" <th>6</th>\n",
|
1082 |
+
" <td>7</td>\n",
|
1083 |
+
" <td>26</td>\n",
|
1084 |
+
" <td>182</td>\n",
|
1085 |
+
" </tr>\n",
|
1086 |
+
" <tr>\n",
|
1087 |
+
" <th>7</th>\n",
|
1088 |
+
" <td>8</td>\n",
|
1089 |
+
" <td>20</td>\n",
|
1090 |
+
" <td>160</td>\n",
|
1091 |
+
" </tr>\n",
|
1092 |
+
" <tr>\n",
|
1093 |
+
" <th>8</th>\n",
|
1094 |
+
" <td>9</td>\n",
|
1095 |
+
" <td>19</td>\n",
|
1096 |
+
" <td>171</td>\n",
|
1097 |
+
" </tr>\n",
|
1098 |
+
" <tr>\n",
|
1099 |
+
" <th>9</th>\n",
|
1100 |
+
" <td>10-50</td>\n",
|
1101 |
+
" <td>210</td>\n",
|
1102 |
+
" <td>4562</td>\n",
|
1103 |
+
" </tr>\n",
|
1104 |
+
" <tr>\n",
|
1105 |
+
" <th>10</th>\n",
|
1106 |
+
" <td>50-100</td>\n",
|
1107 |
+
" <td>41</td>\n",
|
1108 |
+
" <td>2588</td>\n",
|
1109 |
+
" </tr>\n",
|
1110 |
+
" <tr>\n",
|
1111 |
+
" <th>11</th>\n",
|
1112 |
+
" <td>100-1000</td>\n",
|
1113 |
+
" <td>47</td>\n",
|
1114 |
+
" <td>11609</td>\n",
|
1115 |
+
" </tr>\n",
|
1116 |
+
" <tr>\n",
|
1117 |
+
" <th>12</th>\n",
|
1118 |
+
" <td>1000+</td>\n",
|
1119 |
+
" <td>3</td>\n",
|
1120 |
+
" <td>5696</td>\n",
|
1121 |
+
" </tr>\n",
|
1122 |
+
" <tr>\n",
|
1123 |
+
" <th>13</th>\n",
|
1124 |
+
" <td>Total</td>\n",
|
1125 |
+
" <td>949</td>\n",
|
1126 |
+
" <td>26248</td>\n",
|
1127 |
+
" </tr>\n",
|
1128 |
+
" </tbody>\n",
|
1129 |
+
"</table>\n",
|
1130 |
+
"</div>"
|
1131 |
+
],
|
1132 |
+
"text/plain": [
|
1133 |
+
" production.date.ends per instance Number of instances \\\n",
|
1134 |
+
"0 1 285 \n",
|
1135 |
+
"1 2 120 \n",
|
1136 |
+
"2 3 63 \n",
|
1137 |
+
"3 4 46 \n",
|
1138 |
+
"4 5 32 \n",
|
1139 |
+
"5 6 37 \n",
|
1140 |
+
"6 7 26 \n",
|
1141 |
+
"7 8 20 \n",
|
1142 |
+
"8 9 19 \n",
|
1143 |
+
"9 10-50 210 \n",
|
1144 |
+
"10 50-100 41 \n",
|
1145 |
+
"11 100-1000 47 \n",
|
1146 |
+
"12 1000+ 3 \n",
|
1147 |
+
"13 Total 949 \n",
|
1148 |
+
"\n",
|
1149 |
+
" Number of production.date.ends \n",
|
1150 |
+
"0 285 \n",
|
1151 |
+
"1 240 \n",
|
1152 |
+
"2 189 \n",
|
1153 |
+
"3 184 \n",
|
1154 |
+
"4 160 \n",
|
1155 |
+
"5 222 \n",
|
1156 |
+
"6 182 \n",
|
1157 |
+
"7 160 \n",
|
1158 |
+
"8 171 \n",
|
1159 |
+
"9 4562 \n",
|
1160 |
+
"10 2588 \n",
|
1161 |
+
"11 11609 \n",
|
1162 |
+
"12 5696 \n",
|
1163 |
+
"13 26248 "
|
1164 |
+
]
|
1165 |
+
},
|
1166 |
+
"execution_count": 102,
|
1167 |
+
"metadata": {},
|
1168 |
+
"output_type": "execute_result"
|
1169 |
+
}
|
1170 |
+
],
|
1171 |
+
"source": [
|
1172 |
+
"get_distribution(full_df, \"production.date.end\", lower_bound=0)"
|
1173 |
+
]
|
1174 |
+
},
|
1175 |
+
{
|
1176 |
+
"cell_type": "code",
|
1177 |
+
"execution_count": 91,
|
1178 |
+
"metadata": {},
|
1179 |
+
"outputs": [
|
1180 |
+
{
|
1181 |
+
"data": {
|
1182 |
+
"text/html": [
|
1183 |
+
"<div>\n",
|
1184 |
+
"<style scoped>\n",
|
1185 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1186 |
+
" vertical-align: middle;\n",
|
1187 |
+
" }\n",
|
1188 |
+
"\n",
|
1189 |
+
" .dataframe tbody tr th {\n",
|
1190 |
+
" vertical-align: top;\n",
|
1191 |
+
" }\n",
|
1192 |
+
"\n",
|
1193 |
+
" .dataframe thead th {\n",
|
1194 |
+
" text-align: right;\n",
|
1195 |
+
" }\n",
|
1196 |
+
"</style>\n",
|
1197 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1198 |
+
" <thead>\n",
|
1199 |
+
" <tr style=\"text-align: right;\">\n",
|
1200 |
+
" <th></th>\n",
|
1201 |
+
" <th>start_year</th>\n",
|
1202 |
+
" <th>end_year</th>\n",
|
1203 |
+
" <th>year_diff</th>\n",
|
1204 |
+
" <th>mid_year</th>\n",
|
1205 |
+
" </tr>\n",
|
1206 |
+
" </thead>\n",
|
1207 |
+
" <tbody>\n",
|
1208 |
+
" <tr>\n",
|
1209 |
+
" <th>2</th>\n",
|
1210 |
+
" <td>-3000</td>\n",
|
1211 |
+
" <td>-3000</td>\n",
|
1212 |
+
" <td>0</td>\n",
|
1213 |
+
" <td>-3000</td>\n",
|
1214 |
+
" </tr>\n",
|
1215 |
+
" <tr>\n",
|
1216 |
+
" <th>142</th>\n",
|
1217 |
+
" <td>-600</td>\n",
|
1218 |
+
" <td>-332</td>\n",
|
1219 |
+
" <td>268</td>\n",
|
1220 |
+
" <td>-466</td>\n",
|
1221 |
+
" </tr>\n",
|
1222 |
+
" <tr>\n",
|
1223 |
+
" <th>143</th>\n",
|
1224 |
+
" <td>-1069</td>\n",
|
1225 |
+
" <td>-716</td>\n",
|
1226 |
+
" <td>353</td>\n",
|
1227 |
+
" <td>-893</td>\n",
|
1228 |
+
" </tr>\n",
|
1229 |
+
" <tr>\n",
|
1230 |
+
" <th>147</th>\n",
|
1231 |
+
" <td>-716</td>\n",
|
1232 |
+
" <td>-332</td>\n",
|
1233 |
+
" <td>384</td>\n",
|
1234 |
+
" <td>-524</td>\n",
|
1235 |
+
" </tr>\n",
|
1236 |
+
" <tr>\n",
|
1237 |
+
" <th>148</th>\n",
|
1238 |
+
" <td>-716</td>\n",
|
1239 |
+
" <td>-332</td>\n",
|
1240 |
+
" <td>384</td>\n",
|
1241 |
+
" <td>-524</td>\n",
|
1242 |
+
" </tr>\n",
|
1243 |
+
" <tr>\n",
|
1244 |
+
" <th>...</th>\n",
|
1245 |
+
" <td>...</td>\n",
|
1246 |
+
" <td>...</td>\n",
|
1247 |
+
" <td>...</td>\n",
|
1248 |
+
" <td>...</td>\n",
|
1249 |
+
" </tr>\n",
|
1250 |
+
" <tr>\n",
|
1251 |
+
" <th>60081</th>\n",
|
1252 |
+
" <td>218</td>\n",
|
1253 |
+
" <td>222</td>\n",
|
1254 |
+
" <td>4</td>\n",
|
1255 |
+
" <td>220</td>\n",
|
1256 |
+
" </tr>\n",
|
1257 |
+
" <tr>\n",
|
1258 |
+
" <th>60082</th>\n",
|
1259 |
+
" <td>1996</td>\n",
|
1260 |
+
" <td>1996</td>\n",
|
1261 |
+
" <td>0</td>\n",
|
1262 |
+
" <td>1996</td>\n",
|
1263 |
+
" </tr>\n",
|
1264 |
+
" <tr>\n",
|
1265 |
+
" <th>60083</th>\n",
|
1266 |
+
" <td>2016</td>\n",
|
1267 |
+
" <td>2016</td>\n",
|
1268 |
+
" <td>0</td>\n",
|
1269 |
+
" <td>2016</td>\n",
|
1270 |
+
" </tr>\n",
|
1271 |
+
" <tr>\n",
|
1272 |
+
" <th>60084</th>\n",
|
1273 |
+
" <td>1996</td>\n",
|
1274 |
+
" <td>1996</td>\n",
|
1275 |
+
" <td>0</td>\n",
|
1276 |
+
" <td>1996</td>\n",
|
1277 |
+
" </tr>\n",
|
1278 |
+
" <tr>\n",
|
1279 |
+
" <th>60085</th>\n",
|
1280 |
+
" <td>1996</td>\n",
|
1281 |
+
" <td>1996</td>\n",
|
1282 |
+
" <td>0</td>\n",
|
1283 |
+
" <td>1996</td>\n",
|
1284 |
+
" </tr>\n",
|
1285 |
+
" </tbody>\n",
|
1286 |
+
"</table>\n",
|
1287 |
+
"<p>26016 rows × 4 columns</p>\n",
|
1288 |
+
"</div>"
|
1289 |
+
],
|
1290 |
+
"text/plain": [
|
1291 |
+
" start_year end_year year_diff mid_year\n",
|
1292 |
+
"2 -3000 -3000 0 -3000\n",
|
1293 |
+
"142 -600 -332 268 -466\n",
|
1294 |
+
"143 -1069 -716 353 -893\n",
|
1295 |
+
"147 -716 -332 384 -524\n",
|
1296 |
+
"148 -716 -332 384 -524\n",
|
1297 |
+
"... ... ... ... ...\n",
|
1298 |
+
"60081 218 222 4 220\n",
|
1299 |
+
"60082 1996 1996 0 1996\n",
|
1300 |
+
"60083 2016 2016 0 2016\n",
|
1301 |
+
"60084 1996 1996 0 1996\n",
|
1302 |
+
"60085 1996 1996 0 1996\n",
|
1303 |
+
"\n",
|
1304 |
+
"[26016 rows x 4 columns]"
|
1305 |
+
]
|
1306 |
+
},
|
1307 |
+
"execution_count": 91,
|
1308 |
+
"metadata": {},
|
1309 |
+
"output_type": "execute_result"
|
1310 |
+
}
|
1311 |
+
],
|
1312 |
+
"source": [
|
1313 |
+
"year_df = pd.DataFrame()\n",
|
1314 |
+
"year_df[\"start_year\"] = full_df[\"production.date.start\"]\n",
|
1315 |
+
"year_df[\"end_year\"] = full_df[\"production.date.end\"]\n",
|
1316 |
+
"year_df = year_df.dropna()\n",
|
1317 |
+
"\n",
|
1318 |
+
"non_numeric_instances = year_df[\n",
|
1319 |
+
" pd.to_numeric(year_df[\"start_year\"], errors=\"coerce\").isna()\n",
|
1320 |
+
" | pd.to_numeric(year_df[\"end_year\"], errors=\"coerce\").isna()\n",
|
1321 |
+
"]\n",
|
1322 |
+
"# get non-numeric instances\n",
|
1323 |
+
"year_df = year_df[~year_df.index.isin(non_numeric_instances.index)]\n",
|
1324 |
+
"year_df[\"start_year\"] = year_df[\"start_year\"].astype(int)\n",
|
1325 |
+
"year_df[\"end_year\"] = year_df[\"end_year\"].astype(int)\n",
|
1326 |
+
"year_df[\"year_diff\"] = year_df[\"end_year\"] - year_df[\"start_year\"]\n",
|
1327 |
+
"\n",
|
1328 |
+
"year_df[\"mid_year\"] = year_df[\"start_year\"] + year_df[\"year_diff\"] / 2\n",
|
1329 |
+
"year_df[\"mid_year\"] = year_df[\"mid_year\"].apply(lambda x: int(np.floor(x)))\n",
|
1330 |
+
"year_df"
|
1331 |
+
]
|
1332 |
+
},
|
1333 |
+
{
|
1334 |
+
"cell_type": "code",
|
1335 |
+
"execution_count": 93,
|
1336 |
+
"metadata": {},
|
1337 |
+
"outputs": [
|
1338 |
+
{
|
1339 |
+
"data": {
|
1340 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEHUlEQVR4nO3df3zP9f7/8ft7s723sZ/YZjWzIhQdoTQRsTasRE4dUaFFTtvJIp2UJCpZ0RDJOZkU+XUcdRCWn6Xlxwr5kVJ+xraKmZ/bbK/vH332+nrb8DJv29vcrpeLS72er8f79Xq83k/b7l7v5/s9m2EYhgAAAHBBbhXdAAAAwNWA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEVGLDhw+XzWYrl3O1bdtWbdu2NbdXrVolm82mefPmlcv5e/furTp16pTLucrq+PHjevLJJxUaGiqbzaakpCSnn+NS5txms2n48OFO7wGorAhNwFVi2rRpstls5h8vLy+FhYUpNjZW48eP17Fjx5xynoMHD2r48OHatGmTU47nTK7cmxVvvPGGpk2bpr///e/66KOP9Nhjj523tk6dOrLZbIqOji51/7/+9S/z78LGjRuvSL9vvvmmbDabli5dWur+Tp06yd/fXwcPHrwi5wdcjgHgqpCammpIMkaMGGF89NFHxtSpU4033njDiImJMWw2mxEREWFs3rzZ4TEFBQXGqVOnLuk8GzZsMCQZqampl/S4vLw8Iy8vz9xeuXKlIcmYO3fuJR2nrL3l5+cbp0+fdtq5roQWLVoYd911l6XaiIgIw8vLy3BzczMOHTpUYn+bNm0MLy8vQ5KxYcMGc/xS5lyS8corr5x3f35+vtG4cWPjhhtuME6ePOmwb86cOYYkY+LEiZbOBVQG3GkCrjIdO3bUo48+qj59+mjIkCFaunSpvvjiC2VnZ6tz5846deqUWVulShV5eXld0X5OnjwpSfL09JSnp+cVPdeFeHh4yG63V9j5rcjOzlZAQIDl+rvuukvVqlXT7NmzHcYPHDigL7/8UnFxcSUe48w59/Dw0JQpU7Rnzx6NHDnSHD927JiSkpJ05513qn///k4514UUFRXp9OnTV/w8wMUQmoBKoF27dnr55Ze1d+9effzxx+Z4aetb0tLS1KpVKwUEBKhatWqqX7++XnzxRUl/rkO6/fbbJUl9+vQxX/6ZNm2apD/XLTVq1EgZGRm6++675ePjYz723DVNxQoLC/Xiiy8qNDRUVatWVefOnbV//36Hmjp16qh3794lHnv2MS/WW2lrmk6cOKFBgwYpPDxcdrtd9evX19tvvy3DMBzqbDabEhMTtWDBAjVq1Eh2u1233HKLlixZUvoTfo7s7GzFx8crJCREXl5e+stf/qIPP/zQ3F+8vmv37t1atGiR2fuePXsueFwvLy89+OCDmjlzpsP4J598osDAQMXGxpZ4TGlznpeXp2effVY1a9aUr6+vOnfurAMHDli6tuJg9Pbbb2v79u2SpKFDhyo7O1tTpkyRm5ubcnJylJSUZD7PdevW1ejRo1VUVORwrLffflstW7ZU9erV5e3trWbNmpW65q14PmbMmKFbbrlFdrvd8lwAV1KVim4AgHM89thjevHFF7Vs2TL17du31Jpt27bpvvvu06233qoRI0bIbrdr165dWrt2rSSpYcOGGjFihIYNG6Z+/fqpdevWkqSWLVuax/jjjz/UsWNHde/eXY8++qhCQkIu2Nfrr78um82mf/7zn8rOzlZKSoqio6O1adMmeXt7W74+K72dzTAMde7cWStXrlR8fLyaNGmipUuXavDgwfr111/1zjvvONR/9dVXmj9/vp5++mn5+vpq/Pjx6tatm/bt26fq1auft69Tp06pbdu22rVrlxITExUZGam5c+eqd+/eysnJ0YABA9SwYUN99NFHevbZZ3X99ddr0KBBkqSaNWte9Lp79OihmJgY/fzzz7rxxhslSTNnztRf//pXeXh4WHrunnzySX388cfq0aOHWrZsqRUrVpR6l+p8Ro0apQULFuipp55SSkqKJk6cqMGDB6tx48Y6efKk2rRpo19//VVPPfWUateura+//lpDhgzRoUOHlJKSYh5n3Lhx6ty5s3r27Kn8/HzNmjVLDz30kBYuXFiinxUrVmjOnDlKTExUjRo1XH6RP64RFf36IABritc0nb1+5Vz+/v7GbbfdZm6/8sorxtlf5u+8844hyfjtt9/Oe4wLrRtq06aNIcmYPHlyqfvatGljbhevabruuuuM3Nxcc7x4Lcy4cePMsYiICKNXr14XPeaFeuvVq5cRERFhbi9YsMCQZLz22msOdX/9618Nm81m7Nq1yxyTZHh6ejqMbd682ZBkTJgwocS5zpaSkmJIMj7++GNzLD8/34iKijKqVavmcO0RERFGXFzcBY93bu2ZM2eM0NBQY+TIkYZhGMb27dsNScbq1atL/Ttx7pxv2rTJkGQ8/fTTDsfv0aPHRdc0nW3evHmGJCMoKMhhjdPIkSONqlWrGj/++KND/QsvvGC4u7sb+/btM8fOXReVn59vNGrUyGjXrp3DuCTDzc3N2LZtm6XegPLCy3NAJVKtWrULvouueD3Np59+WuKlE6vsdrv69Oljuf7xxx+Xr6+vuf3Xv/5VtWrV0uLFi8t0fqsWL14sd3d3PfPMMw7jgwYNkmEY+vzzzx3Go6OjzTs5knTrrbfKz89Pv/zyy0XPExoaqkceecQc8/Dw0DPPPKPjx49r9erVl3Ud7u7uevjhh/XJJ59IkmbMmKHw8HDzTtvFFD/P5z4Pl/pxB926dVOnTp10+PBhTZw40bxLOHfuXLVu3VqBgYH6/fffzT/R0dEqLCzUmjVrzGOcfWfxyJEjOnr0qFq3bq1vv/22xPnatGmjm2+++ZJ6BK40QhNQiRw/ftwhoJzrb3/7m+666y49+eSTCgkJUffu3TVnzpxLClDXXXfdJS34rlevnsO2zWZT3bp1L7qe53Lt3btXYWFhJZ6Phg0bmvvPVrt27RLHCAwM1JEjRy56nnr16snNzfHb6fnOUxY9evTQ9u3btXnzZs2cOVPdu3e3/FlMe/fulZubm0MglKT69etfch/Fa8qaN29ujv30009asmSJatas6fCn+KMSsrOzzdqFCxfqzjvvlJeXl4KCglSzZk299957Onr0aIlzRUZGXnJ/wJXGmiagkjhw4ICOHj2qunXrnrfG29tba9as0cqVK7Vo0SItWbJEs2fPVrt27bRs2TK5u7tf9DyXsg7JqvMFgMLCQks9OcP5zmOcs2i8IrRo0UI33nijkpKStHv3bvXo0aOiWzIVFRXp3nvv1fPPP1/q/ptuukmS9OWXX6pz5866++67NWnSJNWqVUseHh5KTU0tsdBdujJ/z4DLRWgCKomPPvpIkkp9R9XZ3Nzc1L59e7Vv315jx47VG2+8oZdeekkrV65UdHS00z9B/KeffnLYNgxDu3bt0q233mqOBQYGKicnp8Rj9
+7dqxtuuMHcvpTeIiIi9MUXX+jYsWMOd5t++OEHc78zREREaMuWLSoqKnK42+Ts8zzyyCN67bXX1LBhQzVp0uSS+isqKtLPP//scHdp586dTunrxhtv1PHjx8/7IZzF/vOf/8jLy0tLly51+GiI1NRUp/QBlAdengMqgRUrVmjkyJGKjIxUz549z1t3+PDhEmPFP4Dz8vIkSVWrVpWkUkNMWUyfPt1hndW8efN06NAhdezY0Ry78cYb9c033yg/P98cW7hwYYmPJriU3jp16qTCwkK9++67DuPvvPOObDabw/kvR6dOnZSZmenwWUpnzpzRhAkTVK1aNbVp08Yp53nyySf1yiuvaMyYMZf0uOLrHD9+vMP42e9quxwPP/yw0tPTS/3U8JycHJ05c0bSn3fybDabCgsLzf179uzRggULnNIHUB640wRcZT7//HP98MMPOnPmjLKysrRixQqlpaUpIiJCn3322QU/2HDEiBFas2aN4uLiFBERoezsbE2aNEnXX3+9WrVqJenPABMQEKDJkyfL19dXVatWVYsWLcq8xiQoKEitWrVSnz59lJWVpZSUFNWtW9fhYxGefPJJzZs3Tx06dNDDDz+sn3/+WR9//HGJdTiX0tv999+ve+65Ry+99JL27Nmjv/zlL1q2bJk+/fRTJSUllTh2WfXr10/vv/++evfurYyMDNWpU0fz5s3T2rVrlZKScsE1ZpciIiKiTL8nrkmTJnrkkUc0adIkHT16VC1bttTy5cu1a9cup/Q1ePBgffbZZ7rvvvvUu3dvNWvWTCdOnND333+vefPmac+ePapRo4bi4uI0duxYdejQQT169FB2drYmTpyounXrasuWLU7pBbjSCE3AVWbYsGGS/vwE7qCgIDVu3FgpKSnq06fPRX9Ad+7cWXv27NHUqVP1+++/q0aNGmrTpo1effVV+fv7S/rznV8ffvihhgwZov79++vMmTNKTU0tc2h68cUXtWXLFo0aNUrHjh1T+/btNWnSJPn4+Jg1sbGxGjNmjMaOHaukpCQ1b95cCxcuND/PqNil9Obm5qbPPvtMw4YN0+zZs5Wamqo6derorbfeKnHcy+Ht7a1Vq1bphRde0Icffqjc3FzVr19fqamppX5gZ0WYOnWqatasqRkzZmjBggVq166dFi1apPDw8Ms+to+Pj1avXq033nhDc+fO1fTp0+Xn56ebbrrJ4e9Vu3bt9MEHH+jNN99UUlKSIiMjNXr0aO3Zs4fQhKuGzXCFVY4AAAAujjVNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAI+p8lJioqKdPDgQfn6+jr911AAAIArwzAMHTt2TGFhYSV+8fa5CE1OcvDgQad8UBwAACh/+/fv1/XXX3/BGkKTkxR/EvP+/fvl5+dXwd04T0FBgZYtW6aYmBh5eHhUdDs4C3Pj2pgf18b8uK7ynpvc3FyFh4db+pVHhCYnKX5Jzs/Pr9KFJh8fH/n5+fGNxcUwN66N+XFtzI/rqqi5sbK0hoXgAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFVSq6AQAAULnVeWHRRWv2vBlXDp1cHu40AQAAWFChoWnNmjW6//77FRYWJpvNpgULFjjsNwxDw4YNU61ateTt7a3o6Gj99NNPDjWHDx9Wz5495efnp4CAAMXHx+v48eMONVu2bFHr1q3l5eWl8PBwJScnl+hl7ty5atCggby8vNS4cWMtXrzY6dcLAACuXhUamk6cOKG//OUvmjhxYqn7k5OTNX78eE2ePFnr1q1T1apVFRsbq9OnT5s1PXv21LZt25SWlqaFCxdqzZo16tevn7k/NzdXMTExioiIUEZGht566y0NHz5cU6ZMMWu+/vprPfLII4qPj9d3332nLl26qEuXLtq6deuVu3gAAHBVqdA1TR07dlTHjh1L3WcYhlJSUjR06FA98MADkqTp06crJCRECxYsUPfu3bVjxw4tWbJEGzZsUPPmzSVJEyZMUKdOnfT2228rLCxMM2bMUH5+vqZOnSpPT0/dcsst2rRpk8aOHWuGq3HjxqlDhw4aPHiwJGnkyJFKS0vTu+++q8mTJ5fDMwEAAFydyy4E3717tzIzMxUdHW2O+fv7q0WLFkpPT1f37t2Vnp6ugIAAMzBJUnR0tNzc3LRu3Tp17dpV6enpuvvuu+Xp6WnWxMbGavTo0Tpy5IgCAwOVnp6ugQMHOpw/Nja2xMuFZ8vLy1NeXp65nZubK0kqKChQQUHB5V6+yyi+lsp0TZUFc+PamB/XxvyUL7u7cdGac+ekvObmUs7jsqEpMzNTkhQSEuIwHhISYu7LzMxUcHCww/4qVaooKCjIoSYyMrLEMYr3BQYGKjMz84LnKc2oUaP06quvlhhftmyZfHx8rFziVSUtLa2iW8B5MDeujflxbcxP+Ui+4+I1564lLq+5OXnypOValw1Nrm7IkCEOd6dyc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCZUNTaGioJCkrK0u1atUyx7OystSkSROzJjs72+FxZ86c0eHDh83Hh4aGKisry6GmePtiNcX7S2O322W320uMe3h4VMovwMp6XZUBc+PamB/XxvyUj7xC20Vrzp2H8pqbSzmHy35OU2RkpEJDQ7V8+XJzLDc3V+vWrVNUVJQkKSoqSjk5OcrIyDBrVqxYoaKiIrVo0cKsWbNmjcNrlmlpaapfv74CAwPNmrPPU1xTfB4AAIAKDU3Hjx/Xpk2btGnTJkl/Lv7etGmT9u3bJ5vNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DS2oABA7RkyRKNGTNGP/zwg4YPH66NGzcqMTGxvJ8SAADgoir05bmNGzfqnnvuMbeLg0yvXr00bdo0Pf/88zpx4oT69eunnJwctWrVSkuWLJGXl5f5mBkzZigxMVHt27eXm5ubunXrpvHjx5v7/f39tWzZMiUkJKhZs2aqUaOGhg0b5vBZTi1bttTMmTM1dOhQvfjii6pXr54WLFigRo0alcOzAAAArgYVGpratm0rwzj/2xBtNptGjBihESNGnLcmKChIM2fOvOB5br31Vn355ZcXrHnooYf00EMPXbhhAABwzXLZNU0AAACuhNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJ
AADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYIFLh6bCwkK9/PLLioyMlLe3t2688UaNHDlShmGYNYZhaNiwYapVq5a8vb0VHR2tn376yeE4hw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ5fLNQIAgKuDS4em0aNH67333tO7776rHTt2aPTo0UpOTtaECRPMmuTkZI0fP16TJ0/WunXrVLVqVcXGxur06dNmTc+ePbVt2zalpaVp4cKFWrNmjfr162fuz83NVUxMjCIiIpSRkaG33npLw4cP15QpU8r1egEAgOuqUtENXMjXX3+tBx54QHFxcZKkOnXq6JNPPtH69esl/XmXKSUlRUOHDtUDDzwgSZo+fbpCQkK0YMECde/eXTt27NCSJUu0YcMGNW/eXJI0YcIEderUSW+//bbCwsI0Y8YM5efna+rUqfL09NQtt9yiTZs2aezYsQ7hCgAAXLtcOjS1bNlSU6ZM0Y8//qibbrpJmzdv1ldffaWxY8dKknbv3q3MzExFR0ebj/H391eLFi2Unp6u7t27Kz09XQEBAWZgkqTo6Gi5ublp3bp16tq1q9LT03X33XfL09PTrImNjdXo0aN15MgRBQYGlugtLy9PeXl55nZubq4kqaCgQAUFBU5/LipK8bVUpmuqLJgb18b8uDbmp3zZ3Y2L1pw7J+U1N5dyHpcOTS+88IJyc3PVoEEDubu7q7CwUK+//rp69uwpScrMzJQkhYSEODwuJCTE3JeZmang4GCH/VWqVFFQUJBDTWRkZIljFO8rLTSNGjVKr776aonxZcuWycfHpyyX69LS0tIqugWcB3Pj2pgf18b8lI/kOy5es3jxYoft8pqbkydPWq516dA0Z84czZgxQzNnzjRfMktKSlJYWJh69epVob0NGTJEAwcONLdzc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCpUPT4MGD9cILL6h79+6SpMaNG2vv3r0aNWqUevXqpdDQUElSVlaWatWqZT4uKytLTZo0kSSFhoYqOzvb4bhnzpzR4cOHzceHhoYqKyvLoaZ4u7jmXHa7XXa7vcS4h4dHpfwCrKzXVRkwN66N+XFtzE/5yCu0XbTm3Hkor7m5lHO49LvnTp48KTc3xxbd3d1VVFQkSYqMjFRoaKiWL19u7s/NzdW6desUFRUlSYqKilJOTo4yMjLMmhUrVqioqEgtWrQwa9asWePwumZaWprq169f6ktzAADg2uPSoen+++/X66+/rkWLFmnPnj3673//q7Fjx6pr166SJJvNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DyGwAAuLa59MtzEyZM0Msvv6ynn35a2dnZCgsL01NPPaVhw4aZNc8//7xOnDihfv36KScnR61atdKSJUvk5eVl1syYMUOJiYlq37693Nzc1K1bN40fP97c7+/vr2XLlikhIUHNmjVTjRo1NGzYMD5uAAAAmFw6NPn6+iolJUUpKSnnrbHZbBoxYoRGjBhx3pqgoCDNnDnzgue69dZb9eWXX5a1VQAAUMm59MtzAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAVlCk2//PKLs/sAAABwaWUKTXXr1tU999yjjz/+WKdPn3Z2TwAAAC6nTKHp22+/1a233qqBAwcqNDRUTz31lNavX+/s3gAAAFxGmUJTkyZNNG7cOB08eFBTp07VoUOH1KpVKzVq1Ehjx47Vb7/95uw+AQAAKtRlLQSvUqWKHnzwQc2dO1ejR4/Wrl279Nxzzyk8PFyPP/64Dh065Kw+AQAAKtRlhaaNGzfq6aefVq1atTR27Fg999xz+vnnn5WWlqaDBw/qgQcecFafAAAAFapMoWns2LFq3LixWrZsqYMHD2r69Onau3evXnvtNUVGRqp169aaNm2avv3228tu8Ndff9Wjjz6q6tWry9vbW40bN9bGjRvN/YZhaNiwYapVq5a8vb0VHR2tn376yeEYhw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ1927wAAoPIoU2h677331KNHD+3du1cLFizQfffdJzc3x0MFBwfrgw8+uKzmjhw5orvuukseHh76/PPPtX37do0ZM0aBgYFmTXJyssaPH6/Jkydr3bp1qlq1qmJjYx3e1dezZ09t27ZNaWlpWrhwodasWaN+/fqZ+3NzcxUTE6OIiAhlZGTorbfe0vDhwzVlypTL6h8AAFQeVcryoHPv5JTG09NTvXr1KsvhTaNHj1Z4eLhSU1PNscjISPP/DcNQSkqKhg4dar4UOH36dIWEhGjBggXq3r27duzYoSVLlmjDhg1q3ry5JGnChAnq1KmT3n77bYWFhWnGjBnKz8/X1KlT5enpqVtuuUWbNm3S2LFjHcIVAAC4dpUpNKWmpqpatWp66KGHHMbnzp2rkydPXnZYKvbZZ58pNjZWDz30kFavXq3rrrtOTz/9tPr27StJ2r17tzIzMxUdHW0+xt/fXy1atFB6erq6d++u9PR0BQQEmIFJkqKjo+Xm5qZ169apa9euSk9P19133y1PT0+zJjY2VqNHj9aRI0cc7mwVy8vLU15enrmdm5srSSooKFBBQYFTrt8VFF9LZbqmyoK5cW3Mj2tjfsqX3d24aM25c1Jec3Mp5ylTaBo1apTef//9EuPBwcHq16+f00LTL7/8ovfee08DBw7Uiy++qA0bNuiZZ54x72JlZmZKkkJCQhweFxISYu7LzMxUcHCww/4qVaooKCjIoebsO1hnHzMzM7PU0DRq1Ci9+uq
rJcaXLVsmHx+fMl6x60pLS6voFnAezI1rY35cG/NTPpLvuHjN4sWLHbbLa25OnjxpubZMoWnfvn0lQoYkRUREaN++fWU5ZKmKiorUvHlzvfHGG5Kk2267TVu3btXkyZOdFszKasiQIRo4cKC5nZubq/DwcMXExMjPz68CO3OugoICpaWl6d5775WHh0dFt4OzMDeujflxbcxP+Wo0fOlFa7YOj5VU/nNT/EqRFWUKTcHBwdqyZYvq1KnjML5582ZVr169LIcsVa1atXTzzTc7jDVs2FD/+c9/JEmhoaGSpKysLNWqVcusycrKUpMmTcya7Oxsh2OcOXNGhw8fNh8fGhqqrKwsh5ri7eKac9ntdtnt9hLjHh4elfILsLJeV2XA3Lg25se1MT/lI6/QdtGac+ehvObmUs5RpnfPPfLII3rmmWe0cuVKFRYWqrCwUCtWrNCAAQPUvXv3shyyVHfddZd27tzpMPbjjz8qIiJC0p+LwkNDQ7V8+XJzf25urtatW6eoqChJUlRUlHJycpSRkWHWrFixQkVFRWrRooVZs2bNGofXNdPS0lS/fv1SX5oDAADXnjKFppEjR6pFixZq3769vL295e3trZiYGLVr1858Kc0Znn32WX3zzTd64403tGvXLs2cOVNTpkxRQkKCJMlmsykpKUmvvfaaPvvsM33//fd6/PHHFRYWpi5dukj6885Uhw4d1LdvX61fv15r165VYmKiunfvrrCwMElSjx495Onpqfj4eG3btk2zZ8/WuHHjHF5+AwAA17YyvTzn6emp2bNna+TIkdq8ebP5oZPFd4Cc5fbbb9d///tfDRkyRCNGjFBkZKRSUlLUs2dPs+b555/XiRMn1K9fP+Xk5KhVq1ZasmSJvLy8zJoZM2YoMTFR7du3l5ubm7p166bx48eb+/39/bVs2TIlJCSoWbNmqlGjhoYNG8bHDQAAAFOZQlOxm266STfddJOzeinVfffdp/vuu++8+202m0aMGKERI0actyYoKEgzZ8684HluvfVWffnll2XuEwAAVG5lCk2FhYWaNm2ali9fruzsbBUVFTnsX7FihVOaAwAAcBVlCk0DBgzQtGnTFBcXp0aNGslmu/iqeAAAgKtZmULTrFmzNGfOHHXq1MnZ/QAAALikMr17ztPTU3Xr1nV2LwAAAC6rTKFp0KBBGjdunAzj4r9LBgAAoDIo08tzX331lVauXKnPP/9ct9xyS4lP05w/f75TmgMAAHAVZQpNAQEB6tq1q7N7AQAAcFllCk2pqanO7gMAAMCllWlNk/TnL7394osv9P777+vYsWOSpIMHD+r48eNOaw4AAMBVlOlO0969e9WhQwft27dPeXl5uvfee+Xr66vRo0crLy9PkydPdnafAAAAFapMd5oGDBig5s2b68iRI/L29jbHu3btquXLlzutOQAAAFdRpjtNX375pb7++mt5eno6jNepU0e//vqrUxoDAABwJWW601RUVKTCwsIS4wcOHJCvr+9lNwUAAOBqyhSaYmJilJKSYm7bbDYdP35cr7zyCr9aBQAAVEplenluzJgxio2N1c0336zTp0+rR48e+umnn1SjRg198sknzu4RAACgwpUpNF1//fXavHmzZs2apS1btuj48eOKj49Xz549HRaGAwAAVBZlCk2SVKVKFT366KPO7AUAAMBllSk0TZ8+/YL7H3/88TI1AwAA4KrKFJoGDBjgsF1QUKCTJ0/K09NTPj4+hCYAAFDplOndc0eOHHH4c/z4ce3cuVOtWrViITgAAKiUyvy7585Vr149vfnmmyXuQgEAAFQGTgtN0p+Lww8ePOjMQwIAALiEMq1p+uyzzxy2DcPQoUOH9O677+quu+5ySmMAAACupEyhqUuXLg7bNptNNWvWVLt27TRmzBhn9AUAAOBSyhSaioqKnN0HAACAS3PqmiYAAIDKqkx3mgYOHGi5duzYsWU5BQAAgEspU2j67rvv9N1336mgoED169eXJP34449yd3dX06ZNzTqbzeacLgEAACpYmULT/fffL19fX3344YcKDAyU9OcHXvbp00etW7fWoEGDnNokAABARSvTmqYxY8Zo1KhRZmCSpMDAQL322mu8ew4AAFRKZQpNubm5+u2330qM//bbbzp27NhlNwUAAOBqyhSaunbtqj59+mj+/Pk6cOCADhw4oP/85z+Kj4/Xgw8+6OweAQAAKlyZ1jRNnjxZzz33nHr06KGCgoI/D1SliuLj4/XWW285tUEAAABXUKbQ5OPjo0mTJumtt97Szz//LEm68cYbVbVqVac2BwAA4Cou68MtDx06pEOHDqlevXqqWrWqDMNwVl8AAAAupUx3mv744w89/PDDWrlypWw2m3766SfdcMMNio+PV2BgIO+gAwAAl6TOC4skSXZ3Q8l3SI2GL1VeoePnPe55M64iWjOV6U7Ts88+Kw8PD+3bt08+Pj7m+N/+9jctWbLEac0BAAC4ijLdaVq2bJmWLl2q66+/3mG8Xr162rt3r1MaAwAAcCVlutN04sQJhztMxQ4fPiy73X7ZTQEAALiaMoWm1q1ba/r06ea2zWZTUVGRkpOTdc899zitOQAAAFdRppfnkpOT1b59e23cuFH5+fl6/vnntW3bNh0+fFhr1651do8AAAAVrkx3mho1aqQff/xRrVq10gMPPKATJ07owQcf1Hfffacbb7zR2T0CAABUuEu+01RQUKAOHTpo8uTJeumll65ETwAAAC7nku80eXh4aMuWLVeiFwAAAJdVppfnHn30UX3wwQfO7gUAAMBllWkh+JkzZzR16lR98cUXatasWYnfOTd27FinNAcAAOAqLik0/fLLL6pTp462bt2qpk2bSpJ+/PFHhxqbzVbaQwEAAK5qlxSa6tWrp0OHDmnlypWS/vy1KePHj1dISMgVaQ4AAMBVXNKaJsMwHLY///xznThxwqkNAQAAuKIyLQQvdm6IAgAAqKwuKTTZbLYSa5ZYwwQAAK4Fl7SmyTAM9e7d2/ylvKdPn1b//v1LvHtu/vz5zusQAADABVxSaOrVq5fD9qOPPurUZgAAAFzVJYWm1NTUK9UHAACAS7usheAAAADXCkITAACABYQmAAAACwhNAAAAFlxVoenNN9+UzWZTUlKSOXb69GklJCSoevXqqlatmrp166asrCyHx+3bt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0crgiAABwtbhqQtOGDRv0/vvv69Zbb3UYf/bZZ/W///1Pc+fO1erVq3Xw4EE9+OCD5v7CwkLFxcUpPz9fX3/9tT788ENNmzZNw4YNM2t2796tuLg43XPPPdq0aZOSkpL05JNPaunSpeV2fQAAwLVdFaHp+PHj6tmzp/71r38pMDDQHD969Kg++OADjR07Vu3atVOzZs2Umpqqr7/+Wt98840kadmyZdq+fbs+/vhjNWnSRB07dtTIkSM1ceJE5efnS5ImT56syMhIjRkzRg0bNlRiYqL++te/6p133qmQ6wUAAK7nqghNCQkJiouLU3R0tMN4Rk
aGCgoKHMYbNGig2rVrKz09XZKUnp6uxo0bKyQkxKyJjY1Vbm6utm3bZtace+zY2FjzGAAAAJf04ZYVYdasWfr222+1YcOGEvsyMzPl6empgIAAh/GQkBBlZmaaNWcHpuL9xfsuVJObm6tTp07J29u7xLnz8vKUl5dnbufm5kqSCgoKVFBQcIlX6bqKr6UyXVNlwdy4NubHtTE/5cvublivdTMc/nu2KzFfl3JMlw5N+/fv14ABA5SWliYvL6+KbsfBqFGj9Oqrr5YYX7ZsmXx8fCqgoysrLS2tolvAeTA3ro35cW3MT/lIvuPSHzOyeVGJscWLFzuhG0cnT560XOvSoSkjI0PZ2dlq2rSpOVZYWKg1a9bo3Xff1dKlS5Wfn6+cnByHu01ZWVkKDQ2VJIWGhmr9+vUOxy1+d93ZNee+4y4rK0t+fn6l3mWSpCFDhmjgwIHmdm5ursLDwxUTEyM/P7+yX7SLKSgoUFpamu699155eHhUdDs4C3Pj2pgf18b8lK9Gw62/scruZmhk8yK9vNFNeUU2h31bh8c6uzXzlSIrXDo0tW/fXt9//73DWJ8+fdSgQQP985//VHh4uDw8PLR8+XJ169ZNkrRz507t27dPUVFRkqSoqCi9/vrrys7OVnBwsKQ//2Xh5+enm2++2aw5N72mpaWZxyiN3W6X3W4vMe7h4VEpvwAr63VVBsyNa2N+XBvzUz7yCm0XLzr3MUW2Eo+7EnN1Kcd06dDk6+urRo0aOYxVrVpV1atXN8fj4+M1cOBABQUFyc/PT//4xz8UFRWlO++8U5IUExOjm2++WY899piSk5OVmZmpoUOHKiEhwQw9/fv317vvvqvnn39eTzzxhFasWKE5c+Zo0aJF5XvBAADAZbl0aLLinXfekZubm7p166a8vDzFxsZq0qRJ5n53d3ctXLhQf//73xUVFaWqVauqV69eGjFihFkTGRmpRYsW6dlnn9W4ceN0/fXX69///rdiY51/GxAAAFydrrrQtGrVKodtLy8vTZw4URMnTjzvYyIiIi66eKxt27b67rvvnNEiAACohK6Kz2kCAACoaIQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMCCKhXdAAAArqrOC4suWrPnzbhy6ASugDtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGBBlYpuAACAitJo+FLlFdoqug1cJbjTBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAUuHZpGjRql22+/Xb6+vgoODlaXLl20c+dOh5rTp08rISFB1atXV7Vq1dStWzdlZWU51Ozbt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0K315AADgKuLSoWn16tVKSEjQN998o7S0NBUUFCgmJkYnTpwwa5599ln973//09y5c7V69WodPHhQDz74oLm/sLBQcXFxys/P19dff60PP/xQ06ZN07Bhw8ya3bt3Ky4uTvfcc482bdqkpKQkPfnkk1q6dGm5Xi8AAHBdVSq6gQtZsmSJw/a0adMUHBysjIwM3X333Tp69Kg++OADzZw5U+3atZMkpaamqmHDhvrmm2905513atmyZdq+fbu++OILhYSEqEmTJho5cqT++c9/avjw4fL09NTkyZMVGRmpMWPGSJIaNmyor776Su+8845iY2PL/boBAIDrcek7Tec6evSoJCkoKEiSlJGRoYKCAkVHR5s1DRo0UO3atZWeni5JSk9PV+PGjRUSEmLWxMbGKjc3V9u2bTNrzj5GcU3xMQAAAFz6TtPZioqKlJSUpLvuukuNGjWSJGVmZsrT01MBAQEOtSEhIcrMzDRrzg5MxfuL912oJjc3V6dOnZK3t3eJfvLy8pSXl2du5+bmSpIKCgpUUFBwGVfqWoqvpTJdU2XB3Lg25se1Fc+L3c1w2rFwfnZ3689z8ZyUNjdX4rm+lGNeNaEpISFBW7du1VdffVXRrUj6c5H6q6++WmJ82bJl8vHxqYCOrqy0tLSKbgHnwdy4NubHtY1sXnTZx1i8eLETOqncku+49MeUNjdX4rk+efKk5dqrIjQlJiZq4cKFWrNmja6//npzPDQ0VPn5+crJyXG425SVlaXQ0FCzZv369Q7HK3533dk1577jLisrS35+fqXeZZKkIUOGaODAgeZ2bm6uwsPDFRMTIz8/v7JfrIspKChQWlqa7r33Xnl4eFR0OzgLc+PamB/XVjw/L290U16R7bKOtXU4a18vptFw62+ssrsZGtm8qNS5uRLPdfErRVa4dGgyDEP/+Mc/9N///lerVq1SZGSkw/5mzZrJw8NDy5cvV7du3SRJO3fu1L59+xQVFSVJioqK0uuvv67s7GwFBwdL+vNffn5+frr55pvNmnPTa1pamnmM0tjtdtnt9hLjHh4elfIbZGW9rsqAuXFtzI9ryyuyKa/w8kIT83txZXmOS5ubK/FcX8oxXTo0JSQkaObMmfr000/l6+trrkHy9/eXt7e3/P39FR8fr4EDByooKEh+fn76xz/+oaioKN15552SpJiYGN1888167LHHlJycrMzMTA0dOlQJCQlm6Onfv7/effddPf/883riiSe0YsUKzZkzR4sWLaqwawcAAK7Fpd8999577+no0aNq27atatWqZf6ZPXu2WfPOO+/ovvvuU7du3XT33XcrNDRU8+fPN/e7u7tr4cKFcnd3V1RUlB599FE9/vjjGjFihFkTGRmpRYsWKS0tTX/5y180ZswY/fvf/+bjBgAAgMml7zQZxsVX23t5eWnixImaOHHieWsiIiIuunisbdu2+u677y65RwAAcG1w6TtNAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACxw6c9pAgAArq3OC9fOb8/gThMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgQZWKbgAAADhPnRcWWarb82bcFe6k8uFOEwAAgAWEJgAAAAsITQAAA
BYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYUKWiGwAAAOWvzguLLlqz5824cujk6kFoAgDgKmEl6ODKITQBQCXFnQTAuQhNAAC4AFe8i+SKPVUkQhOuGfyrG7h2XOzr3e5uKPmOcmoGlQbvnjvHxIkTVadOHXl5ealFixZav359RbcEAABcAHeazjJ79mwNHDhQkydPVosWLZSSkqLY2Fjt3LlTwcHBFd0eAFzVuNuLqx2h6Sxjx45V37591adPH0nS5MmTtWjRIk2dOlUvvPBCBXcHoCJV1h/4rrZmxdX6saKy/t1ASYSm/5Ofn6+MjAwNGTLEHHNzc1N0dLTS09MrsDMAl8PVfgg76wesq10XLoz5qhwITf/n999/V2FhoUJCQhzGQ0JC9MMPP5Soz8vLU15enrl99OhRSdLhw4dVUFBwZZstRwUFBTp58qT++OMPeXh4VHQ7l6XKmRMXrfnjjz/KoRPnqIi5aTFq+UVr1g1pXw6d/MlKP+X5Ta7uc3PM/7e7GRp6W5GavDRfeUW2S+rn7OOcD9+8L0+VIkMnTxapSoGbCs+aH1S8C83NlfgefezYMUmSYRgX783pZ79GjBo1Sq+++mqJ8cjIyAroBs5SY0xFd3D14zn8/3pUdAO4IObHdZ1vbq7k95djx47J39//gjWEpv9To0YNubu7Kysry2E8KytLoaGhJeqHDBmigQMHmttFRUU6fPiwqlevLput8vyrJTc3V+Hh4dq/f7/8/Pwquh2chblxbcyPa2N+XFd5z41hGDp27JjCwsIuWkto+j+enp5q1qyZli9fri5dukj6MwgtX75ciYmJJertdrvsdrvDWEBAQDl0WjH8/Pz4xuKimBvXxvy4NubHdZXn3FzsDlMxQtNZBg4cqF69eql58+a64447lJKSohMnTpjvpgMAANcuQtNZ/va3v+m3337TsGHDlJmZqSZNmmjJkiUlFocDAIBrD6HpHImJiaW+HHetstvteuWVV0q8FImKx9y4NubHtTE/rsuV58ZmWHmPHQAAwDWO3z0HAABgAaEJAADAAkITAACABYQmAAAACwhN17BFixapRYsW8vb2VmBgoPmhnsX27dunuLg4+fj4KDg4WIMHD9aZM2ccalatWqWmTZvKbrerbt26mjZtWonzTJw4UXXq1JGXl5datGih9evXX8Grqlzy8vLUpEkT2Ww2bdq0yWHfli1b1Lp1a3l5eSk8PFzJycklHj937lw1aNBAXl5eaty4sRYvXuyw3zAMDRs2TLVq1ZK3t7eio6P1008/XclLuqrt2bNH8fHxioyMlLe3t2688Ua98sorys/Pd6hjblwb35OuvFGjRun222+Xr6+vgoOD1aVLF+3cudOh5vTp00pISFD16tVVrVo1devWrcRv5XDWzyGnMXBNmjdvnhEYGGi89957xs6dO41t27YZs2fPNvefOXPGaNSokREdHW189913xuLFi40aNWoYQ4YMMWt++eUXw8fHxxg4cKCxfft2Y8KECYa7u7uxZMkSs2bWrFmGp6enMXXqVGPbtm1G3759jYCAACMrK6tcr/dq9cwzzxgdO3Y0JBnfffedOX706FEjJCTE6Nmzp7F161bjk08+Mby9vY3333/frFm7dq3h7u5uJCcnG9u3bzeGDh1qeHh4GN9//71Z8+abbxr+/v7GggULjM2bNxudO3c2IiMjjVOnTpXnZV41Pv/8c6N3797G0qVLjZ9//tn49NNPjeDgYGPQoEFmDXPj2vieVD5iY2ON1NRUY+vWrcamTZuMTp06GbVr1zaOHz9u1vTv398IDw83li9fbmzcuNG48847jZYtW5r7nfVzyJkITdeggoIC47rrrjP+/e9/n7dm8eLFhpubm5GZmWmOvffee4afn5+Rl5dnGIZhPP/888Ytt9zi8Li//e1vRmxsrLl9xx13GAkJCeZ2YWGhERYWZowaNcpZl1NpLV682GjQoIGxbdu2EqFp0qRJRmBgoDkXhmEY//znP4369eub2w8//LARFxfncMwWLVoYTz31lGEYhlFUVGSEhoYab731lrk/JyfHsNvtxieffHKFrqrySU5ONiIjI81t5sa18T2pYmRnZxuSjNWrVxuG8effZw8PD2Pu3LlmzY4dOwxJRnp6umEYzvs55Ey8PHcN+vbbb/Xrr7/Kzc1Nt912m2rVqqWOHTtq69atZk16eroaN27s8GnosbGxys3N1bZt28ya6Ohoh2PHxsYqPT1dkpSfn6+MjAyHGjc3N0VHR5s1KF1WVpb69u2rjz76SD4+PiX2p6en6+6775anp6c5Fhsbq507d+rIkSNmzYXmZ/fu3crMzHSo8ff3V4sWLZifS3D06FEFBQWZ28yN6+J7UsU5evSoJJlfKxkZGSooKHCYiwYNGqh27drmXDjj55CzEZquQb/88oskafjw4Ro6dKgWLlyowMBAtW3bVocPH5YkZWZmlvj1McXbmZmZF6zJzc3VqVOn9Pvvv6uwsLDUmuJjoCTDMNS7d2/1799fzZs3L7Xmcubn7P1nP660GlzYrl27NGHCBD311FPmGHPjuvieVDGKioqUlJSku+66S40aNZL0599xT0/PEr/o/tyvg8v9OeRshKZK5IUXXpDNZrvgnx9++EFFRUWSpJdeekndunVTs2bNlJqaKpvNprlz51bwVVReVudnwoQJOnbsmIYMGVLRLV8zrM7N2X799Vd16NBBDz30kPr27VtBnQOuLyEhQVu3btWsWbMqupXLxu+eq0QGDRqk3r17X7Dmhhtu0KFDhyRJN998szlut9t1ww03aN++fZKk0NDQEu8oKX5XQ2hoqPnfc9/pkJWVJT8/P3l7e8vd3V3u7u6l1hQf41pidX5WrFih9PT0Er93qXnz5urZs6c+/PDD8z730sXn5+z9xWO1atVyqGnSpMklX9/VzOrcFDt48KDuuecetWzZUlOmTHGoY25cV40aNfieVM4SExO1cOFCrVmzRtdff705Hhoaqvz8fOXk5DjcbTr36+Byfw453RVZKQWXdvToUcNutzssBM/PzzeCg4PNd/gUL8A7+x0l77//vuHn52ecPn3aMIw/F+A1atTI4diPPPJIiYXgiYmJ5nZhYaFx3XXXsejyAvbu3Wt8//335p+lS5cakox58+YZ+/fvNwzj/y82zs/PNx83ZMiQEouN77vvPodjR0VFlVhs/Pbbb5v7i/9usNj4/A4cOGDUq1fP6N69u3HmzJkS+5kb18b3pPJRVFRkJCQkGGFhYcaPP/5YYn/xQvB58+aZYz/88EOpC8Ev9+eQMxGarlEDBgwwrrvuOmPp0qXGDz/8YMTHxxvBwcHG4cOHDcP4/2/1jImJMTZt2mQsWbLEqFmzZqlv9Rw8eLCxY8cOY+LEiaV+5IDdbjemTZtmbN++3ejXr58REBDg8G4IXNju3btLvHsuJyfHCAkJMR577DFj69atxqxZswwfH58Sb2uvUqWK8fbbbxs7duwwXnnllVLf1h4Q
EGB8+umnxpYtW4wHHniAt7VfwIEDB4y6desa7du3Nw4cOGAcOnTI/FOMuXFtfE8qH3//+98Nf39/Y9WqVQ5fJydPnjRr+vfvb9SuXdtYsWKFsXHjRiMqKsqIiooy9zvr55AzEZquUfn5+cagQYOM4OBgw9fX14iOjja2bt3qULNnzx6jY8eOhre3t1GjRg1j0KBBRkFBgUPNypUrjSZNmhienp7GDTfcYKSmppY414QJE4zatWsbnp6exh133GF88803V/LSKp3SQpNhGMbmzZuNVq1aGXa73bjuuuuMN998s8Rj58yZY9x0002Gp6enccsttxiLFi1y2F9UVGS8/PLLRkhIiGG324327dsbO3fuvJKXc1VLTU01JJX652zMjWvje9KVd76vk7N/Rpw6dcp4+umnjcDAQMPHx8fo2rWrwz9ADMN5P4ecxfZ/FwcAAIAL4N1zAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgDXhLZt2yopKemCNXXq1FFKSkq59APg6kNoAnBV6t27t2w2m/r3719iX0JCgmw2m8Mv4Z0/f75GjhxZ5vPFx8ercePGys/PdxhfvHixPD099e2335b52ACuDoQmAFet8PBwzZo1S6dOnTLHTp8+rZkzZ6p27doOtUFBQfL19S3zud555x0dO3ZMr7zyijmWk5Ojvn376uWXX1bTpk3LfOzzKSgocPoxAZQdoQnAVatp06YKDw/X/PnzzbH58+erdu3auu222xxqz315Ljs7W/fff7+8vb0VGRmpGTNmXPBcfn5+Sk1N1ZgxY7Ru3TpJUlJSkq677joNGTJE+/fv18MPP6yAgAAFBQXpgQce0J49e8zHb9iwQffee69q1Kghf39/tWnTpsTdKZvNpvfee0+dO3dW1apV9frrr5fxmQFwJRCaAFzVnnjiCaWmpprbU6dOVZ8+fS76uN69e2v//v1auXKl5s2bp0mTJik7O/uCj7nnnnv09NNPq1evXpo7d67mzJmj6dOnyzAMxcbGytfXV19++aXWrl2ratWqqUOHDubLeceOHVOvXr301Vdf6ZtvvlG9evXUqVMnHTt2zOEcw4cPV9euXfX999/riSeeKMMzAuBKqVLRDQDA5Xj00Uc1ZMgQ7d27V5K0du1azZo1S6tWrTrvY3788Ud9/vnnWr9+vW6//XZJ0gcffKCGDRte9HyjRo3SkiVL1L17d40ZM0YNGjTQxx9/rKKiIv373/+WzWaTJKWmpiogIECrVq1STEyM2rVr53CcKVOmKCAgQKtXr9Z9991njvfo0cNS6ANQ/rjTBOCqVrNmTcXFxWnatGlKTU1VXFycatSoccHH7NixQ1WqVFGzZs3MsQYNGiggIOCi5/P29tZzzz0nHx8fDRgwQJK0efNm7dq1S76+vqpWrZqqVaumoKAgnT59Wj///LMkKSsrS3379lW9evXk7+8vPz8/HT9+XPv27XM4fvPmzS/xGQBQXrjTBOCq98QTTygxMVGSNHHixCt+vipVqsjd3d28q3T8+HE1a9as1HVRNWvWlCT16tVLf/zxh8aNG6eIiAjZ7XZFRUWVeDde1apVr3j/AMqG0ATgqle8dshmsyk2Nvai9Q0aNNCZM2eUkZFhvjy3c+dO5eTklOn8TZs21ezZsxUcHCw/P79Sa9auXatJkyapU6dOkqT9+/fr999/L9P5AFQMXp4DcNVzd3fXjh07tH37drm7u1+0vn79+urQoYOeeuoprVu3ThkZGXryySfl7e1dpvP37NlTNWrU0AMPPKAvv/xSu3fv1qpVq/TMM8/owIEDkqR69erpo48+0o4dO7Ru3Tr17NmzzOcDUDEITQAqBT8/v/Pe5SlNamqqwsLC1KZNGz344IPq16+fgoODy3RuHx8frVmzRrVr19aDDz6ohg0bKj4+XqdPnzZ7+uCDD3TkyBE1bdpUjz32mJ555pkynw9AxbAZhmFUdBMAAACujjtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALDg/wGKLN6uKdIJRwAAAABJRU5ErkJggg==",
|
1341 |
+
"text/plain": [
|
1342 |
+
"<Figure size 640x480 with 1 Axes>"
|
1343 |
+
]
|
1344 |
+
},
|
1345 |
+
"metadata": {},
|
1346 |
+
"output_type": "display_data"
|
1347 |
+
}
|
1348 |
+
],
|
1349 |
+
"source": [
|
1350 |
+
"year_df[\"mid_year\"].hist(bins=50)\n",
|
1351 |
+
"plt.xlabel(\"Mid Year\")\n",
|
1352 |
+
"plt.ylabel(\"Frequency\")\n",
|
1353 |
+
"plt.title(\"Distribution of Mid Year\")\n",
|
1354 |
+
"plt.show()"
|
1355 |
+
]
|
1356 |
+
},
|
1357 |
+
{
|
1358 |
+
"cell_type": "markdown",
|
1359 |
+
"metadata": {},
|
1360 |
+
"source": [
|
1361 |
+
"### Object name"
|
1362 |
+
]
|
1363 |
+
},
|
1364 |
+
{
|
1365 |
+
"cell_type": "code",
|
1366 |
+
"execution_count": 106,
|
1367 |
+
"metadata": {},
|
1368 |
+
"outputs": [
|
1369 |
+
{
|
1370 |
+
"data": {
|
1371 |
+
"text/plain": [
|
1372 |
+
"object_name\n",
|
1373 |
+
"sherds 5068\n",
|
1374 |
+
"photographs 4729\n",
|
1375 |
+
"coins 4609\n",
|
1376 |
+
"amulets 2485\n",
|
1377 |
+
"Woodblock Print 1386\n",
|
1378 |
+
"figures 1316\n",
|
1379 |
+
"vessels 1165\n",
|
1380 |
+
"bowls 807\n",
|
1381 |
+
"Papercut 731\n",
|
1382 |
+
"pages 647\n",
|
1383 |
+
"Slide 633\n",
|
1384 |
+
"jars 566\n",
|
1385 |
+
"Seal 554\n",
|
1386 |
+
"postcards 541\n",
|
1387 |
+
"vases 518\n",
|
1388 |
+
"Name: count, dtype: int64"
|
1389 |
+
]
|
1390 |
+
},
|
1391 |
+
"execution_count": 106,
|
1392 |
+
"metadata": {},
|
1393 |
+
"output_type": "execute_result"
|
1394 |
+
}
|
1395 |
+
],
|
1396 |
+
"source": [
|
1397 |
+
"full_df[\"object_name\"].value_counts()[:15]"
|
1398 |
+
]
|
1399 |
+
},
|
1400 |
+
{
|
1401 |
+
"cell_type": "markdown",
|
1402 |
+
"metadata": {},
|
1403 |
+
"source": [
|
1404 |
+
"### Material"
|
1405 |
+
]
|
1406 |
+
},
|
1407 |
+
{
|
1408 |
+
"cell_type": "code",
|
1409 |
+
"execution_count": 104,
|
1410 |
+
"metadata": {},
|
1411 |
+
"outputs": [
|
1412 |
+
{
|
1413 |
+
"data": {
|
1414 |
+
"text/plain": [
|
1415 |
+
"material\n",
|
1416 |
+
"pottery 8844\n",
|
1417 |
+
"paper 5822\n",
|
1418 |
+
"metal 3739\n",
|
1419 |
+
"photographic paper 3396\n",
|
1420 |
+
"faience 2961\n",
|
1421 |
+
" ... \n",
|
1422 |
+
"Shell (Ostrich Egg) 1\n",
|
1423 |
+
"serpentinite 1\n",
|
1424 |
+
"balsa 1\n",
|
1425 |
+
"maple 1\n",
|
1426 |
+
"fabric art 1\n",
|
1427 |
+
"Name: count, Length: 414, dtype: int64"
|
1428 |
+
]
|
1429 |
+
},
|
1430 |
+
"execution_count": 104,
|
1431 |
+
"metadata": {},
|
1432 |
+
"output_type": "execute_result"
|
1433 |
+
}
|
1434 |
+
],
|
1435 |
+
"source": [
|
1436 |
+
"full_df[\"material\"].value_counts()"
|
1437 |
+
]
|
1438 |
+
},
|
1439 |
+
{
|
1440 |
+
"cell_type": "code",
|
1441 |
+
"execution_count": null,
|
1442 |
+
"metadata": {},
|
1443 |
+
"outputs": [],
|
1444 |
+
"source": []
|
1445 |
+
}
|
1446 |
+
],
|
1447 |
+
"metadata": {
|
1448 |
+
"kernelspec": {
|
1449 |
+
"display_name": "ArtifactClassification",
|
1450 |
+
"language": "python",
|
1451 |
+
"name": "python3"
|
1452 |
+
},
|
1453 |
+
"language_info": {
|
1454 |
+
"codemirror_mode": {
|
1455 |
+
"name": "ipython",
|
1456 |
+
"version": 3
|
1457 |
+
},
|
1458 |
+
"file_extension": ".py",
|
1459 |
+
"mimetype": "text/x-python",
|
1460 |
+
"name": "python",
|
1461 |
+
"nbconvert_exporter": "python",
|
1462 |
+
"pygments_lexer": "ipython3",
|
1463 |
+
"version": "3.10.12"
|
1464 |
+
}
|
1465 |
+
},
|
1466 |
+
"nbformat": 4,
|
1467 |
+
"nbformat_minor": 2
|
1468 |
+
}
|
3.0-efficientnet_example.ipynb
ADDED
@@ -0,0 +1,1062 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from PIL import Image\n",
|
10 |
+
"import pandas as pd\n",
|
11 |
+
"import os\n",
|
12 |
+
"from datasets import Dataset, Image, DatasetDict\n",
|
13 |
+
"from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
|
14 |
+
"from transformers import (\n",
|
15 |
+
" AutoImageProcessor,\n",
|
16 |
+
" AutoModelForImageClassification,\n",
|
17 |
+
" TrainingArguments,\n",
|
18 |
+
" Trainer,\n",
|
19 |
+
" DefaultDataCollator,\n",
|
20 |
+
")\n",
|
21 |
+
"import evaluate\n",
|
22 |
+
"import numpy as np"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "markdown",
|
27 |
+
"metadata": {},
|
28 |
+
"source": [
|
29 |
+
"### Load data"
|
30 |
+
]
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"cell_type": "code",
|
34 |
+
"execution_count": 2,
|
35 |
+
"metadata": {},
|
36 |
+
"outputs": [],
|
37 |
+
"source": [
|
38 |
+
"file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
|
39 |
+
"file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
|
40 |
+
"file2obj.rename(columns={\"obj_num\": \"label\"}, inplace=True)\n",
|
41 |
+
"\n",
|
42 |
+
"# Group by 'obj_num' and count occurrences\n",
|
43 |
+
"obj_num_counts = file2obj[\"label\"].value_counts()\n",
|
44 |
+
"\n",
|
45 |
+
"# Filter rows where 'obj_num' appears more than twice\n",
|
46 |
+
"file2obj_3 = file2obj[file2obj[\"label\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cell_type": "markdown",
|
51 |
+
"metadata": {},
|
52 |
+
"source": [
|
53 |
+
"### Form HF dataset"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"cell_type": "code",
|
58 |
+
"execution_count": 3,
|
59 |
+
"metadata": {},
|
60 |
+
"outputs": [
|
61 |
+
{
|
62 |
+
"data": {
|
63 |
+
"application/vnd.jupyter.widget-view+json": {
|
64 |
+
"model_id": "59370086a1b64dc5842d9becd9019aad",
|
65 |
+
"version_major": 2,
|
66 |
+
"version_minor": 0
|
67 |
+
},
|
68 |
+
"text/plain": [
|
69 |
+
"Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
|
70 |
+
]
|
71 |
+
},
|
72 |
+
"metadata": {},
|
73 |
+
"output_type": "display_data"
|
74 |
+
}
|
75 |
+
],
|
76 |
+
"source": [
|
77 |
+
"ds = Dataset.from_pandas(file2obj_3[[\"image\", \"label\"]], preserve_index=False).cast_column(\n",
|
78 |
+
" \"image\", Image()\n",
|
79 |
+
")\n",
|
80 |
+
"ds = ds.class_encode_column(\"label\")\n",
|
81 |
+
"trainval_test = ds.train_test_split(stratify_by_column=\"label\", test_size=0.16)\n",
|
82 |
+
"train_val = trainval_test[\"train\"].train_test_split(stratify_by_column=\"label\", test_size=16 / 84)\n",
|
83 |
+
"ds = DatasetDict(\n",
|
84 |
+
" {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
|
85 |
+
")"
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cell_type": "markdown",
|
90 |
+
"metadata": {},
|
91 |
+
"source": [
|
92 |
+
"### Transform data"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "code",
|
97 |
+
"execution_count": 4,
|
98 |
+
"metadata": {},
|
99 |
+
"outputs": [],
|
100 |
+
"source": [
|
101 |
+
"checkpoint = \"google/efficientnet-b3\"\n",
|
102 |
+
"image_processor = AutoImageProcessor.from_pretrained(checkpoint)\n",
|
103 |
+
"\n",
|
104 |
+
"\n",
|
105 |
+
"normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
|
106 |
+
"size = (\n",
|
107 |
+
" image_processor.size[\"shortest_edge\"]\n",
|
108 |
+
" if \"shortest_edge\" in image_processor.size\n",
|
109 |
+
" else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
|
110 |
+
")\n",
|
111 |
+
"_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])\n",
|
112 |
+
"\n",
|
113 |
+
"\n",
|
114 |
+
"def transforms(examples):\n",
|
115 |
+
" examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"image\"]]\n",
|
116 |
+
" del examples[\"image\"]\n",
|
117 |
+
" return examples\n",
|
118 |
+
"\n",
|
119 |
+
"\n",
|
120 |
+
"ds = ds.with_transform(transforms)"
|
121 |
+
]
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "markdown",
|
125 |
+
"metadata": {},
|
126 |
+
"source": [
|
127 |
+
"### Set up model and metrics"
|
128 |
+
]
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"cell_type": "code",
|
132 |
+
"execution_count": 5,
|
133 |
+
"metadata": {},
|
134 |
+
"outputs": [
|
135 |
+
{
|
136 |
+
"name": "stderr",
|
137 |
+
"output_type": "stream",
|
138 |
+
"text": [
|
139 |
+
"Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
|
140 |
+
"- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated\n",
|
141 |
+
"- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated\n",
|
142 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
143 |
+
]
|
144 |
+
}
|
145 |
+
],
|
146 |
+
"source": [
|
147 |
+
"labels = ds[\"train\"].features[\"label\"].names\n",
|
148 |
+
"model = AutoModelForImageClassification.from_pretrained(\n",
|
149 |
+
" checkpoint,\n",
|
150 |
+
" num_labels=len(labels),\n",
|
151 |
+
" id2label={str(i): c for i, c in enumerate(labels)},\n",
|
152 |
+
" label2id={c: str(i) for i, c in enumerate(labels)},\n",
|
153 |
+
" ignore_mismatched_sizes=True,\n",
|
154 |
+
")\n",
|
155 |
+
"\n",
|
156 |
+
"data_collator = DefaultDataCollator()\n",
|
157 |
+
"\n",
|
158 |
+
"accuracy = evaluate.load(\"accuracy\")\n",
|
159 |
+
"\n",
|
160 |
+
"\n",
|
161 |
+
"def compute_metrics(eval_pred):\n",
|
162 |
+
" predictions, labels = eval_pred\n",
|
163 |
+
" predictions = np.argmax(predictions, axis=1)\n",
|
164 |
+
" return accuracy.compute(predictions=predictions, references=labels)"
|
165 |
+
]
|
166 |
+
},
|
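compute_metrics receives raw logits from the Trainer, so the argmax is what turns them into class ids before accuracy is computed. An illustration with made-up logits (not real model output):

import numpy as np

fake_logits = np.array([[0.1, 2.0, -1.0],   # argmax -> class 1
                        [3.0, 0.2, 0.1]])   # argmax -> class 0
fake_labels = np.array([1, 2])
print(compute_metrics((fake_logits, fake_labels)))   # {'accuracy': 0.5}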
167 |
+
{
|
168 |
+
"cell_type": "markdown",
|
169 |
+
"metadata": {},
|
170 |
+
"source": [
|
171 |
+
"### Train model"
|
172 |
+
]
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"cell_type": "code",
|
176 |
+
"execution_count": 10,
|
177 |
+
"metadata": {},
|
178 |
+
"outputs": [
|
179 |
+
{
|
180 |
+
"name": "stderr",
|
181 |
+
"output_type": "stream",
|
182 |
+
"text": [
|
183 |
+
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n",
|
184 |
+
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n"
|
185 |
+
]
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"data": {
|
189 |
+
"application/vnd.jupyter.widget-view+json": {
|
190 |
+
"model_id": "cb34337db5584dfbbc4a76bb7e724b26",
|
191 |
+
"version_major": 2,
|
192 |
+
"version_minor": 0
|
193 |
+
},
|
194 |
+
"text/plain": [
|
195 |
+
" 0%| | 0/274 [00:00<?, ?it/s]"
|
196 |
+
]
|
197 |
+
},
|
198 |
+
"metadata": {},
|
199 |
+
"output_type": "display_data"
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"name": "stdout",
|
203 |
+
"output_type": "stream",
|
204 |
+
"text": [
|
205 |
+
"{'loss': 8.0521, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.04}\n",
|
206 |
+
"{'loss': 8.0927, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.07}\n",
|
207 |
+
"{'loss': 8.1187, 'learning_rate': 4.959349593495935e-05, 'epoch': 0.11}\n",
|
208 |
+
"{'loss': 8.2335, 'learning_rate': 4.75609756097561e-05, 'epoch': 0.15}\n",
|
209 |
+
"{'loss': 8.2531, 'learning_rate': 4.5528455284552844e-05, 'epoch': 0.18}\n",
|
210 |
+
"{'loss': 8.2873, 'learning_rate': 4.3495934959349595e-05, 'epoch': 0.22}\n",
|
211 |
+
"{'loss': 8.2071, 'learning_rate': 4.146341463414634e-05, 'epoch': 0.26}\n",
|
212 |
+
"{'loss': 8.2287, 'learning_rate': 3.943089430894309e-05, 'epoch': 0.29}\n",
|
213 |
+
"{'loss': 8.1928, 'learning_rate': 3.739837398373984e-05, 'epoch': 0.33}\n",
|
214 |
+
"{'loss': 8.2053, 'learning_rate': 3.5365853658536584e-05, 'epoch': 0.36}\n",
|
215 |
+
"{'loss': 8.1621, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.4}\n",
|
216 |
+
"{'loss': 8.1731, 'learning_rate': 3.130081300813008e-05, 'epoch': 0.44}\n",
|
217 |
+
"{'loss': 8.1447, 'learning_rate': 2.926829268292683e-05, 'epoch': 0.47}\n",
|
218 |
+
"{'loss': 8.1161, 'learning_rate': 2.7235772357723577e-05, 'epoch': 0.51}\n",
|
219 |
+
"{'loss': 8.1081, 'learning_rate': 2.5203252032520324e-05, 'epoch': 0.55}\n",
|
220 |
+
"{'loss': 8.0828, 'learning_rate': 2.3170731707317075e-05, 'epoch': 0.58}\n",
|
221 |
+
"{'loss': 8.1312, 'learning_rate': 2.1138211382113822e-05, 'epoch': 0.62}\n",
|
222 |
+
"{'loss': 8.1627, 'learning_rate': 1.9105691056910573e-05, 'epoch': 0.66}\n",
|
223 |
+
"{'loss': 8.0692, 'learning_rate': 1.707317073170732e-05, 'epoch': 0.69}\n",
|
224 |
+
"{'loss': 7.9943, 'learning_rate': 1.5040650406504067e-05, 'epoch': 0.73}\n",
|
225 |
+
"{'loss': 8.0139, 'learning_rate': 1.3008130081300815e-05, 'epoch': 0.77}\n",
|
226 |
+
"{'loss': 8.027, 'learning_rate': 1.0975609756097562e-05, 'epoch': 0.8}\n",
|
227 |
+
"{'loss': 8.0628, 'learning_rate': 8.94308943089431e-06, 'epoch': 0.84}\n",
|
228 |
+
"{'loss': 8.1264, 'learning_rate': 6.910569105691057e-06, 'epoch': 0.88}\n",
|
229 |
+
"{'loss': 8.0408, 'learning_rate': 4.8780487804878055e-06, 'epoch': 0.91}\n",
|
230 |
+
"{'loss': 8.071, 'learning_rate': 2.8455284552845528e-06, 'epoch': 0.95}\n",
|
231 |
+
"{'loss': 8.1237, 'learning_rate': 8.130081300813009e-07, 'epoch': 0.99}\n"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"data": {
|
236 |
+
"application/vnd.jupyter.widget-view+json": {
|
237 |
+
"model_id": "5915cfa41d474a399ce0f53bc8f6f947",
|
238 |
+
"version_major": 2,
|
239 |
+
"version_minor": 0
|
240 |
+
},
|
241 |
+
"text/plain": [
|
242 |
+
" 0%| | 0/65 [00:00<?, ?it/s]"
|
243 |
+
]
|
244 |
+
},
|
245 |
+
"metadata": {},
|
246 |
+
"output_type": "display_data"
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"name": "stdout",
|
250 |
+
"output_type": "stream",
|
251 |
+
"text": [
|
252 |
+
"{'eval_loss': 8.02699089050293, 'eval_accuracy': 0.02575315840621963, 'eval_runtime': 25.2001, 'eval_samples_per_second': 163.333, 'eval_steps_per_second': 2.579, 'epoch': 1.0}\n",
|
253 |
+
"{'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0}\n"
|
254 |
+
]
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"data": {
|
258 |
+
"text/plain": [
|
259 |
+
"TrainOutput(global_step=274, training_loss=8.129460439194728, metrics={'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0})"
|
260 |
+
]
|
261 |
+
},
|
262 |
+
"execution_count": 10,
|
263 |
+
"metadata": {},
|
264 |
+
"output_type": "execute_result"
|
265 |
+
}
|
266 |
+
],
|
267 |
+
"source": [
|
268 |
+
"training_args = TrainingArguments(\n",
|
269 |
+
" output_dir=\"../models/test\",\n",
|
270 |
+
" remove_unused_columns=False,\n",
|
271 |
+
" evaluation_strategy=\"epoch\",\n",
|
272 |
+
" save_strategy=\"epoch\",\n",
|
273 |
+
" learning_rate=5e-5,\n",
|
274 |
+
" per_device_train_batch_size=64,\n",
|
275 |
+
" # gradient_accumulation_steps=2,\n",
|
276 |
+
" per_device_eval_batch_size=64,\n",
|
277 |
+
" num_train_epochs=1,\n",
|
278 |
+
" warmup_ratio=0.1,\n",
|
279 |
+
" logging_steps=10,\n",
|
280 |
+
" load_best_model_at_end=True,\n",
|
281 |
+
" metric_for_best_model=\"accuracy\",\n",
|
282 |
+
" push_to_hub=False,\n",
|
283 |
+
"\n",
|
284 |
+
")\n",
|
285 |
+
"\n",
|
286 |
+
"trainer = Trainer(\n",
|
287 |
+
" model=model,\n",
|
288 |
+
" args=training_args,\n",
|
289 |
+
" train_dataset=ds[\"train\"], # .select(range(100)),\n",
|
290 |
+
" eval_dataset=ds[\"valid\"], # .select(range(100)),\n",
|
291 |
+
" tokenizer=image_processor,\n",
|
292 |
+
" compute_metrics=compute_metrics,\n",
|
293 |
+
" data_collator=data_collator,\n",
|
294 |
+
")\n",
|
295 |
+
"\n",
|
296 |
+
"trainer.train()"
|
297 |
+
]
|
298 |
+
},
|
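The 274 optimisation steps shown in the progress bar are consistent with the split above: roughly 17,493 training examples at a per-device batch size of 64 gives ceil(17493 / 64) = 274 steps per epoch, assuming a single device and no gradient accumulation (it is commented out above):

import math
print(math.ceil(17_493 / 64))   # 274 steps per epoch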
299 |
+
{
|
300 |
+
"cell_type": "markdown",
|
301 |
+
"metadata": {},
|
302 |
+
"source": [
|
303 |
+
"### Evaluation"
|
304 |
+
]
|
305 |
+
},
|
306 |
+
{
|
307 |
+
"cell_type": "code",
|
308 |
+
"execution_count": 7,
|
309 |
+
"metadata": {},
|
310 |
+
"outputs": [
|
311 |
+
{
|
312 |
+
"data": {
|
313 |
+
"application/vnd.jupyter.widget-view+json": {
|
314 |
+
"model_id": "4979f1d5536f4a3e97ecbc36c7eebbfa",
|
315 |
+
"version_major": 2,
|
316 |
+
"version_minor": 0
|
317 |
+
},
|
318 |
+
"text/plain": [
|
319 |
+
" 0%| | 0/7 [00:00<?, ?it/s]"
|
320 |
+
]
|
321 |
+
},
|
322 |
+
"metadata": {},
|
323 |
+
"output_type": "display_data"
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"name": "stdout",
|
327 |
+
"output_type": "stream",
|
328 |
+
"text": [
|
329 |
+
"{'eval_loss': 8.275933265686035, 'eval_accuracy': 0.0, 'eval_runtime': 0.6419, 'eval_samples_per_second': 155.791, 'eval_steps_per_second': 10.905, 'epoch': 0.57}\n"
|
330 |
+
]
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"data": {
|
334 |
+
"application/vnd.jupyter.widget-view+json": {
|
335 |
+
"model_id": "cf6f48e995bf427db3c86d1d988bf752",
|
336 |
+
"version_major": 2,
|
337 |
+
"version_minor": 0
|
338 |
+
},
|
339 |
+
"text/plain": [
|
340 |
+
" 0%| | 0/7 [00:00<?, ?it/s]"
|
341 |
+
]
|
342 |
+
},
|
343 |
+
"metadata": {},
|
344 |
+
"output_type": "display_data"
|
345 |
+
}
|
346 |
+
],
|
347 |
+
"source": [
|
348 |
+
"results = trainer.evaluate()\n",
|
349 |
+
"print(results)\n",
|
350 |
+
"\n",
|
351 |
+
"test_results = trainer.predict(ds[\"test\"].select(range(100)))"
|
352 |
+
]
|
353 |
+
},
|
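trainer.predict returns the raw logits alongside the metrics, so mapping them back to readable class names only needs an argmax plus the labels list built earlier from the dataset features. A short sketch reusing the objects already defined in this notebook:

import numpy as np

pred_ids = np.argmax(test_results.predictions, axis=1)
pred_names = [labels[i] for i in pred_ids]   # labels = ds["train"].features["label"].names
print(pred_names[:5])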
354 |
+
{
|
355 |
+
"cell_type": "code",
|
356 |
+
"execution_count": 12,
|
357 |
+
"metadata": {},
|
358 |
+
"outputs": [
|
359 |
+
{
|
360 |
+
"data": {
|
361 |
+
"text/plain": [
|
362 |
+
"EfficientNetForImageClassification(\n",
|
363 |
+
" (efficientnet): EfficientNetModel(\n",
|
364 |
+
" (embeddings): EfficientNetEmbeddings(\n",
|
365 |
+
" (padding): ZeroPad2d((0, 1, 0, 1))\n",
|
366 |
+
" (convolution): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=valid, bias=False)\n",
|
367 |
+
" (batchnorm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
368 |
+
" (activation): SiLU()\n",
|
369 |
+
" )\n",
|
370 |
+
" (encoder): EfficientNetEncoder(\n",
|
371 |
+
" (blocks): ModuleList(\n",
|
372 |
+
" (0): EfficientNetBlock(\n",
|
373 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
374 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
375 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=40, bias=False)\n",
|
376 |
+
" (depthwise_norm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
377 |
+
" (depthwise_act): SiLU()\n",
|
378 |
+
" )\n",
|
379 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
380 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
381 |
+
" (reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
382 |
+
" (expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
383 |
+
" (act_reduce): SiLU()\n",
|
384 |
+
" (act_expand): Sigmoid()\n",
|
385 |
+
" )\n",
|
386 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
387 |
+
" (project_conv): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
388 |
+
" (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
389 |
+
" (dropout): Dropout(p=0.0, inplace=False)\n",
|
390 |
+
" )\n",
|
391 |
+
" )\n",
|
392 |
+
" (1): EfficientNetBlock(\n",
|
393 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
394 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
395 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=24, bias=False)\n",
|
396 |
+
" (depthwise_norm): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
397 |
+
" (depthwise_act): SiLU()\n",
|
398 |
+
" )\n",
|
399 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
400 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
401 |
+
" (reduce): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
402 |
+
" (expand): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
403 |
+
" (act_reduce): SiLU()\n",
|
404 |
+
" (act_expand): Sigmoid()\n",
|
405 |
+
" )\n",
|
406 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
407 |
+
" (project_conv): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
408 |
+
" (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
409 |
+
" (dropout): Dropout(p=0.007692307692307693, inplace=False)\n",
|
410 |
+
" )\n",
|
411 |
+
" )\n",
|
412 |
+
" (2): EfficientNetBlock(\n",
|
413 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
414 |
+
" (expand_conv): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
415 |
+
" (expand_bn): BatchNorm2d(144, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
416 |
+
" (expand_act): SiLU()\n",
|
417 |
+
" )\n",
|
418 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
419 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
420 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=144, bias=False)\n",
|
421 |
+
" (depthwise_norm): BatchNorm2d(144, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
422 |
+
" (depthwise_act): SiLU()\n",
|
423 |
+
" )\n",
|
424 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
425 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
426 |
+
" (reduce): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
427 |
+
" (expand): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
428 |
+
" (act_reduce): SiLU()\n",
|
429 |
+
" (act_expand): Sigmoid()\n",
|
430 |
+
" )\n",
|
431 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
432 |
+
" (project_conv): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
433 |
+
" (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
434 |
+
" (dropout): Dropout(p=0.015384615384615385, inplace=False)\n",
|
435 |
+
" )\n",
|
436 |
+
" )\n",
|
437 |
+
" (3): EfficientNetBlock(\n",
|
438 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
439 |
+
" (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
440 |
+
" (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
441 |
+
" (expand_act): SiLU()\n",
|
442 |
+
" )\n",
|
443 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
444 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
445 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
|
446 |
+
" (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
447 |
+
" (depthwise_act): SiLU()\n",
|
448 |
+
" )\n",
|
449 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
450 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
451 |
+
" (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
452 |
+
" (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
453 |
+
" (act_reduce): SiLU()\n",
|
454 |
+
" (act_expand): Sigmoid()\n",
|
455 |
+
" )\n",
|
456 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
457 |
+
" (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
458 |
+
" (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
459 |
+
" (dropout): Dropout(p=0.02307692307692308, inplace=False)\n",
|
460 |
+
" )\n",
|
461 |
+
" )\n",
|
462 |
+
" (4): EfficientNetBlock(\n",
|
463 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
464 |
+
" (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
465 |
+
" (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
466 |
+
" (expand_act): SiLU()\n",
|
467 |
+
" )\n",
|
468 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
469 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
470 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
|
471 |
+
" (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
472 |
+
" (depthwise_act): SiLU()\n",
|
473 |
+
" )\n",
|
474 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
475 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
476 |
+
" (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
477 |
+
" (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
478 |
+
" (act_reduce): SiLU()\n",
|
479 |
+
" (act_expand): Sigmoid()\n",
|
480 |
+
" )\n",
|
481 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
482 |
+
" (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
483 |
+
" (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
484 |
+
" (dropout): Dropout(p=0.03076923076923077, inplace=False)\n",
|
485 |
+
" )\n",
|
486 |
+
" )\n",
|
487 |
+
" (5): EfficientNetBlock(\n",
|
488 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
489 |
+
" (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
490 |
+
" (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
491 |
+
" (expand_act): SiLU()\n",
|
492 |
+
" )\n",
|
493 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
494 |
+
" (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
|
495 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=192, bias=False)\n",
|
496 |
+
" (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
497 |
+
" (depthwise_act): SiLU()\n",
|
498 |
+
" )\n",
|
499 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
500 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
501 |
+
" (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
502 |
+
" (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
503 |
+
" (act_reduce): SiLU()\n",
|
504 |
+
" (act_expand): Sigmoid()\n",
|
505 |
+
" )\n",
|
506 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
507 |
+
" (project_conv): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
508 |
+
" (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
509 |
+
" (dropout): Dropout(p=0.038461538461538464, inplace=False)\n",
|
510 |
+
" )\n",
|
511 |
+
" )\n",
|
512 |
+
" (6): EfficientNetBlock(\n",
|
513 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
514 |
+
" (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
515 |
+
" (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
516 |
+
" (expand_act): SiLU()\n",
|
517 |
+
" )\n",
|
518 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
519 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
520 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
|
521 |
+
" (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
522 |
+
" (depthwise_act): SiLU()\n",
|
523 |
+
" )\n",
|
524 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
525 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
526 |
+
" (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
527 |
+
" (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
528 |
+
" (act_reduce): SiLU()\n",
|
529 |
+
" (act_expand): Sigmoid()\n",
|
530 |
+
" )\n",
|
531 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
532 |
+
" (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
533 |
+
" (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
534 |
+
" (dropout): Dropout(p=0.04615384615384616, inplace=False)\n",
|
535 |
+
" )\n",
|
536 |
+
" )\n",
|
537 |
+
" (7): EfficientNetBlock(\n",
|
538 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
539 |
+
" (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
540 |
+
" (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
541 |
+
" (expand_act): SiLU()\n",
|
542 |
+
" )\n",
|
543 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
544 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
545 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
|
546 |
+
" (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
547 |
+
" (depthwise_act): SiLU()\n",
|
548 |
+
" )\n",
|
549 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
550 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
551 |
+
" (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
552 |
+
" (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
553 |
+
" (act_reduce): SiLU()\n",
|
554 |
+
" (act_expand): Sigmoid()\n",
|
555 |
+
" )\n",
|
556 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
557 |
+
" (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
558 |
+
" (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
559 |
+
" (dropout): Dropout(p=0.05384615384615385, inplace=False)\n",
|
560 |
+
" )\n",
|
561 |
+
" )\n",
|
562 |
+
" (8): EfficientNetBlock(\n",
|
563 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
564 |
+
" (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
565 |
+
" (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
566 |
+
" (expand_act): SiLU()\n",
|
567 |
+
" )\n",
|
568 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
569 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
570 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=288, bias=False)\n",
|
571 |
+
" (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
572 |
+
" (depthwise_act): SiLU()\n",
|
573 |
+
" )\n",
|
574 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
575 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
576 |
+
" (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
577 |
+
" (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
578 |
+
" (act_reduce): SiLU()\n",
|
579 |
+
" (act_expand): Sigmoid()\n",
|
580 |
+
" )\n",
|
581 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
582 |
+
" (project_conv): Conv2d(288, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
583 |
+
" (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
584 |
+
" (dropout): Dropout(p=0.06153846153846154, inplace=False)\n",
|
585 |
+
" )\n",
|
586 |
+
" )\n",
|
587 |
+
" (9): EfficientNetBlock(\n",
|
588 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
589 |
+
" (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
590 |
+
" (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
591 |
+
" (expand_act): SiLU()\n",
|
592 |
+
" )\n",
|
593 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
594 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
595 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
|
596 |
+
" (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
597 |
+
" (depthwise_act): SiLU()\n",
|
598 |
+
" )\n",
|
599 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
600 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
601 |
+
" (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
602 |
+
" (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
603 |
+
" (act_reduce): SiLU()\n",
|
604 |
+
" (act_expand): Sigmoid()\n",
|
605 |
+
" )\n",
|
606 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
607 |
+
" (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
608 |
+
" (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
609 |
+
" (dropout): Dropout(p=0.06923076923076923, inplace=False)\n",
|
610 |
+
" )\n",
|
611 |
+
" )\n",
|
612 |
+
" (10): EfficientNetBlock(\n",
|
613 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
614 |
+
" (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
615 |
+
" (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
616 |
+
" (expand_act): SiLU()\n",
|
617 |
+
" )\n",
|
618 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
619 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
620 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
|
621 |
+
" (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
622 |
+
" (depthwise_act): SiLU()\n",
|
623 |
+
" )\n",
|
624 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
625 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
626 |
+
" (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
627 |
+
" (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
628 |
+
" (act_reduce): SiLU()\n",
|
629 |
+
" (act_expand): Sigmoid()\n",
|
630 |
+
" )\n",
|
631 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
632 |
+
" (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
633 |
+
" (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
634 |
+
" (dropout): Dropout(p=0.07692307692307693, inplace=False)\n",
|
635 |
+
" )\n",
|
636 |
+
" )\n",
|
637 |
+
" (11): EfficientNetBlock(\n",
|
638 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
639 |
+
" (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
640 |
+
" (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
641 |
+
" (expand_act): SiLU()\n",
|
642 |
+
" )\n",
|
643 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
644 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
645 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
|
646 |
+
" (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
647 |
+
" (depthwise_act): SiLU()\n",
|
648 |
+
" )\n",
|
649 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
650 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
651 |
+
" (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
652 |
+
" (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
653 |
+
" (act_reduce): SiLU()\n",
|
654 |
+
" (act_expand): Sigmoid()\n",
|
655 |
+
" )\n",
|
656 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
657 |
+
" (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
658 |
+
" (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
659 |
+
" (dropout): Dropout(p=0.08461538461538462, inplace=False)\n",
|
660 |
+
" )\n",
|
661 |
+
" )\n",
|
662 |
+
" (12): EfficientNetBlock(\n",
|
663 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
664 |
+
" (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
665 |
+
" (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
666 |
+
" (expand_act): SiLU()\n",
|
667 |
+
" )\n",
|
668 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
669 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
670 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
|
671 |
+
" (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
672 |
+
" (depthwise_act): SiLU()\n",
|
673 |
+
" )\n",
|
674 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
675 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
676 |
+
" (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
677 |
+
" (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
678 |
+
" (act_reduce): SiLU()\n",
|
679 |
+
" (act_expand): Sigmoid()\n",
|
680 |
+
" )\n",
|
681 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
682 |
+
" (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
683 |
+
" (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
684 |
+
" (dropout): Dropout(p=0.09230769230769233, inplace=False)\n",
|
685 |
+
" )\n",
|
686 |
+
" )\n",
|
687 |
+
" (13): EfficientNetBlock(\n",
|
688 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
689 |
+
" (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
690 |
+
" (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
691 |
+
" (expand_act): SiLU()\n",
|
692 |
+
" )\n",
|
693 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
694 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
695 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=576, bias=False)\n",
|
696 |
+
" (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
697 |
+
" (depthwise_act): SiLU()\n",
|
698 |
+
" )\n",
|
699 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
700 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
701 |
+
" (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
702 |
+
" (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
703 |
+
" (act_reduce): SiLU()\n",
|
704 |
+
" (act_expand): Sigmoid()\n",
|
705 |
+
" )\n",
|
706 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
707 |
+
" (project_conv): Conv2d(576, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
708 |
+
" (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
709 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
710 |
+
" )\n",
|
711 |
+
" )\n",
|
712 |
+
" (14): EfficientNetBlock(\n",
|
713 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
714 |
+
" (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
715 |
+
" (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
716 |
+
" (expand_act): SiLU()\n",
|
717 |
+
" )\n",
|
718 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
719 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
720 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
|
721 |
+
" (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
722 |
+
" (depthwise_act): SiLU()\n",
|
723 |
+
" )\n",
|
724 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
725 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
726 |
+
" (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
727 |
+
" (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
728 |
+
" (act_reduce): SiLU()\n",
|
729 |
+
" (act_expand): Sigmoid()\n",
|
730 |
+
" )\n",
|
731 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
732 |
+
" (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
733 |
+
" (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
734 |
+
" (dropout): Dropout(p=0.1076923076923077, inplace=False)\n",
|
735 |
+
" )\n",
|
736 |
+
" )\n",
|
737 |
+
" (15): EfficientNetBlock(\n",
|
738 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
739 |
+
" (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
740 |
+
" (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
741 |
+
" (expand_act): SiLU()\n",
|
742 |
+
" )\n",
|
743 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
744 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
745 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
|
746 |
+
" (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
747 |
+
" (depthwise_act): SiLU()\n",
|
748 |
+
" )\n",
|
749 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
750 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
751 |
+
" (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
752 |
+
" (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
753 |
+
" (act_reduce): SiLU()\n",
|
754 |
+
" (act_expand): Sigmoid()\n",
|
755 |
+
" )\n",
|
756 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
757 |
+
" (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
758 |
+
" (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
759 |
+
" (dropout): Dropout(p=0.11538461538461539, inplace=False)\n",
|
760 |
+
" )\n",
|
761 |
+
" )\n",
|
762 |
+
" (16): EfficientNetBlock(\n",
|
763 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
764 |
+
" (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
765 |
+
" (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
766 |
+
" (expand_act): SiLU()\n",
|
767 |
+
" )\n",
|
768 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
769 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
770 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
|
771 |
+
" (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
772 |
+
" (depthwise_act): SiLU()\n",
|
773 |
+
" )\n",
|
774 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
775 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
776 |
+
" (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
777 |
+
" (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
778 |
+
" (act_reduce): SiLU()\n",
|
779 |
+
" (act_expand): Sigmoid()\n",
|
780 |
+
" )\n",
|
781 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
782 |
+
" (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
783 |
+
" (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
784 |
+
" (dropout): Dropout(p=0.12307692307692308, inplace=False)\n",
|
785 |
+
" )\n",
|
786 |
+
" )\n",
|
787 |
+
" (17): EfficientNetBlock(\n",
|
788 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
789 |
+
" (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
790 |
+
" (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
791 |
+
" (expand_act): SiLU()\n",
|
792 |
+
" )\n",
|
793 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
794 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
795 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
|
796 |
+
" (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
797 |
+
" (depthwise_act): SiLU()\n",
|
798 |
+
" )\n",
|
799 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
800 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
801 |
+
" (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
802 |
+
" (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
803 |
+
" (act_reduce): SiLU()\n",
|
804 |
+
" (act_expand): Sigmoid()\n",
|
805 |
+
" )\n",
|
806 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
807 |
+
" (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
808 |
+
" (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
809 |
+
" (dropout): Dropout(p=0.13076923076923078, inplace=False)\n",
|
810 |
+
" )\n",
|
811 |
+
" )\n",
|
812 |
+
" (18): EfficientNetBlock(\n",
|
813 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
814 |
+
" (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
815 |
+
" (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
816 |
+
" (expand_act): SiLU()\n",
|
817 |
+
" )\n",
|
818 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
819 |
+
" (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
|
820 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=816, bias=False)\n",
|
821 |
+
" (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
822 |
+
" (depthwise_act): SiLU()\n",
|
823 |
+
" )\n",
|
824 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
825 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
826 |
+
" (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
827 |
+
" (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
828 |
+
" (act_reduce): SiLU()\n",
|
829 |
+
" (act_expand): Sigmoid()\n",
|
830 |
+
" )\n",
|
831 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
832 |
+
" (project_conv): Conv2d(816, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
833 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
834 |
+
" (dropout): Dropout(p=0.13846153846153847, inplace=False)\n",
|
835 |
+
" )\n",
|
836 |
+
" )\n",
|
837 |
+
" (19): EfficientNetBlock(\n",
|
838 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
839 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
840 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
841 |
+
" (expand_act): SiLU()\n",
|
842 |
+
" )\n",
|
843 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
844 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
845 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
846 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
847 |
+
" (depthwise_act): SiLU()\n",
|
848 |
+
" )\n",
|
849 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
850 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
851 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
852 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
853 |
+
" (act_reduce): SiLU()\n",
|
854 |
+
" (act_expand): Sigmoid()\n",
|
855 |
+
" )\n",
|
856 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
857 |
+
" (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
858 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
859 |
+
" (dropout): Dropout(p=0.14615384615384616, inplace=False)\n",
|
860 |
+
" )\n",
|
861 |
+
" )\n",
|
862 |
+
" (20): EfficientNetBlock(\n",
|
863 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
864 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
865 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
866 |
+
" (expand_act): SiLU()\n",
|
867 |
+
" )\n",
|
868 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
869 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
870 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
871 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
872 |
+
" (depthwise_act): SiLU()\n",
|
873 |
+
" )\n",
|
874 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
875 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
876 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
877 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
878 |
+
" (act_reduce): SiLU()\n",
|
879 |
+
" (act_expand): Sigmoid()\n",
|
880 |
+
" )\n",
|
881 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
882 |
+
" (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
883 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
884 |
+
" (dropout): Dropout(p=0.15384615384615385, inplace=False)\n",
|
885 |
+
" )\n",
|
886 |
+
" )\n",
|
887 |
+
" (21): EfficientNetBlock(\n",
|
888 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
889 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
890 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
891 |
+
" (expand_act): SiLU()\n",
|
892 |
+
" )\n",
|
893 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
894 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
895 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
896 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
897 |
+
" (depthwise_act): SiLU()\n",
|
898 |
+
" )\n",
|
899 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
900 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
901 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
902 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
903 |
+
" (act_reduce): SiLU()\n",
|
904 |
+
" (act_expand): Sigmoid()\n",
|
905 |
+
" )\n",
|
906 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
907 |
+
" (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
908 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
909 |
+
" (dropout): Dropout(p=0.16153846153846155, inplace=False)\n",
|
910 |
+
" )\n",
|
911 |
+
" )\n",
|
912 |
+
" (22): EfficientNetBlock(\n",
|
913 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
914 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
915 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
916 |
+
" (expand_act): SiLU()\n",
|
917 |
+
" )\n",
|
918 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
919 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
920 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
921 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
922 |
+
" (depthwise_act): SiLU()\n",
|
923 |
+
" )\n",
|
924 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
925 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
926 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
927 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
928 |
+
" (act_reduce): SiLU()\n",
|
929 |
+
" (act_expand): Sigmoid()\n",
|
930 |
+
" )\n",
|
931 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
932 |
+
" (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
933 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
934 |
+
" (dropout): Dropout(p=0.16923076923076924, inplace=False)\n",
|
935 |
+
" )\n",
|
936 |
+
" )\n",
|
937 |
+
" (23): EfficientNetBlock(\n",
|
938 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
939 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
940 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
941 |
+
" (expand_act): SiLU()\n",
|
942 |
+
" )\n",
|
943 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
944 |
+
" (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
|
945 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
946 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
947 |
+
" (depthwise_act): SiLU()\n",
|
948 |
+
" )\n",
|
949 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
950 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
951 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
952 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
953 |
+
" (act_reduce): SiLU()\n",
|
954 |
+
" (act_expand): Sigmoid()\n",
|
955 |
+
" )\n",
|
956 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
957 |
+
" (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
958 |
+
" (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
959 |
+
" (dropout): Dropout(p=0.17692307692307693, inplace=False)\n",
|
960 |
+
" )\n",
|
961 |
+
" )\n",
|
962 |
+
" (24): EfficientNetBlock(\n",
|
963 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
964 |
+
" (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
965 |
+
" (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
966 |
+
" (expand_act): SiLU()\n",
|
967 |
+
" )\n",
|
968 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
969 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
970 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
|
971 |
+
" (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
972 |
+
" (depthwise_act): SiLU()\n",
|
973 |
+
" )\n",
|
974 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
975 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
976 |
+
" (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
977 |
+
" (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
978 |
+
" (act_reduce): SiLU()\n",
|
979 |
+
" (act_expand): Sigmoid()\n",
|
980 |
+
" )\n",
|
981 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
982 |
+
" (project_conv): Conv2d(1392, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
983 |
+
" (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
984 |
+
" (dropout): Dropout(p=0.18461538461538465, inplace=False)\n",
|
985 |
+
" )\n",
|
986 |
+
" )\n",
|
987 |
+
" (25): EfficientNetBlock(\n",
|
988 |
+
" (expansion): EfficientNetExpansionLayer(\n",
|
989 |
+
" (expand_conv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
990 |
+
" (expand_bn): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
|
991 |
+
" (expand_act): SiLU()\n",
|
992 |
+
" )\n",
|
993 |
+
" (depthwise_conv): EfficientNetDepthwiseLayer(\n",
|
994 |
+
" (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
|
995 |
+
" (depthwise_conv): EfficientNetDepthwiseConv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=2304, bias=False)\n",
|
996 |
+
" (depthwise_norm): BatchNorm2d(2304, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
997 |
+
" (depthwise_act): SiLU()\n",
|
998 |
+
" )\n",
|
999 |
+
" (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
|
1000 |
+
" (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
|
1001 |
+
" (reduce): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
1002 |
+
" (expand): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
|
1003 |
+
" (act_reduce): SiLU()\n",
|
1004 |
+
" (act_expand): Sigmoid()\n",
|
1005 |
+
" )\n",
|
1006 |
+
" (projection): EfficientNetFinalBlockLayer(\n",
|
1007 |
+
" (project_conv): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
1008 |
+
" (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
1009 |
+
" (dropout): Dropout(p=0.19230769230769232, inplace=False)\n",
|
1010 |
+
" )\n",
|
1011 |
+
" )\n",
|
1012 |
+
" )\n",
|
1013 |
+
" (top_conv): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
|
1014 |
+
" (top_bn): BatchNorm2d(1536, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
|
1015 |
+
" (top_activation): SiLU()\n",
|
1016 |
+
" )\n",
|
1017 |
+
" (pooler): AvgPool2d(kernel_size=1536, stride=1536, padding=0)\n",
|
1018 |
+
" )\n",
|
1019 |
+
" (dropout): Dropout(p=0.3, inplace=False)\n",
|
1020 |
+
" (classifier): Linear(in_features=1536, out_features=3872, bias=True)\n",
|
1021 |
+
")"
|
1022 |
+
]
|
1023 |
+
},
|
1024 |
+
"execution_count": 12,
|
1025 |
+
"metadata": {},
|
1026 |
+
"output_type": "execute_result"
|
1027 |
+
}
|
1028 |
+
],
|
1029 |
+
"source": [
|
1030 |
+
"model"
|
1031 |
+
]
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"cell_type": "code",
|
1035 |
+
"execution_count": null,
|
1036 |
+
"metadata": {},
|
1037 |
+
"outputs": [],
|
1038 |
+
"source": []
|
1039 |
+
}
|
1040 |
+
],
|
1041 |
+
"metadata": {
|
1042 |
+
"kernelspec": {
|
1043 |
+
"display_name": "ArtifactClassification",
|
1044 |
+
"language": "python",
|
1045 |
+
"name": "python3"
|
1046 |
+
},
|
1047 |
+
"language_info": {
|
1048 |
+
"codemirror_mode": {
|
1049 |
+
"name": "ipython",
|
1050 |
+
"version": 3
|
1051 |
+
},
|
1052 |
+
"file_extension": ".py",
|
1053 |
+
"mimetype": "text/x-python",
|
1054 |
+
"name": "python",
|
1055 |
+
"nbconvert_exporter": "python",
|
1056 |
+
"pygments_lexer": "ipython3",
|
1057 |
+
"version": "3.10.12"
|
1058 |
+
}
|
1059 |
+
},
|
1060 |
+
"nbformat": 4,
|
1061 |
+
"nbformat_minor": 2
|
1062 |
+
}
|
4.0-assessing_BM_dataset.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
5.0-assessing_date_prediction.ipynb
ADDED
File without changes
|
README.md
ADDED
@@ -0,0 +1,47 @@
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
model-index:
|
5 |
+
- name: test
|
6 |
+
results: []
|
7 |
+
---
|
8 |
+
|
9 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
10 |
+
should probably proofread and complete it, then remove this comment. -->
|
11 |
+
|
12 |
+
# test
|
13 |
+
|
14 |
+
This model was trained from scratch on an unspecified dataset.
|
15 |
+
|
16 |
+
## Model description
|
17 |
+
|
18 |
+
More information needed
|
19 |
+
|
20 |
+
## Intended uses & limitations
|
21 |
+
|
22 |
+
More information needed
|
23 |
+
|
24 |
+
## Training and evaluation data
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Training procedure
|
29 |
+
|
30 |
+
### Training hyperparameters
|
31 |
+
|
32 |
+
The following hyperparameters were used during training:
|
33 |
+
- learning_rate: 5e-05
|
34 |
+
- train_batch_size: 128
|
35 |
+
- eval_batch_size: 128
|
36 |
+
- seed: 42
|
37 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
38 |
+
- lr_scheduler_type: linear
|
39 |
+
- num_epochs: 100
|
40 |
+
- mixed_precision_training: Native AMP
|
41 |
+
|
42 |
+
### Framework versions
|
43 |
+
|
44 |
+
- Transformers 4.38.2
|
45 |
+
- Pytorch 2.2.1+cu121
|
46 |
+
- Datasets 2.18.0
|
47 |
+
- Tokenizers 0.15.2
|
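For reference, the hyperparameters listed in this card would correspond roughly to the TrainingArguments below. This is a reconstruction, not the exact invocation: in particular the card reports the overall train/eval batch size of 128, which may have been split across devices.

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="test",                  # matches the model-index name above
    learning_rate=5e-5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=100,
    fp16=True,                          # "Native AMP" mixed-precision training
    # Adam betas (0.9, 0.999) and epsilon 1e-08 are the library defaults, so no extra flags needed
)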
config.json
ADDED
@@ -0,0 +1,88 @@
1 |
+
{
|
2 |
+
"_name_or_path": "../models/james-burton/BritishMuseum-white/bm5-white_date_log/rose-blaze-27/checkpoint-5310",
|
3 |
+
"architectures": [
|
4 |
+
"EfficientNetForImageClassification"
|
5 |
+
],
|
6 |
+
"batch_norm_eps": 0.001,
|
7 |
+
"batch_norm_momentum": 0.99,
|
8 |
+
"depth_coefficient": 1.4,
|
9 |
+
"depth_divisor": 8,
|
10 |
+
"depthwise_padding": [
|
11 |
+
5,
|
12 |
+
18
|
13 |
+
],
|
14 |
+
"drop_connect_rate": 0.2,
|
15 |
+
"dropout_rate": 0.3,
|
16 |
+
"expand_ratios": [
|
17 |
+
1,
|
18 |
+
6,
|
19 |
+
6,
|
20 |
+
6,
|
21 |
+
6,
|
22 |
+
6,
|
23 |
+
6
|
24 |
+
],
|
25 |
+
"hidden_act": "swish",
|
26 |
+
"hidden_dim": 1536,
|
27 |
+
"id2label": {
|
28 |
+
"0": "LABEL_0"
|
29 |
+
},
|
30 |
+
"image_size": 300,
|
31 |
+
"in_channels": [
|
32 |
+
32,
|
33 |
+
16,
|
34 |
+
24,
|
35 |
+
40,
|
36 |
+
80,
|
37 |
+
112,
|
38 |
+
192
|
39 |
+
],
|
40 |
+
"initializer_range": 0.02,
|
41 |
+
"kernel_sizes": [
|
42 |
+
3,
|
43 |
+
3,
|
44 |
+
5,
|
45 |
+
3,
|
46 |
+
5,
|
47 |
+
5,
|
48 |
+
3
|
49 |
+
],
|
50 |
+
"label2id": {
|
51 |
+
"LABEL_0": 0
|
52 |
+
},
|
53 |
+
"model_type": "efficientnet",
|
54 |
+
"num_block_repeats": [
|
55 |
+
1,
|
56 |
+
2,
|
57 |
+
2,
|
58 |
+
3,
|
59 |
+
3,
|
60 |
+
4,
|
61 |
+
1
|
62 |
+
],
|
63 |
+
"num_channels": 3,
|
64 |
+
"num_hidden_layers": 64,
|
65 |
+
"out_channels": [
|
66 |
+
16,
|
67 |
+
24,
|
68 |
+
40,
|
69 |
+
80,
|
70 |
+
112,
|
71 |
+
192,
|
72 |
+
320
|
73 |
+
],
|
74 |
+
"pooling_type": "mean",
|
75 |
+
"squeeze_expansion_ratio": 0.25,
|
76 |
+
"strides": [
|
77 |
+
1,
|
78 |
+
2,
|
79 |
+
2,
|
80 |
+
2,
|
81 |
+
1,
|
82 |
+
2,
|
83 |
+
1
|
84 |
+
],
|
85 |
+
"torch_dtype": "float32",
|
86 |
+
"transformers_version": "4.38.2",
|
87 |
+
"width_coefficient": 1.2
|
88 |
+
}
|
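This config loads directly with transformers' EfficientNet classes; the repository id below is a placeholder, since the actual hub path is not shown in this diff:

from transformers import EfficientNetConfig, EfficientNetForImageClassification

config = EfficientNetConfig.from_pretrained("your-username/this-repo")   # placeholder repo id
model = EfficientNetForImageClassification.from_pretrained("your-username/this-repo")
print(config.image_size, config.width_coefficient, config.depth_coefficient)   # 300 1.2 1.4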
material_min3.csv
ADDED
@@ -0,0 +1,13 @@
1 |
+
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
|
2 |
+
No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
|
3 |
+
,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
|
4 |
+
,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
|
5 |
+
,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
|
6 |
+
,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
|
7 |
+
,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
|
8 |
+
Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
|
9 |
+
,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
|
10 |
+
,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
|
11 |
+
,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
|
12 |
+
,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
|
13 |
+
,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
|
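The other results CSVs in this commit follow the same layout, with blank cells in the first two columns continuing the value above them. A small sketch of reading one of these tables with pandas (the forward-fill is an assumption about how the blank grouping cells are meant to be interpreted):

```python
# Sketch: load one of the committed results tables and rank configs by accuracy.
import pandas as pd

df = pd.read_csv("material_min3.csv")
# Blank "BM pretrain" / "Train data" cells appear to continue the previous row's value.
df[["BM pretrain", "Train data"]] = df[["BM pretrain", "Train data"]].ffill()

top = df.sort_values("Acc.", ascending=False)
print(top[["BM pretrain", "Train data", "Test time method", "Acc.", "F1"]].head())
```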
material_min3_max1.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om3-white_material,0.582,0.782,0.85,0.914,0.553,0.578,0.582
,,avg+3D,om3-white_material,0.576,0.784,0.85,0.92,0.543,0.572,0.576
,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.573,0.759,0.838,0.91,0.555,0.558,0.573
,,avg+3D,om3-3Dwhite-1frame_material,0.567,0.762,0.84,0.915,0.548,0.556,0.567
,white+3Dx4,avg,om3-3Dwhite_material,0.575,0.777,0.843,0.912,0.557,0.561,0.575
,,avg+3D,om3-3Dwhite_material,0.583,0.779,0.849,0.912,0.563,0.565,0.583
Yes,white,avg,om3-white_material_bm-pretrn,0.587,0.787,0.856,0.917,0.555,0.551,0.587
,,avg+3D,om3-white_material_bm-pretrn,0.596,0.797,0.867,0.922,0.566,0.571,0.596
,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.8,0.861,0.925,0.571,0.566,0.59
,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.79,0.852,0.918,0.578,0.583,0.59
,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.582,0.778,0.85,0.911,0.562,0.56,0.582
,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.583,0.779,0.841,0.913,0.572,0.581,0.583
material_min4.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om4-white_material,0.617,0.817,0.868,0.926,0.587,0.596,0.617
,,avg+3D,om4-white_material,0.608,0.808,0.865,0.925,0.571,0.582,0.608
,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.62,0.813,0.864,0.92,0.603,0.616,0.62
,,avg+3D,om4-3Dwhite-1frame_material,0.625,0.812,0.871,0.919,0.605,0.616,0.625
,white+3Dx4,avg,om4-3Dwhite_material,0.621,0.808,0.869,0.918,0.607,0.611,0.621
,,avg+3D,om4-3Dwhite_material,0.62,0.808,0.871,0.92,0.605,0.609,0.62
Yes,white,avg,om4-white_material_bm-pretrn,0.611,0.805,0.865,0.914,0.577,0.584,0.611
,,avg+3D,om4-white_material_bm-pretrn,0.598,0.802,0.87,0.919,0.566,0.582,0.598
,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.635,0.824,0.877,0.93,0.61,0.608,0.635
,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.64,0.816,0.873,0.924,0.618,0.621,0.64
,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.626,0.822,0.877,0.923,0.601,0.614,0.626
,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.628,0.821,0.877,0.928,0.605,0.61,0.628
material_min4_max1.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om4-white_material,0.585,0.791,0.85,0.912,0.558,0.563,0.585
,,avg+3D,om4-white_material,0.579,0.787,0.847,0.915,0.547,0.557,0.579
,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.593,0.785,0.839,0.903,0.58,0.587,0.593
,,avg+3D,om4-3Dwhite-1frame_material,0.595,0.781,0.845,0.906,0.578,0.582,0.595
,white+3Dx4,avg,om4-3Dwhite_material,0.59,0.786,0.841,0.905,0.578,0.583,0.59
,,avg+3D,om4-3Dwhite_material,0.587,0.786,0.849,0.906,0.573,0.576,0.587
Yes,white,avg,om4-white_material_bm-pretrn,0.575,0.775,0.846,0.899,0.546,0.558,0.575
,,avg+3D,om4-white_material_bm-pretrn,0.576,0.771,0.84,0.908,0.549,0.568,0.576
,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.596,0.799,0.858,0.916,0.573,0.571,0.596
,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.598,0.792,0.859,0.914,0.579,0.582,0.598
,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.59,0.793,0.857,0.91,0.567,0.574,0.59
,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.598,0.791,0.859,0.92,0.577,0.578,0.598
material_min5.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om5-white_material,0.605,0.774,0.84,0.91,0.571,0.609,0.605
,,avg+3D,om5-white_material,0.603,0.777,0.845,0.919,0.564,0.603,0.603
,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.628,0.806,0.868,0.924,0.6,0.602,0.628
,,avg+3D,om5-3Dwhite-1frame_material,0.628,0.809,0.871,0.927,0.597,0.602,0.628
,white+3Dx4,avg,om5-3Dwhite_material,0.63,0.816,0.874,0.924,0.598,0.601,0.63
,,avg+3D,om5-3Dwhite_material,0.622,0.818,0.877,0.927,0.587,0.588,0.622
Yes,white,avg,om5-white_material_bm-pretrn,0.605,0.791,0.855,0.912,0.572,0.586,0.605
,,avg+3D,om5-white_material_bm-pretrn,0.592,0.784,0.847,0.909,0.557,0.582,0.592
,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.601,0.812,0.875,0.925,0.57,0.567,0.601
,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.622,0.809,0.875,0.926,0.595,0.598,0.622
,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.633,0.819,0.88,0.93,0.607,0.607,0.633
,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.642,0.824,0.879,0.929,0.627,0.632,0.642
material_min5_max1.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om5-white_material,0.565,0.742,0.813,0.887,0.536,0.569,0.565
,,avg+3D,om5-white_material,0.568,0.734,0.822,0.899,0.532,0.576,0.568
,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.604,0.774,0.84,0.915,0.58,0.581,0.604
,,avg+3D,om5-3Dwhite-1frame_material,0.599,0.777,0.846,0.919,0.573,0.574,0.599
,white+3Dx4,avg,om5-3Dwhite_material,0.593,0.792,0.849,0.911,0.565,0.563,0.593
,,avg+3D,om5-3Dwhite_material,0.591,0.79,0.85,0.917,0.562,0.567,0.591
Yes,white,avg,om5-white_material_bm-pretrn,0.569,0.767,0.831,0.902,0.54,0.547,0.569
,,avg+3D,om5-white_material_bm-pretrn,0.562,0.757,0.827,0.897,0.526,0.542,0.562
,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.576,0.785,0.853,0.915,0.548,0.542,0.576
,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.594,0.779,0.852,0.91,0.57,0.576,0.594
,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.6,0.794,0.856,0.923,0.577,0.572,0.6
,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.606,0.792,0.852,0.915,0.593,0.597,0.606
material_min6.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om6-white_material,0.632,0.817,0.878,0.927,0.606,0.598,0.632
,,avg+3D,om6-white_material,0.632,0.811,0.869,0.924,0.602,0.593,0.632
,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.615,0.817,0.878,0.929,0.593,0.617,0.615
,,avg+3D,om6-3Dwhite-1frame_material,0.62,0.818,0.88,0.928,0.597,0.623,0.62
,white+3Dx4,avg,om6-3Dwhite_material,0.628,0.819,0.879,0.933,0.604,0.611,0.628
,,avg+3D,om6-3Dwhite_material,0.624,0.819,0.88,0.933,0.597,0.598,0.624
Yes,white,avg,om6-white_material_bm-pretrn,0.641,0.824,0.886,0.934,0.614,0.61,0.641
,,avg+3D,om6-white_material_bm-pretrn,0.644,0.834,0.885,0.938,0.617,0.618,0.644
,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.622,0.818,0.88,0.935,0.593,0.59,0.622
,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.617,0.813,0.879,0.932,0.586,0.584,0.617
,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.64,0.824,0.879,0.936,0.615,0.615,0.64
,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.648,0.827,0.883,0.94,0.622,0.622,0.648
material_min6_max1.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om6-white_material,0.603,0.793,0.854,0.911,0.58,0.572,0.603
,,avg+3D,om6-white_material,0.6,0.79,0.856,0.91,0.574,0.568,0.6
,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.578,0.786,0.852,0.915,0.556,0.574,0.578
,,avg+3D,om6-3Dwhite-1frame_material,0.581,0.793,0.858,0.917,0.558,0.58,0.581
,white+3Dx4,avg,om6-3Dwhite_material,0.592,0.795,0.858,0.919,0.57,0.568,0.592
,,avg+3D,om6-3Dwhite_material,0.597,0.793,0.858,0.922,0.571,0.567,0.597
Yes,white,avg,om6-white_material_bm-pretrn,0.612,0.804,0.865,0.917,0.587,0.579,0.612
,,avg+3D,om6-white_material_bm-pretrn,0.613,0.812,0.864,0.922,0.589,0.586,0.613
,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.598,0.791,0.864,0.919,0.573,0.566,0.598
,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.599,0.787,0.863,0.924,0.571,0.568,0.599
,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.597,0.793,0.859,0.924,0.575,0.575,0.597
,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.609,0.799,0.867,0.927,0.585,0.58,0.609
material_x_plus3Ds.csv
ADDED
@@ -0,0 +1,13 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e3c9863addf5ea52fec2daa05bb2ecbaa772b5750cfd62532070f4ff77b4269a
size 43215124
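Note that `model.safetensors` (like the `.pkl` and `.bin` files further down) is stored as a Git LFS pointer, so the three lines above are the pointer text rather than the ~43 MB weights file itself. A hedged sketch of fetching the real binary with `huggingface_hub`, where the repo id is a placeholder for this repository's id on the Hub:

```python
# Sketch: resolve the LFS pointer above to the actual weights file.
# Replace the placeholder repo_id with this repository's Hub id.
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(repo_id="<namespace>/<this-repo>", filename="model.safetensors")
print(weights_path)  # local cache path; the file should be 43,215,124 bytes, matching the pointer
```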
object_name_min3.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min3_max1.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min4.csv
ADDED
@@ -0,0 +1,19 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om4-white_name,0.609,0.752,0.805,0.866,0.581,0.58,0.609
,,avg+3D,om4-white_name,0.603,0.744,0.8,0.863,0.573,0.57,0.603
,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.584,0.743,0.792,0.858,0.548,0.543,0.584
,,avg+3D,om4-3Dwhite-1frame_name,0.579,0.741,0.793,0.854,0.541,0.536,0.579
,white+3Dx4,avg,om4-3Dwhite_name,0.563,0.716,0.777,0.843,0.54,0.558,0.563
,,avg+3D,om4-3Dwhite_name,0.571,0.715,0.779,0.849,0.547,0.563,0.571
Yes,white,avg,om4-white_name_bm-pretrn,0.58,0.739,0.798,0.867,0.547,0.546,0.58
,,avg,om4-white_name_bm-pretrn-b,0.588,0.743,0.798,0.857,0.552,0.553,0.588
,,avg+3D,om4-white_name_bm-pretrn,0.575,0.732,0.795,0.859,0.541,0.544,0.575
,,avg+3D,om4-white_name_bm-pretrn-b,0.59,0.749,0.803,0.863,0.56,0.564,0.59
,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.587,0.742,0.789,0.857,0.552,0.548,0.587
,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.589,0.74,0.801,0.861,0.557,0.558,0.589
,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.605,0.75,0.8,0.866,0.58,0.577,0.605
,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.594,0.749,0.801,0.871,0.573,0.585,0.594
,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.558,0.726,0.785,0.845,0.539,0.55,0.558
,,avg,om4-3Dwhite_name_bm-pretrn-b,0.566,0.721,0.774,0.85,0.543,0.55,0.566
,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.55,0.718,0.781,0.839,0.538,0.572,0.55
,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.562,0.709,0.765,0.839,0.554,0.597,0.562
object_name_min4_max1.csv
ADDED
@@ -0,0 +1,19 @@
BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
No,white,avg,om4-white_name,0.575,0.722,0.779,0.845,0.551,0.55,0.575
,,avg+3D,om4-white_name,0.568,0.721,0.779,0.841,0.542,0.543,0.568
,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.54,0.718,0.766,0.836,0.507,0.497,0.54
,,avg+3D,om4-3Dwhite-1frame_name,0.544,0.712,0.771,0.834,0.507,0.496,0.544
,white+3Dx4,avg,om4-3Dwhite_name,0.53,0.682,0.749,0.821,0.512,0.533,0.53
,,avg+3D,om4-3Dwhite_name,0.535,0.686,0.75,0.828,0.515,0.536,0.535
Yes,white,avg,om4-white_name_bm-pretrn,0.549,0.709,0.776,0.84,0.518,0.514,0.549
,,avg,om4-white_name_bm-pretrn-b,0.55,0.722,0.769,0.841,0.518,0.516,0.55
,,avg+3D,om4-white_name_bm-pretrn,0.544,0.703,0.771,0.842,0.511,0.511,0.544
,,avg+3D,om4-white_name_bm-pretrn-b,0.554,0.721,0.778,0.844,0.529,0.534,0.554
,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.555,0.712,0.761,0.836,0.523,0.513,0.555
,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.562,0.705,0.77,0.833,0.53,0.527,0.562
,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.568,0.72,0.772,0.843,0.547,0.546,0.568
,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.564,0.717,0.772,0.848,0.543,0.55,0.564
,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.517,0.68,0.752,0.815,0.498,0.507,0.517
,,avg,om4-3Dwhite_name_bm-pretrn-b,0.516,0.67,0.733,0.823,0.499,0.514,0.516
,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.506,0.677,0.743,0.807,0.494,0.528,0.506
,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.518,0.66,0.726,0.807,0.508,0.544,0.518
object_name_min5.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min5_max1.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6_max1.csv
ADDED
@@ -0,0 +1 @@
BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
preprocessor_config.json
ADDED
@@ -0,0 +1,29 @@
{
  "crop_size": {
    "height": 289,
    "width": 289
  },
  "do_center_crop": false,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "EfficientNetImageProcessor",
  "image_std": [
    0.47853944,
    0.4732864,
    0.47434163
  ],
  "include_top": true,
  "resample": 0,
  "rescale_factor": 0.00392156862745098,
  "rescale_offset": false,
  "size": {
    "height": 300,
    "width": 300
  }
}
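This preprocessor config resizes inputs to 300x300 (center-crop is disabled), rescales by 1/255 and normalises with the listed mean/std. A minimal usage sketch, with a hypothetical local folder and image path:

```python
# Sketch: apply the committed preprocessing exactly as EfficientNetImageProcessor would.
from PIL import Image
from transformers import AutoImageProcessor

processor = AutoImageProcessor.from_pretrained("checkpoint")  # reads preprocessor_config.json
image = Image.open("object_photo.jpg").convert("RGB")         # hypothetical input image

inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 300, 300])
```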
results.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc310ec19387e9a791da6356d5d50bd06c606c82ffd0f52ba6fe709f9154ce88
size 191453
results_local.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bb5f163e18e85fccf040281518e2bb04ed3af6d8542d59d802297affdc23a6e4
size 191453
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf85df85ac87760d3117c6911e8713dacf4c0df3b705a14f35c5188bbf5c80d0
size 4856
wandb/debug-cli.james.log
ADDED
File without changes
wandb/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
wandb/debug.log
ADDED
The diff for this file is too large to render.
See raw diff
wandb/run-20240214_112422-hfwsgqj3/files/config.yaml
ADDED
The diff for this file is too large to render.
See raw diff
wandb/run-20240214_112422-hfwsgqj3/files/output.log
ADDED
@@ -0,0 +1,33 @@
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
[{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb: WARNING Serializing object of type dict that is 147552 bytes
wandb: WARNING Serializing object of type dict that is 147552 bytes
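The repeated "Some weights ... were not initialized" block in this log is the standard Transformers warning for swapping the 1000-class ImageNet head of `google/efficientnet-b3` for a new 3872-way classifier. A sketch of the kind of call that produces it, with the label count read off the shapes in the log and everything else assumed:

```python
# Sketch: re-initialising the classifier head when fine-tuning from the ImageNet checkpoint.
# This reproduces the warning above: the 1000-class head is dropped and new
# 3872-output classifier.weight / classifier.bias tensors are randomly initialised.
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained(
    "google/efficientnet-b3",
    num_labels=3872,                # from torch.Size([3872, 1536]) in the log
    ignore_mismatched_sizes=True,   # allow the head shapes to differ
)
```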
wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt
ADDED
@@ -0,0 +1,202 @@
accelerate==0.27.2
aiohttp==3.9.3
aiosignal==1.3.1
alabaster==0.7.16
anyio==4.2.0
appdirs==1.4.4
argon2-cffi-bindings==21.2.0
argon2-cffi==23.1.0
arrow==1.3.0
artifact-classification==0.0.1
asttokens==2.4.1
async-lru==2.0.4
async-timeout==4.0.3
attrs==23.2.0
babel==2.14.0
beautifulsoup4==4.12.3
black==24.1.1
bleach==6.1.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.1
contourpy==1.2.0
coverage==7.4.1
cycler==0.12.1
datasets==2.17.0
debugpy==1.8.0
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.8
docker-pycreds==0.4.0
docutils==0.20.1
easydict==1.11
et-xmlfile==1.1.0
evaluate==0.4.1
exceptiongroup==1.2.0
executing==2.0.1
fastjsonschema==2.19.1
filelock==3.13.1
flake8==7.0.0
fonttools==4.48.1
fqdn==1.5.1
frozenlist==1.4.1
fsspec==2023.10.0
gdown==5.1.0
gitdb==4.0.11
gitpython==3.1.41
h11==0.14.0
httpcore==1.0.2
httpx==0.26.0
huggingface-hub==0.20.3
idna==3.6
imagesize==1.4.1
ipykernel==6.29.2
ipython==8.21.0
ipywidgets==8.1.2
isoduration==20.11.0
isort==5.13.2
jedi==0.19.1
jinja2==3.1.3
joblib==1.3.2
json5==0.9.14
jsonpointer==2.4
jsonschema-specifications==2023.12.1
jsonschema==4.21.1
jupyter-client==8.6.0
jupyter-console==6.6.3
jupyter-core==5.7.1
jupyter-events==0.9.0
jupyter-lsp==2.2.2
jupyter-server-terminals==0.5.2
jupyter-server==2.12.5
jupyter==1.0.0
jupyterlab-pygments==0.3.0
jupyterlab-server==2.25.2
jupyterlab-widgets==3.0.10
jupyterlab==4.1.1
kiwisolver==1.4.5
kornia==0.7.1
loguru==0.7.2
markupsafe==2.1.5
matplotlib-inline==0.1.6
matplotlib==3.8.2
mccabe==0.7.0
mistune==3.0.2
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
mypy-extensions==1.0.0
nbclient==0.9.0
nbconvert==7.16.0
nbformat==5.9.2
nest-asyncio==1.6.0
networkx==3.2.1
notebook-shim==0.2.3
notebook==7.1.0
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
opencv-python==4.9.0.80
openpyxl==3.1.2
overrides==7.7.0
packaging==23.2
pandas==2.2.0
pandocfilters==1.5.1
parso==0.8.3
pathspec==0.12.1
pexpect==4.9.0
pillow==10.2.0
pip==24.0
platformdirs==4.2.0
prometheus-client==0.19.0
prompt-toolkit==3.0.43
protobuf==4.25.2
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow-hotfix==0.6
pyarrow==15.0.0
pycodestyle==2.11.1
pycparser==2.21
pyflakes==3.2.0
pygments==2.17.2
pyparsing==3.1.1
pysocks==1.7.1
python-dateutil==2.8.2
python-dotenv==1.0.1
python-json-logger==2.0.7
pytz==2024.1
pyyaml==6.0.1
pyzmq==25.1.2
qtconsole==5.5.1
qtpy==2.4.1
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
responses==0.18.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.17.1
safetensors==0.4.2
scikit-learn==1.4.0
scipy==1.12.0
send2trash==1.8.2
sentry-sdk==1.40.4
setproctitle==1.3.3
setuptools==69.1.0
six==1.16.0
smmap==5.0.1
sniffio==1.3.0
snowballstemmer==2.2.0
soupsieve==2.5
sphinx==7.2.6
sphinxcontrib-applehelp==1.0.8
sphinxcontrib-devhelp==1.0.6
sphinxcontrib-htmlhelp==2.0.5
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.7
sphinxcontrib-serializinghtml==1.1.10
stack-data==0.6.3
sympy==1.12
terminado==0.18.0
threadpoolctl==3.2.0
timm==0.9.12
tinycss2==1.2.1
tokenizers==0.15.2
tomli==2.0.1
torch==2.2.0
torchvision==0.17.0
tornado==6.4
tqdm==4.66.1
traitlets==5.14.1
transformers==4.37.2
transparent-background==1.2.12
triton==2.2.0
typer==0.9.0
types-python-dateutil==2.8.19.20240106
typing-extensions==4.9.0
tzdata==2023.4
uri-template==1.3.0
urllib3==2.2.0
wandb==0.16.3
wcwidth==0.2.13
webcolors==1.13
webencodings==0.5.1
websocket-client==1.7.0
wget==3.2
wheel==0.42.0
widgetsnbextension==4.0.10
xxhash==3.4.1
yarl==1.9.4