james-burton committed
Commit 3752cdf · verified · 1 Parent(s): abf2826

End of training

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. .gitkeep +0 -0
  3. 0.1-testing.ipynb +0 -0
  4. 0.10-rethinking_OM_splits.ipynb +770 -0
  5. 0.11-testing_bm_split_sizes.ipynb +644 -0
  6. 0.12-get_wandb_results.ipynb +0 -0
  7. 0.13-bm_dates_col.ipynb +0 -0
  8. 0.2-testing_image_scraping.ipynb +140 -0
  9. 0.3-testing_csv_join.ipynb +973 -0
  10. 0.4-testing_tif_images.ipynb +71 -0
  11. 0.5-testing_transparent_background.ipynb +321 -0
  12. 0.7Mahnaz-efficientnet.ipynb +492 -0
  13. 0.8-testing_segmented_data.ipynb +0 -0
  14. 0.9-testing_om_datasets.ipynb +459 -0
  15. 1.0-checking_dataset_size.ipynb +559 -0
  16. 1.1-exploring_OM_image_matching.ipynb +0 -0
  17. 2.0-assessing_OM_dataset.ipynb +1468 -0
  18. 3.0-efficientnet_example.ipynb +1062 -0
  19. 4.0-assessing_BM_dataset.ipynb +0 -0
  20. 5.0-assessing_date_prediction.ipynb +0 -0
  21. README.md +47 -0
  22. config.json +88 -0
  23. material_min3.csv +13 -0
  24. material_min3_max1.csv +13 -0
  25. material_min4.csv +13 -0
  26. material_min4_max1.csv +13 -0
  27. material_min5.csv +13 -0
  28. material_min5_max1.csv +13 -0
  29. material_min6.csv +13 -0
  30. material_min6_max1.csv +13 -0
  31. material_x_plus3Ds.csv +13 -0
  32. model.safetensors +3 -0
  33. object_name_min3.csv +1 -0
  34. object_name_min3_max1.csv +1 -0
  35. object_name_min4.csv +19 -0
  36. object_name_min4_max1.csv +19 -0
  37. object_name_min5.csv +1 -0
  38. object_name_min5_max1.csv +1 -0
  39. object_name_min6.csv +1 -0
  40. object_name_min6_max1.csv +1 -0
  41. preprocessor_config.json +29 -0
  42. results.pkl +3 -0
  43. results_local.pkl +3 -0
  44. training_args.bin +3 -0
  45. wandb/debug-cli.james.log +0 -0
  46. wandb/debug-internal.log +0 -0
  47. wandb/debug.log +0 -0
  48. wandb/run-20240214_112422-hfwsgqj3/files/config.yaml +0 -0
  49. wandb/run-20240214_112422-hfwsgqj3/files/output.log +33 -0
  50. wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt +202 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20240214_112422-hfwsgqj3/run-hfwsgqj3.wandb filter=lfs diff=lfs merge=lfs -text
.gitkeep ADDED
File without changes
0.1-testing.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.10-rethinking_OM_splits.ipynb ADDED
@@ -0,0 +1,770 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import os\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
+ "obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\")\n",
+ "\n",
+ "\n",
+ "# Could eventually do something with these columns, but need cleaning first\n",
+ "obj2info.drop(\n",
+ " columns=[\"number_of_parts\", \"production.date.start\", \"production.date.end\", \"obj_num_old\"],\n",
+ " inplace=True,\n",
+ ")\n",
+ "\n",
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(x[\"root\"], x[\"file\"]), axis=1)\n",
+ "# file2obj.rename(columns={\"obj_num\": \"label\"}, inplace=True)\n",
+ "\n",
+ "join_df = file2obj[[\"obj_num\", \"file\", \"image\", \"root\"]].merge(\n",
+ " obj2info, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 data/raw/images/fulling_mill/1985\n",
+ "1 data/raw/images/fulling_mill/1985\n",
+ "2 data/raw/images/fulling_mill/1985\n",
+ "3 data/raw/images/fulling_mill/1985\n",
+ "4 data/raw/images/fulling_mill/1985\n",
+ " ... \n",
+ "37300 data/raw/images/egyptian/2014\n",
+ "37301 data/raw/images/egyptian/2014\n",
+ "37302 data/raw/images/egyptian/2014\n",
+ "37303 data/raw/images/egyptian/1963\n",
+ "37304 data/raw/images/egyptian/1963\n",
+ "Name: root, Length: 37305, dtype: object"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file2obj[\"root\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>eg3</td>\n",
+ " <td>squat shouldered jar, no rim</td>\n",
+ " <td>bowls</td>\n",
+ " <td>bowl</td>\n",
+ " <td>limestone</td>\n",
+ " <td>1st Dynasty</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>eg64</td>\n",
+ " <td>axe-head</td>\n",
+ " <td>axes: woodworking tools</td>\n",
+ " <td>axe-head</td>\n",
+ " <td>granite</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>eg71</td>\n",
+ " <td>the working end of a fish tail knife with pres...</td>\n",
+ " <td>knives</td>\n",
+ " <td>knife</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Naqada II</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>eg75</td>\n",
+ " <td>seated figure of priest holding unrolled papyr...</td>\n",
+ " <td>Human Figurine</td>\n",
+ " <td>imhotep figurine</td>\n",
+ " <td>bronze</td>\n",
+ " <td>Late Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>durom.1971.78</td>\n",
+ " <td>seated woman, inset eyes (lost), headdress had...</td>\n",
+ " <td>Human Figurine</td>\n",
+ " <td>Hathor figurine</td>\n",
+ " <td>bronze</td>\n",
+ " <td>Late Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12349</th>\n",
+ " <td>durma.2020.3.2562</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12350</th>\n",
+ " <td>durma.2020.3.2060</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12351</th>\n",
+ " <td>durma.2020.3.1446</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12352</th>\n",
+ " <td>durma.2020.3.2042</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12353</th>\n",
+ " <td>durma.2020.3.2072</td>\n",
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
+ " <td>coins</td>\n",
+ " <td>NaN</td>\n",
+ " <td>metal</td>\n",
+ " <td>Roman</td>\n",
+ " <td>Rome</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>11673 rows × 7 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " obj_num description \\\n",
+ "0 eg3 squat shouldered jar, no rim \n",
+ "1 eg64 axe-head \n",
+ "2 eg71 the working end of a fish tail knife with pres... \n",
+ "3 eg75 seated figure of priest holding unrolled papyr... \n",
+ "4 durom.1971.78 seated woman, inset eyes (lost), headdress had... \n",
+ "... ... ... \n",
+ "12349 durma.2020.3.2562 A silver Roman coin which is a part of the Pie... \n",
+ "12350 durma.2020.3.2060 A silver Roman coin which is a part of the Pie... \n",
+ "12351 durma.2020.3.1446 A silver Roman coin which is a part of the Pie... \n",
+ "12352 durma.2020.3.2042 A silver Roman coin which is a part of the Pie... \n",
+ "12353 durma.2020.3.2072 A silver Roman coin which is a part of the Pie... \n",
+ "\n",
+ " object_name other_name material \\\n",
+ "0 bowls bowl limestone \n",
+ "1 axes: woodworking tools axe-head granite \n",
+ "2 knives knife Flint/Chert \n",
+ "3 Human Figurine imhotep figurine bronze \n",
+ "4 Human Figurine Hathor figurine bronze \n",
+ "... ... ... ... \n",
+ "12349 coins NaN metal \n",
+ "12350 coins NaN metal \n",
+ "12351 coins NaN metal \n",
+ "12352 coins NaN metal \n",
+ "12353 coins NaN metal \n",
+ "\n",
+ " production.period production.place \n",
+ "0 1st Dynasty Egypt \n",
+ "1 NaN Egypt \n",
+ "2 Naqada II Egypt \n",
+ "3 Late Period Egypt \n",
+ "4 Late Period Egypt \n",
+ "... ... ... \n",
+ "12349 Roman Rome \n",
+ "12350 Roman NaN \n",
+ "12351 Roman Rome \n",
+ "12352 Roman Rome \n",
+ "12353 Roman Rome \n",
+ "\n",
+ "[11673 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "obj2info.dropna(subset=[\"material\", \"description\"], inplace=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "label_col = \"material\"\n",
+ "\n",
+ "o2i_lim = obj2info.dropna(subset=[label_col, \"description\"], inplace=False)\n",
+ "\n",
+ "num_counts = o2i_lim[label_col].value_counts()\n",
+ "for lower_lim in [3]:\n",
+ " o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]\n",
+ "train, val_test = train_test_split(\n",
+ " o2i_lim, stratify=o2i_lim[label_col], test_size=0.4, random_state=42\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[label_col], test_size=0.8, random_state=42\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import Dataset, DatasetDict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = Dataset.from_pandas(join_df).to_pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>file</th>\n",
+ " <th>image</th>\n",
+ " <th>root</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>durma.1985.15.68</td>\n",
+ " <td>1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>2 fragments of a bowl with open fret work at t...</td>\n",
+ " <td>None</td>\n",
+ " <td>Rim Sherds</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Post-Medieval</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>1985.52.37.ff2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.f...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Reconstructed small vessel (many pieces with s...</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>durma.1985.81.4496</td>\n",
+ " <td>1985.81.4496 d2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.81.4496...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Fragment of a Samian beaker. Panell decoration...</td>\n",
+ " <td>vessels</td>\n",
+ " <td>pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>durma.1985.9.1</td>\n",
+ " <td>1985.9.1.1-d4.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.9.1.1-d...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>2 Fragmentary Saxon Cinerary Urns + 1 relative...</td>\n",
+ " <td>None</td>\n",
+ " <td>Cinerary Urns</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Saxon</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>1985.52.37.sf2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.s...</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>Reconstructed small vessel (many pieces with s...</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Pottery</td>\n",
+ " <td>pottery</td>\n",
+ " <td>Roman</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37300</th>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Brown,...</td>\n",
+ " <td>blades</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37301</th>\n",
+ " <td>durom.2014.1.71</td>\n",
+ " <td>2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Large,...</td>\n",
+ " <td>axes: woodworking tools</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37302</th>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>One of a collection of 162 flint tools. Brown,...</td>\n",
+ " <td>blades</td>\n",
+ " <td>None</td>\n",
+ " <td>Flint/Chert</td>\n",
+ " <td>Neolithic Period</td>\n",
+ " <td>Egypt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37303</th>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>The woman is dressed in Qing dynasty style and...</td>\n",
+ " <td>figures</td>\n",
+ " <td>牙雕母婴像</td>\n",
+ " <td>ivory</td>\n",
+ " <td>late Qing dynasty</td>\n",
+ " <td>China</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>37304</th>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>The woman is dressed in Qing dynasty style and...</td>\n",
+ " <td>figures</td>\n",
+ " <td>牙雕母婴像</td>\n",
+ " <td>ivory</td>\n",
+ " <td>late Qing dynasty</td>\n",
+ " <td>China</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>37305 rows × 10 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " obj_num file \\\n",
+ "0 durma.1985.15.68 1985.15.68.jpg \n",
+ "1 durma.1985.52.37 1985.52.37.ff2.jpg \n",
+ "2 durma.1985.81.4496 1985.81.4496 d2.jpg \n",
+ "3 durma.1985.9.1 1985.9.1.1-d4.jpg \n",
+ "4 durma.1985.52.37 1985.52.37.sf2.jpg \n",
+ "... ... ... \n",
+ "37300 durom.2014.1.2 2014.1.2 bb.jpg \n",
+ "37301 durom.2014.1.71 2014.1.71 ll.jpg \n",
+ "37302 durom.2014.1.2 2014.1.2 rr.jpg \n",
+ "37303 durom.1963.4 1963.4.jpg \n",
+ "37304 durom.1963.4 1963.4.2.jpg \n",
+ "\n",
+ " image \\\n",
+ "0 data/raw/images/fulling_mill/1985/1985.15.68.jpg \n",
+ "1 data/raw/images/fulling_mill/1985/1985.52.37.f... \n",
+ "2 data/raw/images/fulling_mill/1985/1985.81.4496... \n",
+ "3 data/raw/images/fulling_mill/1985/1985.9.1.1-d... \n",
+ "4 data/raw/images/fulling_mill/1985/1985.52.37.s... \n",
+ "... ... \n",
+ "37300 data/raw/images/egyptian/2014/2014.1.2 bb.jpg \n",
+ "37301 data/raw/images/egyptian/2014/2014.1.71 ll.jpg \n",
+ "37302 data/raw/images/egyptian/2014/2014.1.2 rr.jpg \n",
+ "37303 data/raw/images/egyptian/1963/1963.4.jpg \n",
+ "37304 data/raw/images/egyptian/1963/1963.4.2.jpg \n",
+ "\n",
+ " root \\\n",
+ "0 data/raw/images/fulling_mill/1985 \n",
+ "1 data/raw/images/fulling_mill/1985 \n",
+ "2 data/raw/images/fulling_mill/1985 \n",
+ "3 data/raw/images/fulling_mill/1985 \n",
+ "4 data/raw/images/fulling_mill/1985 \n",
+ "... ... \n",
+ "37300 data/raw/images/egyptian/2014 \n",
+ "37301 data/raw/images/egyptian/2014 \n",
+ "37302 data/raw/images/egyptian/2014 \n",
+ "37303 data/raw/images/egyptian/1963 \n",
+ "37304 data/raw/images/egyptian/1963 \n",
+ "\n",
+ " description \\\n",
+ "0 2 fragments of a bowl with open fret work at t... \n",
+ "1 Reconstructed small vessel (many pieces with s... \n",
+ "2 Fragment of a Samian beaker. Panell decoration... \n",
+ "3 2 Fragmentary Saxon Cinerary Urns + 1 relative... \n",
+ "4 Reconstructed small vessel (many pieces with s... \n",
+ "... ... \n",
+ "37300 One of a collection of 162 flint tools. Brown,... \n",
+ "37301 One of a collection of 162 flint tools. Large,... \n",
+ "37302 One of a collection of 162 flint tools. Brown,... \n",
+ "37303 The woman is dressed in Qing dynasty style and... \n",
+ "37304 The woman is dressed in Qing dynasty style and... \n",
+ "\n",
+ " object_name other_name material production.period \\\n",
+ "0 None Rim Sherds pottery Post-Medieval \n",
+ "1 pottery Pottery pottery Roman \n",
+ "2 vessels pottery pottery Roman \n",
+ "3 None Cinerary Urns pottery Saxon \n",
+ "4 pottery Pottery pottery Roman \n",
+ "... ... ... ... ... \n",
+ "37300 blades None Flint/Chert Neolithic Period \n",
+ "37301 axes: woodworking tools None Flint/Chert Neolithic Period \n",
+ "37302 blades None Flint/Chert Neolithic Period \n",
+ "37303 figures 牙雕母婴像 ivory late Qing dynasty \n",
+ "37304 figures 牙雕母婴像 ivory late Qing dynasty \n",
+ "\n",
+ " production.place \n",
+ "0 None \n",
+ "1 None \n",
+ "2 None \n",
+ "3 None \n",
+ "4 None \n",
+ "... ... \n",
+ "37300 Egypt \n",
+ "37301 Egypt \n",
+ "37302 Egypt \n",
+ "37303 China \n",
+ "37304 China \n",
+ "\n",
+ "[37305 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(6819, 7) (2370, 7) (2370, 7) (11559, 7)\n",
+ "(19246, 10) (6743, 10) (7078, 10) (37305, 10)\n"
+ ]
+ }
+ ],
+ "source": [
+ "index_col = \"obj_num\"\n",
+ "text_col = \"obj_num\"\n",
+ "label_col = \"material\"\n",
+ "lower_lim = 3\n",
+ "problem_type = \"image\"\n",
+ "\n",
+ "\n",
+ "o2i_lim = (\n",
+ " ds.drop_duplicates(subset=[index_col, label_col], inplace=False)\n",
+ " .dropna(subset=[text_col, label_col], inplace=False)\n",
+ " .drop(columns=[\"root\", \"file\", \"image\"], inplace=False)\n",
+ ")\n",
+ "\n",
+ "\n",
+ "num_counts = o2i_lim[label_col].value_counts()\n",
+ "o2i_lim = o2i_lim[o2i_lim[label_col].isin(num_counts[num_counts > lower_lim].index)]\n",
+ "\n",
+ "train, val_test = train_test_split(\n",
+ " o2i_lim, stratify=o2i_lim[label_col], test_size=0.41, random_state=42\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[label_col], test_size=0.5, random_state=42\n",
+ ")\n",
+ "print(train.shape, val.shape, test.shape, o2i_lim.shape)\n",
+ "\n",
+ "if problem_type == \"image\":\n",
+ " train = train.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " val = val.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " test = test.merge(\n",
+ " ds[[\"obj_num\", \"root\", \"file\", \"image\"]], left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\"\n",
+ " )\n",
+ " print(train.shape, val.shape, test.shape, ds.shape)\n",
+ "\n",
+ "# ds_dict = DatasetDict({\"train\": Dataset.from_pandas(train), \"val\": Dataset.from_pandas(val), \"test\": Dataset.from_pandas(test)})\n",
+ "# ds_dict\n",
+ "\n",
+ "# if problem_type == \"image\":\n",
+ "\n",
+ "# o2i_lim_ds = o2i_lim_ds.train_test_split(test_size=0.3, stratify_by_column=label_col, seed=42)\n",
+ "# o2i_lim_ds_valtest = o2i_lim_ds[\"test\"].train_test_split(test_size=0.5, stratify_by_column=label_col, seed=42)\n",
+ "# o2i_lim_ds = DatasetDict({\"train\": o2i_lim_ds[\"train\"], \"val\": o2i_lim_ds_valtest[\"train\"], \"test\": o2i_lim_ds_valtest[\"test\"]})\n",
+ "\n",
+ "# if problem_type == \"image\":\n",
+ "# file2obj = ds[[\"obj_num\", \"file\", \"image\", \"root\"]].drop_duplicates(subset=[\"obj_num\"], inplace=False)\n",
+ "# train = o2i_lim_ds[\"train\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# val = o2i_lim_ds[\"val\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# test = o2i_lim_ds[\"test\"].merge(file2obj, left_on=\"obj_num\", right_on=\"obj_num\", how=\"left\")\n",
+ "# o2i_lim_ds = DatasetDict({\"train\": train, \"val\": val, \"test\": test})\n",
+ "# o2i_lim_ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>obj_num</th>\n",
+ " <th>description</th>\n",
+ " <th>object_name</th>\n",
+ " <th>other_name</th>\n",
+ " <th>material</th>\n",
+ " <th>production.period</th>\n",
+ " <th>production.place</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [obj_num, description, object_name, other_name, material, production.period, production.place]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "o2i_lim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols_to_drop = [\"col1\", \"col2\", \"col3\"]\n",
+ "ds = ds.drop(cols_to_drop, axis=1, errors=\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_lim = ds_all.dropna(subset=[\"image\", args.label_col], inplace=False)\n",
+ "if \"3D\" in args.dataset:\n",
+ " ds_lim = ds_all[ds_all[\"original\"]]\n",
+ "\n",
+ "num_counts = ds_lim[args.label_col].value_counts()\n",
+ "ds_lim = ds_lim[ds_lim[args.label_col].isin(num_counts[num_counts > args.lower_lim].index)]\n",
+ "\n",
+ "train, val_test = train_test_split(\n",
+ " ds_lim,\n",
+ " stratify=ds_lim[args.label_col],\n",
+ " test_size=2 * args.testset_size,\n",
+ " random_state=42,\n",
+ ")\n",
+ "val, test = train_test_split(\n",
+ " val_test, stratify=val_test[args.label_col], test_size=0.5, random_state=42\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
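
Note on the splitting cells above: rare labels are filtered out via value_counts before splitting, the stratified split is done on the deduplicated object table (obj_num), and the per-image rows are only merged back in afterwards, so multiple photographs of one object cannot leak across train/val/test. A minimal, self-contained sketch of that recipe (toy data; the column names and the 0.41/0.5 split fractions are taken from the notebook):

import pandas as pd
from sklearn.model_selection import train_test_split

# Toy object table standing in for o2i_lim (labels are illustrative only)
df = pd.DataFrame(
    {
        "obj_num": [f"obj{i}" for i in range(100)],
        "material": ["pottery"] * 60 + ["bronze"] * 30 + ["ivory"] * 10,
    }
)

label_col, lower_lim = "material", 3

# Drop labels rarer than lower_lim; stratification needs enough of each class
counts = df[label_col].value_counts()
df = df[df[label_col].isin(counts[counts > lower_lim].index)]

# Two chained stratified splits: ~59% train, then val_test halved into val/test
train, val_test = train_test_split(
    df, stratify=df[label_col], test_size=0.41, random_state=42
)
val, test = train_test_split(
    val_test, stratify=val_test[label_col], test_size=0.5, random_state=42
)
print(train.shape, val.shape, test.shape)

Splitting at the object level and merging images back afterwards is also why the notebook's second print shows unequal val/test image counts (6743 vs 7078) despite equal object counts (2370 each): objects carry differing numbers of photographs.
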
0.11-testing_bm_split_sizes.ipynb ADDED
@@ -0,0 +1,644 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "from sklearn.model_selection import train_test_split\n",
12
+ "from datasets import load_dataset\n",
13
+ "from artifact_classification.utils import ConfigLoader"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 3,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "name": "stdout",
23
+ "output_type": "stream",
24
+ "text": [
25
+ "Updating with:\n",
26
+ "{'config': 'testing', 'fast_dev_run': True, 'dataset': 'james-burton/BritishMuseum', 'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3', 'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'}\n",
27
+ "\n",
28
+ "\n",
29
+ "{'config': 'testing', 'fast_dev_run': True, 'do_train': True, 'do_predict': True, 'batch_size': 128, 'output_root': 'models/', 'num_epochs': 100, 'early_stopping_patience': 5, 'grad_accumulation_steps': 1, 'seed': 42, 'logging_steps': 10, 'lr_scheduler': 'linear', 'warmup_ratio': 0, 'weight_decay': 0, 'device': 'cuda', 'num_workers': 1, 'resume_from_checkpoint': False, 'predict_batch_size': 16, 'save_total_limit': 1, 'lr': 5e-05, 'pytorch2_0': True, 'max_length': 512, 'text_column': 'Description', 'fp16': True, 'testset_size': 0.1, 'dataset': 'james-burton/BritishMuseum', 'wandb_proj_name': 'British Museum', 'model_base': 'google/efficientnet-b3', 'problem_type': 'image', 'lower_lim': 5, 'label_col': 'Object type'}\n",
30
+ "\n"
31
+ ]
32
+ },
33
+ {
34
+ "data": {
35
+ "application/vnd.jupyter.widget-view+json": {
36
+ "model_id": "cca9a5e0c5f2487ea3ad65c183da9c90",
37
+ "version_major": 2,
38
+ "version_minor": 0
39
+ },
40
+ "text/plain": [
41
+ "Resolving data files: 0%| | 0/22 [00:00<?, ?it/s]"
42
+ ]
43
+ },
44
+ "metadata": {},
45
+ "output_type": "display_data"
46
+ },
47
+ {
48
+ "data": {
49
+ "application/vnd.jupyter.widget-view+json": {
50
+ "model_id": "431f0e5b4ec84c8693b5c8c18525f810",
51
+ "version_major": 2,
52
+ "version_minor": 0
53
+ },
54
+ "text/plain": [
55
+ "Resolving data files: 0%| | 0/22 [00:00<?, ?it/s]"
56
+ ]
57
+ },
58
+ "metadata": {},
59
+ "output_type": "display_data"
60
+ },
61
+ {
62
+ "data": {
63
+ "application/vnd.jupyter.widget-view+json": {
64
+ "model_id": "afda95f374c1487584af43d91ba321df",
65
+ "version_major": 2,
66
+ "version_minor": 0
67
+ },
68
+ "text/plain": [
69
+ "Loading dataset shards: 0%| | 0/21 [00:00<?, ?it/s]"
70
+ ]
71
+ },
72
+ "metadata": {},
73
+ "output_type": "display_data"
74
+ }
75
+ ],
76
+ "source": [
77
+ "config = \"testing\"\n",
78
+ "args = ConfigLoader(config, \"../configs/train_bm_configs.yaml\", \"../configs/train_bm_default.yaml\")\n",
79
+ "\n",
80
+ "############################## Load dataset ##############################\n",
81
+ "# Load dataset, filter out na inputs and labels and encode labels (as label column can change)\n",
82
+ "\n",
83
+ "\n",
84
+ "label_cols = [\"Object type\", \"Culture\", \"Materials\", \"Production place\"]\n",
85
+ "split_sizes = [0.1, 0.12, 0.13, 0.15, 0.2]\n",
86
+ "\n",
87
+ "ds_lim = load_dataset(args.dataset)[\"train\"].to_pandas()"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 8,
93
+ "metadata": {},
94
+ "outputs": [
95
+ {
96
+ "name": "stdout",
97
+ "output_type": "stream",
98
+ "text": [
99
+ "Split size 0.1 failed\n",
100
+ "Split size 0.12 failed\n",
101
+ "Label col Object type Split size 0.13 passed\n",
102
+ "Label col Object type Split size 0.15 passed\n",
103
+ "Label col Object type Split size 0.2 passed\n",
104
+ "Split size 0.1 failed\n",
105
+ "Split size 0.12 failed\n",
106
+ "Label col Culture Split size 0.13 passed\n",
107
+ "Label col Culture Split size 0.15 passed\n",
108
+ "Label col Culture Split size 0.2 passed\n",
109
+ "Split size 0.1 failed\n",
110
+ "Split size 0.12 failed\n",
111
+ "Label col Materials Split size 0.13 passed\n",
112
+ "Label col Materials Split size 0.15 passed\n",
113
+ "Label col Materials Split size 0.2 passed\n",
114
+ "Split size 0.1 failed\n",
115
+ "Split size 0.12 failed\n",
116
+ "Label col Production place Split size 0.13 passed\n",
117
+ "Label col Production place Split size 0.15 passed\n",
118
+ "Label col Production place Split size 0.2 passed\n"
119
+ ]
120
+ }
121
+ ],
122
+ "source": [
123
+ "def test_split_size(split_size, label_col, ds_lim):\n",
124
+ " try:\n",
125
+ " ds_lim.dropna(subset=[label_col])\n",
126
+ " num_counts = ds_lim[label_col].value_counts()\n",
127
+ " ds_lim = ds_lim[ds_lim[label_col].isin(num_counts[num_counts > args.lower_lim].index)]\n",
128
+ "\n",
129
+ " train, val_test = train_test_split(\n",
130
+ " ds_lim,\n",
131
+ " stratify=ds_lim[label_col],\n",
132
+ " test_size=2 * split_size,\n",
133
+ " random_state=42,\n",
134
+ " )\n",
135
+ " val, test = train_test_split(\n",
136
+ " val_test, stratify=val_test[label_col], test_size=0.5, random_state=42\n",
137
+ " )\n",
138
+ " print(f\"Label col {label_col} Split size {split_size} passed\")\n",
139
+ " except ValueError:\n",
140
+ " print(f\"Split size {split_size} failed\")\n",
141
+ "\n",
142
+ "\n",
143
+ "for label_col in label_cols:\n",
144
+ " for split_size in split_sizes:\n",
145
+ " test_split_size(split_size, label_col, ds_lim)"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": 1,
151
+ "metadata": {},
152
+ "outputs": [],
153
+ "source": [
154
+ "import yaml\n",
155
+ "\n",
156
+ "with open(\"../configs/train_configs.yaml\", \"r\") as file:\n",
157
+ " configs = list(yaml.safe_load_all(file))"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 2,
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "data": {
167
+ "text/plain": [
168
+ "'om3-white_material_bm-pretrn om3-white_name_bm-pretrn om3-3Dwhite_material_bm-pretrn om3-3Dwhite_name_bm-pretrn om3-3Dwhite-1frame_material_bm-pretrn om3-3Dwhite-1frame_name_bm-pretrn om4-white_material_bm-pretrn om4-white_name_bm-pretrn om4-3Dwhite_material_bm-pretrn om4-3Dwhite_name_bm-pretrn om4-3Dwhite-1frame_material_bm-pretrn om4-3Dwhite-1frame_name_bm-pretrn om5-white_material_bm-pretrn om5-white_name_bm-pretrn om5-3Dwhite_material_bm-pretrn om5-3Dwhite_name_bm-pretrn om5-3Dwhite-1frame_material_bm-pretrn om5-3Dwhite-1frame_name_bm-pretrn om6-white_material_bm-pretrn om6-white_name_bm-pretrn om6-3Dwhite_material_bm-pretrn om6-3Dwhite_name_bm-pretrn om6-3Dwhite-1frame_material_bm-pretrn om6-3Dwhite-1frame_name_bm-pretrn'"
169
+ ]
170
+ },
171
+ "execution_count": 2,
172
+ "metadata": {},
173
+ "output_type": "execute_result"
174
+ }
175
+ ],
176
+ "source": [
177
+ "\" \".join(\n",
178
+ " [cfg[\"config\"] for cfg in configs if \"bm\" in cfg[\"config\"] and \"num\" not in cfg[\"config\"]]\n",
179
+ ")"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 18,
185
+ "metadata": {},
186
+ "outputs": [
187
+ {
188
+ "data": {
189
+ "text/plain": [
190
+ "[{'config': 'testing',\n",
191
+ " 'fast_dev_run': True,\n",
192
+ " 'dataset': 'james-burton/BritishMuseum',\n",
193
+ " 'wandb_proj_name': 'British Museum',\n",
194
+ " 'model_base': 'google/efficientnet-b3',\n",
195
+ " 'problem_type': 'image',\n",
196
+ " 'lower_lim': 5,\n",
197
+ " 'label_col': 'Object type'},\n",
198
+ " {'config': 'bm3_type',\n",
199
+ " 'dataset': 'james-burton/BritishMuseum',\n",
200
+ " 'wandb_proj_name': 'British Museum',\n",
201
+ " 'model_base': 'google/efficientnet-b3',\n",
202
+ " 'problem_type': 'image',\n",
203
+ " 'lower_lim': 3,\n",
204
+ " 'label_col': 'Object type',\n",
205
+ " 'testset_size': 0.205},\n",
206
+ " {'config': 'bm3_material',\n",
207
+ " 'dataset': 'james-burton/BritishMuseum',\n",
208
+ " 'wandb_proj_name': 'British Museum',\n",
209
+ " 'model_base': 'google/efficientnet-b3',\n",
210
+ " 'problem_type': 'image',\n",
211
+ " 'lower_lim': 3,\n",
212
+ " 'label_col': 'Materials',\n",
213
+ " 'testset_size': 0.205},\n",
214
+ " {'config': 'bm3_culture',\n",
215
+ " 'dataset': 'james-burton/BritishMuseum',\n",
216
+ " 'wandb_proj_name': 'British Museum',\n",
217
+ " 'model_base': 'google/efficientnet-b3',\n",
218
+ " 'problem_type': 'image',\n",
219
+ " 'lower_lim': 3,\n",
220
+ " 'label_col': 'Culture',\n",
221
+ " 'testset_size': 0.205},\n",
222
+ " {'config': 'bm3txt_material',\n",
223
+ " 'dataset': 'james-burton/BritishMuseum',\n",
224
+ " 'wandb_proj_name': 'British Museum',\n",
225
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
226
+ " 'problem_type': 'text',\n",
227
+ " 'lower_lim': 3,\n",
228
+ " 'label_col': 'Materials',\n",
229
+ " 'testset_size': 0.205},\n",
230
+ " {'config': 'bm3txt_culture',\n",
231
+ " 'dataset': 'james-burton/BritishMuseum',\n",
232
+ " 'wandb_proj_name': 'British Museum',\n",
233
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
234
+ " 'problem_type': 'text',\n",
235
+ " 'lower_lim': 3,\n",
236
+ " 'label_col': 'Culture',\n",
237
+ " 'testset_size': 0.205},\n",
238
+ " {'config': 'bm3-white_type',\n",
239
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
240
+ " 'wandb_proj_name': 'British Museum',\n",
241
+ " 'model_base': 'google/efficientnet-b3',\n",
242
+ " 'problem_type': 'image',\n",
243
+ " 'lower_lim': 3,\n",
244
+ " 'label_col': 'Object type',\n",
245
+ " 'testset_size': 0.205},\n",
246
+ " {'config': 'bm3-white_material',\n",
247
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
248
+ " 'wandb_proj_name': 'British Museum',\n",
249
+ " 'model_base': 'google/efficientnet-b3',\n",
250
+ " 'problem_type': 'image',\n",
251
+ " 'lower_lim': 3,\n",
252
+ " 'label_col': 'Materials',\n",
253
+ " 'testset_size': 0.205},\n",
254
+ " {'config': 'bm3-white_culture',\n",
255
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
256
+ " 'wandb_proj_name': 'British Museum',\n",
257
+ " 'model_base': 'google/efficientnet-b3',\n",
258
+ " 'problem_type': 'image',\n",
259
+ " 'lower_lim': 3,\n",
260
+ " 'label_col': 'Culture',\n",
261
+ " 'testset_size': 0.205},\n",
262
+ " {'config': 'bm3-3Dwhite_type',\n",
263
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
264
+ " 'wandb_proj_name': 'British Museum',\n",
265
+ " 'model_base': 'google/efficientnet-b3',\n",
266
+ " 'problem_type': 'image',\n",
267
+ " 'lower_lim': 3,\n",
268
+ " 'label_col': 'Object type',\n",
269
+ " 'testset_size': 0.205},\n",
270
+ " {'config': 'bm3-3Dwhite_material',\n",
271
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
272
+ " 'wandb_proj_name': 'British Museum',\n",
273
+ " 'model_base': 'google/efficientnet-b3',\n",
274
+ " 'problem_type': 'image',\n",
275
+ " 'lower_lim': 3,\n",
276
+ " 'label_col': 'Materials',\n",
277
+ " 'testset_size': 0.205},\n",
278
+ " {'config': 'bm3-3Dwhite_culture',\n",
279
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
280
+ " 'wandb_proj_name': 'British Museum',\n",
281
+ " 'model_base': 'google/efficientnet-b3',\n",
282
+ " 'problem_type': 'image',\n",
283
+ " 'lower_lim': 3,\n",
284
+ " 'label_col': 'Culture',\n",
285
+ " 'testset_size': 0.205},\n",
286
+ " {'config': 'bm3-3Dwhite-1frame_type',\n",
287
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
288
+ " 'wandb_proj_name': 'British Museum',\n",
289
+ " 'model_base': 'google/efficientnet-b3',\n",
290
+ " 'problem_type': 'image',\n",
291
+ " 'lower_lim': 3,\n",
292
+ " 'label_col': 'Object type',\n",
293
+ " 'testset_size': 0.205},\n",
294
+ " {'config': 'bm3-3Dwhite-1frame_material',\n",
295
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
296
+ " 'wandb_proj_name': 'British Museum',\n",
297
+ " 'model_base': 'google/efficientnet-b3',\n",
298
+ " 'problem_type': 'image',\n",
299
+ " 'lower_lim': 3,\n",
300
+ " 'label_col': 'Materials',\n",
301
+ " 'testset_size': 0.205},\n",
302
+ " {'config': 'bm3-3Dwhite-1frame_culture',\n",
303
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
304
+ " 'wandb_proj_name': 'British Museum',\n",
305
+ " 'model_base': 'google/efficientnet-b3',\n",
306
+ " 'problem_type': 'image',\n",
307
+ " 'lower_lim': 3,\n",
308
+ " 'label_col': 'Culture',\n",
309
+ " 'testset_size': 0.205},\n",
310
+ " {'config': 'bm4_type',\n",
311
+ " 'dataset': 'james-burton/BritishMuseum',\n",
312
+ " 'wandb_proj_name': 'British Museum',\n",
313
+ " 'model_base': 'google/efficientnet-b3',\n",
314
+ " 'problem_type': 'image',\n",
315
+ " 'lower_lim': 4,\n",
316
+ " 'label_col': 'Object type'},\n",
317
+ " {'config': 'bm4_material',\n",
318
+ " 'dataset': 'james-burton/BritishMuseum',\n",
319
+ " 'wandb_proj_name': 'British Museum',\n",
320
+ " 'model_base': 'google/efficientnet-b3',\n",
321
+ " 'problem_type': 'image',\n",
322
+ " 'lower_lim': 4,\n",
323
+ " 'label_col': 'Materials'},\n",
324
+ " {'config': 'bm4_culture',\n",
325
+ " 'dataset': 'james-burton/BritishMuseum',\n",
326
+ " 'wandb_proj_name': 'British Museum',\n",
327
+ " 'model_base': 'google/efficientnet-b3',\n",
328
+ " 'problem_type': 'image',\n",
329
+ " 'lower_lim': 4,\n",
330
+ " 'label_col': 'Culture'},\n",
331
+ " {'config': 'bm4txt_material',\n",
332
+ " 'dataset': 'james-burton/BritishMuseum',\n",
333
+ " 'wandb_proj_name': 'British Museum',\n",
334
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
335
+ " 'problem_type': 'text',\n",
336
+ " 'lower_lim': 4,\n",
337
+ " 'label_col': 'Materials'},\n",
338
+ " {'config': 'bm4txt_culture',\n",
339
+ " 'dataset': 'james-burton/BritishMuseum',\n",
340
+ " 'wandb_proj_name': 'British Museum',\n",
341
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
342
+ " 'problem_type': 'text',\n",
343
+ " 'lower_lim': 4,\n",
344
+ " 'label_col': 'Culture'},\n",
345
+ " {'config': 'bm4-white_type',\n",
346
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
347
+ " 'wandb_proj_name': 'British Museum',\n",
348
+ " 'model_base': 'google/efficientnet-b3',\n",
349
+ " 'problem_type': 'image',\n",
350
+ " 'lower_lim': 4,\n",
351
+ " 'label_col': 'Object type'},\n",
352
+ " {'config': 'bm4-white_material',\n",
353
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
354
+ " 'wandb_proj_name': 'British Museum',\n",
355
+ " 'model_base': 'google/efficientnet-b3',\n",
356
+ " 'problem_type': 'image',\n",
357
+ " 'lower_lim': 4,\n",
358
+ " 'label_col': 'Materials'},\n",
359
+ " {'config': 'bm4-white_culture',\n",
360
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
361
+ " 'wandb_proj_name': 'British Museum',\n",
362
+ " 'model_base': 'google/efficientnet-b3',\n",
363
+ " 'problem_type': 'image',\n",
364
+ " 'lower_lim': 4,\n",
365
+ " 'label_col': 'Culture'},\n",
366
+ " {'config': 'bm4-3Dwhite_type',\n",
367
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
368
+ " 'wandb_proj_name': 'British Museum',\n",
369
+ " 'model_base': 'google/efficientnet-b3',\n",
370
+ " 'problem_type': 'image',\n",
371
+ " 'lower_lim': 4,\n",
372
+ " 'label_col': 'Object type'},\n",
373
+ " {'config': 'bm4-3Dwhite_material',\n",
374
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
375
+ " 'wandb_proj_name': 'British Museum',\n",
376
+ " 'model_base': 'google/efficientnet-b3',\n",
377
+ " 'problem_type': 'image',\n",
378
+ " 'lower_lim': 4,\n",
379
+ " 'label_col': 'Materials'},\n",
380
+ " {'config': 'bm4-3Dwhite_culture',\n",
381
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
382
+ " 'wandb_proj_name': 'British Museum',\n",
383
+ " 'model_base': 'google/efficientnet-b3',\n",
384
+ " 'problem_type': 'image',\n",
385
+ " 'lower_lim': 4,\n",
386
+ " 'label_col': 'Culture'},\n",
387
+ " {'config': 'bm4-3Dwhite-1frame_type',\n",
388
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
389
+ " 'wandb_proj_name': 'British Museum',\n",
390
+ " 'model_base': 'google/efficientnet-b3',\n",
391
+ " 'problem_type': 'image',\n",
392
+ " 'lower_lim': 4,\n",
393
+ " 'label_col': 'Object type'},\n",
394
+ " {'config': 'bm4-3Dwhite-1frame_material',\n",
395
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
396
+ " 'wandb_proj_name': 'British Museum',\n",
397
+ " 'model_base': 'google/efficientnet-b3',\n",
398
+ " 'problem_type': 'image',\n",
399
+ " 'lower_lim': 4,\n",
400
+ " 'label_col': 'Materials'},\n",
401
+ " {'config': 'bm4-3Dwhite-1frame_culture',\n",
402
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
403
+ " 'wandb_proj_name': 'British Museum',\n",
404
+ " 'model_base': 'google/efficientnet-b3',\n",
405
+ " 'problem_type': 'image',\n",
406
+ " 'lower_lim': 4,\n",
407
+ " 'label_col': 'Culture'},\n",
408
+ " {'config': 'bm5_type',\n",
409
+ " 'dataset': 'james-burton/BritishMuseum',\n",
410
+ " 'wandb_proj_name': 'British Museum',\n",
411
+ " 'model_base': 'google/efficientnet-b3',\n",
412
+ " 'problem_type': 'image',\n",
413
+ " 'lower_lim': 5,\n",
414
+ " 'label_col': 'Object type'},\n",
415
+ " {'config': 'bm5_material',\n",
416
+ " 'dataset': 'james-burton/BritishMuseum',\n",
417
+ " 'wandb_proj_name': 'British Museum',\n",
418
+ " 'model_base': 'google/efficientnet-b3',\n",
419
+ " 'problem_type': 'image',\n",
420
+ " 'lower_lim': 5,\n",
421
+ " 'label_col': 'Materials'},\n",
422
+ " {'config': 'bm5_culture',\n",
423
+ " 'dataset': 'james-burton/BritishMuseum',\n",
424
+ " 'wandb_proj_name': 'British Museum',\n",
425
+ " 'model_base': 'google/efficientnet-b3',\n",
426
+ " 'problem_type': 'image',\n",
427
+ " 'lower_lim': 5,\n",
428
+ " 'label_col': 'Culture'},\n",
429
+ " {'config': 'bm5txt_material',\n",
430
+ " 'dataset': 'james-burton/BritishMuseum',\n",
431
+ " 'wandb_proj_name': 'British Museum',\n",
432
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
433
+ " 'problem_type': 'text',\n",
434
+ " 'lower_lim': 5,\n",
435
+ " 'label_col': 'Materials'},\n",
436
+ " {'config': 'bm5txt_culture',\n",
437
+ " 'dataset': 'james-burton/BritishMuseum',\n",
438
+ " 'wandb_proj_name': 'British Museum',\n",
439
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
440
+ " 'problem_type': 'text',\n",
441
+ " 'lower_lim': 5,\n",
442
+ " 'label_col': 'Culture'},\n",
443
+ " {'config': 'bm5-white_type',\n",
444
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
445
+ " 'wandb_proj_name': 'British Museum',\n",
446
+ " 'model_base': 'google/efficientnet-b3',\n",
447
+ " 'problem_type': 'image',\n",
448
+ " 'lower_lim': 5,\n",
449
+ " 'label_col': 'Object type'},\n",
450
+ " {'config': 'bm5-white_material',\n",
451
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
452
+ " 'wandb_proj_name': 'British Museum',\n",
453
+ " 'model_base': 'google/efficientnet-b3',\n",
454
+ " 'problem_type': 'image',\n",
455
+ " 'lower_lim': 5,\n",
456
+ " 'label_col': 'Materials'},\n",
457
+ " {'config': 'bm5-white_culture',\n",
458
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
459
+ " 'wandb_proj_name': 'British Museum',\n",
460
+ " 'model_base': 'google/efficientnet-b3',\n",
461
+ " 'problem_type': 'image',\n",
462
+ " 'lower_lim': 5,\n",
463
+ " 'label_col': 'Culture'},\n",
464
+ " {'config': 'bm5-3Dwhite_type',\n",
465
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
466
+ " 'wandb_proj_name': 'British Museum',\n",
467
+ " 'model_base': 'google/efficientnet-b3',\n",
468
+ " 'problem_type': 'image',\n",
469
+ " 'lower_lim': 5,\n",
470
+ " 'label_col': 'Object type'},\n",
471
+ " {'config': 'bm5-3Dwhite_material',\n",
472
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
473
+ " 'wandb_proj_name': 'British Museum',\n",
474
+ " 'model_base': 'google/efficientnet-b3',\n",
475
+ " 'problem_type': 'image',\n",
476
+ " 'lower_lim': 5,\n",
477
+ " 'label_col': 'Materials'},\n",
478
+ " {'config': 'bm5-3Dwhite_culture',\n",
479
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
480
+ " 'wandb_proj_name': 'British Museum',\n",
481
+ " 'model_base': 'google/efficientnet-b3',\n",
482
+ " 'problem_type': 'image',\n",
483
+ " 'lower_lim': 5,\n",
484
+ " 'label_col': 'Culture'},\n",
485
+ " {'config': 'bm5-3Dwhite-1frame_type',\n",
486
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
487
+ " 'wandb_proj_name': 'British Museum',\n",
488
+ " 'model_base': 'google/efficientnet-b3',\n",
489
+ " 'problem_type': 'image',\n",
490
+ " 'lower_lim': 5,\n",
491
+ " 'label_col': 'Object type'},\n",
492
+ " {'config': 'bm5-3Dwhite-1frame_material',\n",
493
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
494
+ " 'wandb_proj_name': 'British Museum',\n",
495
+ " 'model_base': 'google/efficientnet-b3',\n",
496
+ " 'problem_type': 'image',\n",
497
+ " 'lower_lim': 5,\n",
498
+ " 'label_col': 'Materials'},\n",
499
+ " {'config': 'bm5-3Dwhite-1frame_culture',\n",
500
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
501
+ " 'wandb_proj_name': 'British Museum',\n",
502
+ " 'model_base': 'google/efficientnet-b3',\n",
503
+ " 'problem_type': 'image',\n",
504
+ " 'lower_lim': 5,\n",
505
+ " 'label_col': 'Culture'},\n",
506
+ " {'config': 'bm6_type',\n",
507
+ " 'dataset': 'james-burton/BritishMuseum',\n",
508
+ " 'wandb_proj_name': 'British Museum',\n",
509
+ " 'model_base': 'google/efficientnet-b3',\n",
510
+ " 'problem_type': 'image',\n",
511
+ " 'lower_lim': 6,\n",
512
+ " 'label_col': 'Object type'},\n",
513
+ " {'config': 'bm6_material',\n",
514
+ " 'dataset': 'james-burton/BritishMuseum',\n",
515
+ " 'wandb_proj_name': 'British Museum',\n",
516
+ " 'model_base': 'google/efficientnet-b3',\n",
517
+ " 'problem_type': 'image',\n",
518
+ " 'lower_lim': 6,\n",
519
+ " 'label_col': 'Materials'},\n",
520
+ " {'config': 'bm6_culture',\n",
521
+ " 'dataset': 'james-burton/BritishMuseum',\n",
522
+ " 'wandb_proj_name': 'British Museum',\n",
523
+ " 'model_base': 'google/efficientnet-b3',\n",
524
+ " 'problem_type': 'image',\n",
525
+ " 'lower_lim': 6,\n",
526
+ " 'label_col': 'Culture'},\n",
527
+ " {'config': 'bm6txt_material',\n",
528
+ " 'dataset': 'james-burton/BritishMuseum',\n",
529
+ " 'wandb_proj_name': 'British Museum',\n",
530
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
531
+ " 'problem_type': 'text',\n",
532
+ " 'lower_lim': 6,\n",
533
+ " 'label_col': 'Materials'},\n",
534
+ " {'config': 'bm6txt_culture',\n",
535
+ " 'dataset': 'james-burton/BritishMuseum',\n",
536
+ " 'wandb_proj_name': 'British Museum',\n",
537
+ " 'model_base': 'microsoft/deberta-v3-base',\n",
538
+ " 'problem_type': 'text',\n",
539
+ " 'lower_lim': 6,\n",
540
+ " 'label_col': 'Culture'},\n",
541
+ " {'config': 'bm6-white_type',\n",
542
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
543
+ " 'wandb_proj_name': 'British Museum',\n",
544
+ " 'model_base': 'google/efficientnet-b3',\n",
545
+ " 'problem_type': 'image',\n",
546
+ " 'lower_lim': 6,\n",
547
+ " 'label_col': 'Object type'},\n",
548
+ " {'config': 'bm6-white_material',\n",
549
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
550
+ " 'wandb_proj_name': 'British Museum',\n",
551
+ " 'model_base': 'google/efficientnet-b3',\n",
552
+ " 'problem_type': 'image',\n",
553
+ " 'lower_lim': 6,\n",
554
+ " 'label_col': 'Materials'},\n",
555
+ " {'config': 'bm6-white_culture',\n",
556
+ " 'dataset': 'james-burton/BritishMuseum-white',\n",
557
+ " 'wandb_proj_name': 'British Museum',\n",
558
+ " 'model_base': 'google/efficientnet-b3',\n",
559
+ " 'problem_type': 'image',\n",
560
+ " 'lower_lim': 6,\n",
561
+ " 'label_col': 'Culture'},\n",
562
+ " {'config': 'bm6-3Dwhite_type',\n",
563
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
564
+ " 'wandb_proj_name': 'British Museum',\n",
565
+ " 'model_base': 'google/efficientnet-b3',\n",
566
+ " 'problem_type': 'image',\n",
567
+ " 'lower_lim': 6,\n",
568
+ " 'label_col': 'Object type'},\n",
569
+ " {'config': 'bm6-3Dwhite_material',\n",
570
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
571
+ " 'wandb_proj_name': 'British Museum',\n",
572
+ " 'model_base': 'google/efficientnet-b3',\n",
573
+ " 'problem_type': 'image',\n",
574
+ " 'lower_lim': 6,\n",
575
+ " 'label_col': 'Materials'},\n",
576
+ " {'config': 'bm6-3Dwhite_culture',\n",
577
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite',\n",
578
+ " 'wandb_proj_name': 'British Museum',\n",
579
+ " 'model_base': 'google/efficientnet-b3',\n",
580
+ " 'problem_type': 'image',\n",
581
+ " 'lower_lim': 6,\n",
582
+ " 'label_col': 'Culture'},\n",
583
+ " {'config': 'bm6-3Dwhite-1frame_type',\n",
584
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
585
+ " 'wandb_proj_name': 'British Museum',\n",
586
+ " 'model_base': 'google/efficientnet-b3',\n",
587
+ " 'problem_type': 'image',\n",
588
+ " 'lower_lim': 6,\n",
589
+ " 'label_col': 'Object type'},\n",
590
+ " {'config': 'bm6-3Dwhite-1frame_material',\n",
591
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
592
+ " 'wandb_proj_name': 'British Museum',\n",
593
+ " 'model_base': 'google/efficientnet-b3',\n",
594
+ " 'problem_type': 'image',\n",
595
+ " 'lower_lim': 6,\n",
596
+ " 'label_col': 'Materials'},\n",
597
+ " {'config': 'bm6-3Dwhite-1frame_culture',\n",
598
+ " 'dataset': 'james-burton/BritishMuseum-3Dwhite-1frame',\n",
599
+ " 'wandb_proj_name': 'British Museum',\n",
600
+ " 'model_base': 'google/efficientnet-b3',\n",
601
+ " 'problem_type': 'image',\n",
602
+ " 'lower_lim': 6,\n",
603
+ " 'label_col': 'Culture'}]"
604
+ ]
605
+ },
606
+ "execution_count": 18,
607
+ "metadata": {},
608
+ "output_type": "execute_result"
609
+ }
610
+ ],
611
+ "source": [
612
+ "configs"
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "code",
617
+ "execution_count": null,
618
+ "metadata": {},
619
+ "outputs": [],
620
+ "source": []
621
+ }
622
+ ],
623
+ "metadata": {
624
+ "kernelspec": {
625
+ "display_name": "ArtifactClassification",
626
+ "language": "python",
627
+ "name": "python3"
628
+ },
629
+ "language_info": {
630
+ "codemirror_mode": {
631
+ "name": "ipython",
632
+ "version": 3
633
+ },
634
+ "file_extension": ".py",
635
+ "mimetype": "text/x-python",
636
+ "name": "python",
637
+ "nbconvert_exporter": "python",
638
+ "pygments_lexer": "ipython3",
639
+ "version": "3.10.12"
640
+ }
641
+ },
642
+ "nbformat": 4,
643
+ "nbformat_minor": 2
644
+ }
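
The `configs` output above enumerates every dataset and label-column pairing by hand. A minimal sketch of how the same list could be generated programmatically — `make_configs` is a hypothetical helper, not code from this commit; the field values mirror the output shown:

from itertools import product

# Illustrative sketch only: rebuilds the printed `configs` list from the
# dataset / label-column cross product shown in the notebook output above.
DATASETS = {
    "bm6-white": "james-burton/BritishMuseum-white",
    "bm6-3Dwhite": "james-burton/BritishMuseum-3Dwhite",
    "bm6-3Dwhite-1frame": "james-burton/BritishMuseum-3Dwhite-1frame",
}
LABEL_COLS = {"type": "Object type", "material": "Materials", "culture": "Culture"}

def make_configs(lower_lim: int = 6) -> list[dict]:
    return [
        {
            "config": f"{prefix}_{suffix}",
            "dataset": repo,
            "wandb_proj_name": "British Museum",
            "model_base": "google/efficientnet-b3",
            "problem_type": "image",
            "lower_lim": lower_lim,
            "label_col": label,
        }
        for (prefix, repo), (suffix, label) in product(DATASETS.items(), LABEL_COLS.items())
    ]

Generating the list this way keeps the nine configurations consistent if a dataset or label column is added later.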
0.12-get_wandb_results.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.13-bm_dates_col.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.2-testing_image_scraping.ipynb ADDED
@@ -0,0 +1,140 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1894,1101.507\n",
+ "https://media.britishmuseum.org/media/Repository/Documents/2014_10/6_14/c5015a41_782e_4eb7_badf_a3bc00f54f2c/preview_00426109_001.jpg\n",
+ "Image downloaded successfully!\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1903,1215.10\n",
+ "https://media.britishmuseum.org/media/Repository/Documents/2014_10/15_13/532668b9_0af1_4402_8e13_a3c500e1907c/preview_00944260_001.jpg\n",
+ "Image downloaded successfully!\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/james/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/urllib3/connectionpool.py:1103: InsecureRequestWarning: Unverified HTTPS request is being made to host 'media.britishmuseum.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[9], line 27\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to download image. Status code:\u001b[39m\u001b[38;5;124m\"\u001b[39m, response\u001b[38;5;241m.\u001b[39mstatus_code)\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# wait 20 seconds\u001b[39;00m\n\u001b[0;32m---> 27\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# response = requests.get(url, headers=headers)\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \n\u001b[1;32m 30\u001b[0m \u001b[38;5;66;03m# if response.status_code == 200:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# else:\u001b[39;00m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# print(\"Failed to download image. Status code:\", response.status_code)\u001b[39;00m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import pandas as pd\n",
+ "import time\n",
+ "\n",
+ "url = \"http://media.britishmuseum.org/media/Repository/Documents/2020_2/25_11/8772f2ea_b08f_46cf_8af2_ab6c00c10b84/preview_DSC_0760.jpg\"\n",
+ "headers = {\n",
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\"\n",
+ "}\n",
+ "\n",
+ "\n",
+ "df = pd.read_csv(\"../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv\")\n",
+ "df[\"Museum number\"] = df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
+ "\n",
+ "for index, row in df.iterrows():\n",
+ " print(row[\"Museum number\"])\n",
+ " url = row[\"Image\"]\n",
+ " print(url)\n",
+ " response = requests.get(url, verify=False)\n",
+ " if response.status_code == 200:\n",
+ " with open(f\"../data/raw/BM_images/{row['Museum number']}.jpg\", \"wb\") as f:\n",
+ " f.write(response.content)\n",
+ " print(\"Image downloaded successfully!\")\n",
+ " else:\n",
+ " print(\"Failed to download image. Status code:\", response.status_code)\n",
+ " # wait 20 seconds\n",
+ " time.sleep(20)\n",
+ "# response = requests.get(url, headers=headers)\n",
+ "\n",
+ "# if response.status_code == 200:\n",
+ "# with open(\"image.jpg\", \"wb\") as f:\n",
+ "# f.write(response.content)\n",
+ "# print(\"Image downloaded successfully!\")\n",
+ "# else:\n",
+ "# print(\"Failed to download image. Status code:\", response.status_code)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "460"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(df[\"Museum number\"] == \"null\").sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
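
The scraping loop in 0.2 was interrupted partway (see the KeyboardInterrupt above) and would restart from the top of the CSV. A hedged sketch of a resumable variant that skips files already on disk — `download_images` is a hypothetical helper, not code from this commit; paths and column names follow the notebook:

import time
from pathlib import Path

import pandas as pd
import requests

# Hypothetical resumable variant of the 0.2 loop: files that already exist are
# skipped, so an interrupted run can be restarted without re-downloading.
def download_images(csv_path: str, out_dir: str, delay: float = 20.0) -> None:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(csv_path)
    df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)
    df = df[df["Museum number"] != "null"]  # 460 rows had no usable number above
    for _, row in df.iterrows():
        target = out / f"{row['Museum number']}.jpg"
        if target.exists():
            continue  # already fetched in an earlier run
        resp = requests.get(row["Image"], verify=False, timeout=30)
        if resp.status_code == 200:
            target.write_bytes(resp.content)
        else:
            print("Failed:", row["Museum number"], resp.status_code)
        time.sleep(delay)  # stay polite to the media server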
0.3-testing_csv_join.ipynb ADDED
@@ -0,0 +1,973 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_751668/3571106454.py:2: DeprecationWarning: \n",
+ "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
+ "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
+ "but was not found to be installed on your system.\n",
+ "If this would cause problems for you,\n",
+ "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
+ " \n",
+ " import pandas as pd\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Europe\n",
+ "input_filepath = \"../data/raw\"\n",
+ "csv_files = os.listdir(f\"{input_filepath}/BM_csv_files/3000BC-AD500/\")\n",
+ "europe_csv_files = [file for file in csv_files if \"africa\" in file.lower()]\n",
+ "\n",
+ "if europe_csv_files:\n",
+ " # europe_csv_path = f\"{output_filepath}/BM_images/europe/\"\n",
+ " # if not os.path.exists(europe_csv_path):\n",
+ " # os.makedirs(europe_csv_path)\n",
+ "\n",
+ " europe_df = pd.DataFrame()\n",
+ " for csv_file in europe_csv_files:\n",
+ " csv_path = f\"{input_filepath}/BM_csv_files/3000BC-AD500/{csv_file}\"\n",
+ " df = pd.read_csv(csv_path)\n",
+ " europe_df = pd.concat([europe_df, df], ignore_index=True)\n",
+ "\n",
+ " # europe_df.drop_duplicates(inplace=True)\n",
+ " # europe_df.to_csv(f\"{europe_csv_path}/europe.csv\", index=False)\n",
+ " # print(\"Europe CSV file created successfully!\")\n",
+ "else:\n",
+ " print(\"No Europe CSV files found.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "region = \"africa\"\n",
+ "region_csv_files = [file for file in csv_files if region in file.lower()]\n",
+ "region_df = pd.concat(\n",
+ " [\n",
+ " pd.read_csv(f\"{input_filepath}/BM_csv_files/3000BC-AD500/{file}\")\n",
+ " for file in region_csv_files\n",
+ " ]\n",
+ ")\n",
+ "region_df[\"Museum number\"] = region_df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
+ "region_df.drop_duplicates(inplace=True)\n",
+ "region_df = region_df[region_df[\"Museum number\"] != \"null\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Image</th>\n",
+ " <th>Object type</th>\n",
+ " <th>Museum number</th>\n",
+ " <th>Title</th>\n",
+ " <th>Denomination</th>\n",
+ " <th>Escapement</th>\n",
+ " <th>Description</th>\n",
+ " <th>Producer name</th>\n",
+ " <th>School/style</th>\n",
+ " <th>State</th>\n",
+ " <th>...</th>\n",
+ " <th>Acq date</th>\n",
+ " <th>Acq notes (acq)</th>\n",
+ " <th>Acq notes (exc)</th>\n",
+ " <th>Dept</th>\n",
+ " <th>BM/Big number</th>\n",
+ " <th>Reg number</th>\n",
+ " <th>Add ids</th>\n",
+ " <th>Cat no</th>\n",
+ " <th>Banknote serial number</th>\n",
+ " <th>Joined objects</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acorn lekythos</td>\n",
+ " <td>No: 1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1888</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>No: 1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble corner palmetto with bird...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>No: 1886,0401.1215</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>Miscellaneous number: 1886,0401.44</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>adze; hoe</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Iron adze or hoe.</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886 by Petrie.</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 1886.XI.5 (Publication p...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>aegis; votive offering</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Solid-cast fragmentary Menat-counterweight of ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885. 1885: excavated by the Eg...</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 86.339 (Accession Number...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44921</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whetstone</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Whetstone. Well worn on both sides; dull beige.</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 2478 (Accession Number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44922</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whetstone</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Whetstone (?), made out of sandstone, in the s...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885. 1885: excavated by Willia...</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 86.185 (Accession Number...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44923</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whistle</td>\n",
+ " <td>No: EA22513</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>A terracotta whistle, still working, roughly i...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1885</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Egypt and Sudan</td>\n",
+ " <td>EA22513</td>\n",
+ " <td>1885,0101.361</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44924</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>whistle</td>\n",
+ " <td>No: 1906,0301.7</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Hand-modelled terracotta whistle, still workin...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1906</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated May 1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1906,0301.7</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>44925</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>null; plate</td>\n",
+ " <td>No: null</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Body of North Ionian Late Wild Goat Style pott...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>External</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Miscellaneous number: 26.2.35 (Accession Number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>44926 rows × 47 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Image \\\n",
+ "0 https://media.britishmuseum.org/media/Reposito... \n",
+ "1 https://media.britishmuseum.org/media/Reposito... \n",
+ "2 https://media.britishmuseum.org/media/Reposito... \n",
+ "3 https://media.britishmuseum.org/media/Reposito... \n",
+ "4 https://media.britishmuseum.org/media/Reposito... \n",
+ "... ... \n",
+ "44921 https://media.britishmuseum.org/media/Reposito... \n",
+ "44922 https://media.britishmuseum.org/media/Reposito... \n",
+ "44923 https://media.britishmuseum.org/media/Reposito... \n",
+ "44924 https://media.britishmuseum.org/media/Reposito... \n",
+ "44925 https://media.britishmuseum.org/media/Reposito... \n",
+ "\n",
+ " Object type Museum number Title Denomination \\\n",
+ "0 acorn lekythos No: 1888,0601.716 NaN NaN \n",
+ "1 acroterion No: 1886,0401.45 NaN NaN \n",
+ "2 acroterion No: 1886,0401.1215 NaN NaN \n",
+ "3 adze; hoe No: null NaN NaN \n",
+ "4 aegis; votive offering No: null NaN NaN \n",
+ "... ... ... ... ... \n",
+ "44921 whetstone No: null NaN NaN \n",
+ "44922 whetstone No: null NaN NaN \n",
+ "44923 whistle No: EA22513 NaN NaN \n",
+ "44924 whistle No: 1906,0301.7 NaN NaN \n",
+ "44925 null; plate No: null NaN NaN \n",
+ "\n",
+ " Escapement Description \\\n",
+ "0 NaN Attic red-figured pottery acorn lekythos, rest... \n",
+ "1 NaN Fragment of a marble corner palmetto with bird... \n",
+ "2 NaN Fragment of a marble acroterion palmetto. Two ... \n",
+ "3 NaN Iron adze or hoe. \n",
+ "4 NaN Solid-cast fragmentary Menat-counterweight of ... \n",
+ "... ... ... \n",
+ "44921 NaN Whetstone. Well worn on both sides; dull beige. \n",
+ "44922 NaN Whetstone (?), made out of sandstone, in the s... \n",
+ "44923 NaN A terracotta whistle, still working, roughly i... \n",
+ "44924 NaN Hand-modelled terracotta whistle, still workin... \n",
+ "44925 NaN Body of North Ionian Late Wild Goat Style pott... \n",
+ "\n",
+ " Producer name School/style State ... Acq date Acq notes (acq) \\\n",
+ "0 NaN NaN NaN ... 1888 NaN \n",
+ "1 NaN NaN NaN ... NaN NaN \n",
+ "2 NaN NaN NaN ... 1886 NaN \n",
+ "3 NaN NaN NaN ... NaN NaN \n",
+ "4 NaN NaN NaN ... NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "44921 NaN NaN NaN ... NaN NaN \n",
+ "44922 NaN NaN NaN ... 1886 NaN \n",
+ "44923 NaN NaN NaN ... 1885 NaN \n",
+ "44924 NaN NaN NaN ... 1906 NaN \n",
+ "44925 NaN NaN NaN ... NaN NaN \n",
+ "\n",
+ " Acq notes (exc) Dept \\\n",
+ "0 Excavated 1885-1886. Greek and Roman \n",
+ "1 NaN Greek and Roman \n",
+ "2 NaN Greek and Roman \n",
+ "3 Excavated 1885-1886 by Petrie. External \n",
+ "4 Excavated 1884-1885. 1885: excavated by the Eg... External \n",
+ "... ... ... \n",
+ "44921 NaN External \n",
+ "44922 Excavated 1884-1885. 1885: excavated by Willia... External \n",
+ "44923 NaN Egypt and Sudan \n",
+ "44924 Excavated May 1886. Greek and Roman \n",
+ "44925 NaN External \n",
+ "\n",
+ " BM/Big number Reg number \\\n",
+ "0 NaN 1888,0601.716 \n",
+ "1 NaN 1886,0401.45 \n",
+ "2 NaN 1886,0401.1215 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "44921 NaN NaN \n",
+ "44922 NaN NaN \n",
+ "44923 EA22513 1885,0101.361 \n",
+ "44924 NaN 1906,0301.7 \n",
+ "44925 NaN NaN \n",
+ "\n",
+ " Add ids Cat no \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 Miscellaneous number: 1886,0401.44 NaN \n",
+ "3 Miscellaneous number: 1886.XI.5 (Publication p... NaN \n",
+ "4 Miscellaneous number: 86.339 (Accession Number... NaN \n",
+ "... ... ... \n",
+ "44921 Miscellaneous number: 2478 (Accession Number) NaN \n",
+ "44922 Miscellaneous number: 86.185 (Accession Number... NaN \n",
+ "44923 NaN NaN \n",
+ "44924 NaN NaN \n",
+ "44925 Miscellaneous number: 26.2.35 (Accession Number) NaN \n",
+ "\n",
+ " Banknote serial number Joined objects \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "44921 NaN NaN \n",
+ "44922 NaN NaN \n",
+ "44923 NaN NaN \n",
+ "44924 NaN NaN \n",
+ "44925 NaN NaN \n",
+ "\n",
+ "[44926 rows x 47 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "europe_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "region_df.drop_duplicates(inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Image</th>\n",
+ " <th>Object type</th>\n",
+ " <th>Museum number</th>\n",
+ " <th>Title</th>\n",
+ " <th>Denomination</th>\n",
+ " <th>Escapement</th>\n",
+ " <th>Description</th>\n",
+ " <th>Producer name</th>\n",
+ " <th>School/style</th>\n",
+ " <th>State</th>\n",
+ " <th>...</th>\n",
+ " <th>Acq date</th>\n",
+ " <th>Acq notes (acq)</th>\n",
+ " <th>Acq notes (exc)</th>\n",
+ " <th>Dept</th>\n",
+ " <th>BM/Big number</th>\n",
+ " <th>Reg number</th>\n",
+ " <th>Add ids</th>\n",
+ " <th>Cat no</th>\n",
+ " <th>Banknote serial number</th>\n",
+ " <th>Joined objects</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acorn lekythos</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Attic red-figured pottery acorn lekythos, rest...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1888</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1885-1886.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1888,0601.716</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble corner palmetto with bird...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.45</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>acroterion</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Fragment of a marble acroterion palmetto. Two ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1215</td>\n",
+ " <td>Miscellaneous number: 1886,0401.44</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>7</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>alabastron</td>\n",
+ " <td>1894,1101.213</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Core-formed glass alabastron.\\r\\nOpaque orange...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1894</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1894,1101.213</td>\n",
+ " <td>Miscellaneous number: DBH.0056 (Harden number)</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>8</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>alabastron</td>\n",
+ " <td>132114</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Large baggy alabastron of horizontal banded, t...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1857</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Middle East</td>\n",
+ " <td>132114</td>\n",
+ " <td>1857,1220.1</td>\n",
+ " <td>Miscellaneous number: 416 (paper label attache...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9301</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>vessel-fitting; lekane</td>\n",
+ " <td>1886,0401.1218</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Chian pottery plastic head, originally attache...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1218</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9302</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>vessel-fitting; lid</td>\n",
+ " <td>1886,0401.1429</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Plastic double head of Chian, probably black-f...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Excavated 1884-1885.</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.1429</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9304</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>volute krater</td>\n",
+ " <td>1924,1201.41</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Volute handle and rim sherd (consisting of 3 f...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1924,1201.41</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9310</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>volute krater</td>\n",
+ " <td>1924,1201.40</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Sherd (mended from two fragments) of Laconian ...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1924,1201.40</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9317</th>\n",
+ " <td>https://media.britishmuseum.org/media/Reposito...</td>\n",
+ " <td>wall-painting</td>\n",
+ " <td>1886,0401.67</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Stucco wall fragment, with marks of pointed to...</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>...</td>\n",
+ " <td>1886</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>Greek and Roman</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1886,0401.67</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>19570 rows × 47 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Image \\\n",
+ "0 https://media.britishmuseum.org/media/Reposito... \n",
+ "1 https://media.britishmuseum.org/media/Reposito... \n",
+ "2 https://media.britishmuseum.org/media/Reposito... \n",
+ "7 https://media.britishmuseum.org/media/Reposito... \n",
+ "8 https://media.britishmuseum.org/media/Reposito... \n",
+ "... ... \n",
+ "9301 https://media.britishmuseum.org/media/Reposito... \n",
+ "9302 https://media.britishmuseum.org/media/Reposito... \n",
+ "9304 https://media.britishmuseum.org/media/Reposito... \n",
+ "9310 https://media.britishmuseum.org/media/Reposito... \n",
+ "9317 https://media.britishmuseum.org/media/Reposito... \n",
+ "\n",
+ " Object type Museum number Title Denomination Escapement \\\n",
+ "0 acorn lekythos 1888,0601.716 NaN NaN NaN \n",
+ "1 acroterion 1886,0401.45 NaN NaN NaN \n",
+ "2 acroterion 1886,0401.1215 NaN NaN NaN \n",
+ "7 alabastron 1894,1101.213 NaN NaN NaN \n",
+ "8 alabastron 132114 NaN NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "9301 vessel-fitting; lekane 1886,0401.1218 NaN NaN NaN \n",
+ "9302 vessel-fitting; lid 1886,0401.1429 NaN NaN NaN \n",
+ "9304 volute krater 1924,1201.41 NaN NaN NaN \n",
+ "9310 volute krater 1924,1201.40 NaN NaN NaN \n",
+ "9317 wall-painting 1886,0401.67 NaN NaN NaN \n",
+ "\n",
+ " Description Producer name \\\n",
+ "0 Attic red-figured pottery acorn lekythos, rest... NaN \n",
+ "1 Fragment of a marble corner palmetto with bird... NaN \n",
+ "2 Fragment of a marble acroterion palmetto. Two ... NaN \n",
+ "7 Core-formed glass alabastron.\\r\\nOpaque orange... NaN \n",
+ "8 Large baggy alabastron of horizontal banded, t... NaN \n",
+ "... ... ... \n",
+ "9301 Chian pottery plastic head, originally attache... NaN \n",
+ "9302 Plastic double head of Chian, probably black-f... NaN \n",
+ "9304 Volute handle and rim sherd (consisting of 3 f... NaN \n",
+ "9310 Sherd (mended from two fragments) of Laconian ... NaN \n",
+ "9317 Stucco wall fragment, with marks of pointed to... NaN \n",
+ "\n",
+ " School/style State ... Acq date Acq notes (acq) Acq notes (exc) \\\n",
+ "0 NaN NaN ... 1888 NaN Excavated 1885-1886. \n",
+ "1 NaN NaN ... NaN NaN NaN \n",
+ "2 NaN NaN ... 1886 NaN NaN \n",
+ "7 NaN NaN ... 1894 NaN NaN \n",
+ "8 NaN NaN ... 1857 NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "9301 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
+ "9302 NaN NaN ... 1886 NaN Excavated 1884-1885. \n",
+ "9304 NaN NaN ... NaN NaN NaN \n",
+ "9310 NaN NaN ... NaN NaN NaN \n",
+ "9317 NaN NaN ... 1886 NaN NaN \n",
+ "\n",
+ " Dept BM/Big number Reg number \\\n",
+ "0 Greek and Roman NaN 1888,0601.716 \n",
+ "1 Greek and Roman NaN 1886,0401.45 \n",
+ "2 Greek and Roman NaN 1886,0401.1215 \n",
+ "7 Greek and Roman NaN 1894,1101.213 \n",
+ "8 Middle East 132114 1857,1220.1 \n",
+ "... ... ... ... \n",
+ "9301 Greek and Roman NaN 1886,0401.1218 \n",
+ "9302 Greek and Roman NaN 1886,0401.1429 \n",
+ "9304 Greek and Roman NaN 1924,1201.41 \n",
+ "9310 Greek and Roman NaN 1924,1201.40 \n",
+ "9317 Greek and Roman NaN 1886,0401.67 \n",
+ "\n",
+ " Add ids Cat no \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 Miscellaneous number: 1886,0401.44 NaN \n",
+ "7 Miscellaneous number: DBH.0056 (Harden number) NaN \n",
+ "8 Miscellaneous number: 416 (paper label attache... NaN \n",
+ "... ... ... \n",
+ "9301 NaN NaN \n",
+ "9302 NaN NaN \n",
+ "9304 NaN NaN \n",
+ "9310 NaN NaN \n",
+ "9317 NaN NaN \n",
+ "\n",
+ " Banknote serial number Joined objects \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "7 NaN NaN \n",
+ "8 NaN NaN \n",
+ "... ... ... \n",
+ "9301 NaN NaN \n",
+ "9302 NaN NaN \n",
+ "9304 NaN NaN \n",
+ "9310 NaN NaN \n",
+ "9317 NaN NaN \n",
+ "\n",
+ "[19570 rows x 47 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "region_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
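
A small inconsistency worth noting in 0.3: the first cell builds `europe_df` under a `# Europe` comment but filters filenames on "africa", so the variable name does not match the data actually loaded. A hedged sketch of the later, parameterised approach wrapped into one function — `load_region` is hypothetical, not code from this commit; the cleaning steps mirror the notebook's third cell:

import os

import pandas as pd

# Hypothetical tidy-up of the per-region join in 0.3: one region parameter
# drives both the filename filter and the result, avoiding name mismatches.
def load_region(input_filepath: str, region: str) -> pd.DataFrame:
    csv_dir = f"{input_filepath}/BM_csv_files/3000BC-AD500"
    files = [f for f in os.listdir(csv_dir) if region.lower() in f.lower()]
    if not files:
        raise FileNotFoundError(f"No CSV files found for region {region!r}")
    df = pd.concat(
        (pd.read_csv(os.path.join(csv_dir, f)) for f in files), ignore_index=True
    )
    # Same cleaning as the notebook: strip the "No: " prefix, drop duplicates
    # and rows whose museum number is the literal string "null".
    df["Museum number"] = df["Museum number"].str.replace(r"^No: ", "", regex=True)
    df = df.drop_duplicates()
    return df[df["Museum number"] != "null"]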
0.4-testing_tif_images.ipynb ADDED
@@ -0,0 +1,71 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from PIL import Image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "im = Image.open(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\")\n",
+ "name = str(\"../data/raw/images/castle/1924/1924_4_738a_small.tif\").rstrip(\".tif\")\n",
+ "im.save(\"image\" + \".jpg\", \"JPEG\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
+ "Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
+ "eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
+ ]
+ }
+ ],
+ "source": [
+ "im.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
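
One pitfall in 0.4: `str.rstrip(".tif")` strips any trailing run of the characters `.`, `t`, `i` and `f`, not the literal suffix, so a stem ending in those letters would lose more than its extension (here the `name` variable is unused, so the bug stays latent). A suffix-safe sketch for batch conversion — `convert_tifs` is a hypothetical helper, not code from this commit:

from pathlib import Path

from PIL import Image

# Hypothetical batch version of the TIF-to-JPEG test in 0.4. Path.with_suffix
# replaces only the real extension, avoiding the rstrip(".tif") pitfall.
def convert_tifs(root: str) -> None:
    for tif_path in Path(root).rglob("*.tif"):
        jpg_path = tif_path.with_suffix(".jpg")
        if jpg_path.exists():
            continue  # already converted
        with Image.open(tif_path) as im:
            im.convert("RGB").save(jpg_path, "JPEG")  # JPEG has no alpha channel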
0.5-testing_transparent_background.ipynb ADDED
@@ -0,0 +1,321 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cv2\n",
+ "import numpy as np\n",
+ "\n",
+ "from PIL import Image\n",
+ "from transparent_background import Remover\n",
+ "import pandas as pd\n",
+ "from tqdm import tqdm\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Settings -> Mode=base-nightly, Device=cuda:0, Torchscript=disabled\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load model\n",
+ "# remover = Remover() # default setting\n",
+ "# remover = Remover(mode='fast', jit=True, device='cuda:0', ckpt='~/latest.pth', url=\"https://drive.google.com/file/d/13oBl5MTVcWER3YU4fSxW3ATlVfueFQPY/view?usp=share_link\", ckpt_name=\"ckpt_base.pth\")\n",
+ "remover = Remover(mode=\"base-nightly\") # nightly release checkpoint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Usage for image\n",
+ "img = Image.open(\"../data/raw/images/egyptian/1953/1953.1-tt.jpg\").convert(\"RGB\") # read image\n",
+ "\n",
+ "out = remover.process(img) # default setting - transparent background\n",
+ "# out = remover.process(img, type='rgba') # same as above\n",
+ "# out = remover.process(img, type='map') # object map only\n",
+ "# out = remover.process(img, type='green') # image matting - green screen\n",
+ "# out = remover.process(img, type='white') # change background with white color\n",
+ "# out = remover.process(img, type=[255, 0, 0]) # change background with color code [255, 0, 0]\n",
+ "# out = remover.process(img, type='blur') # blur background\n",
+ "# out = remover.process(img, type='overlay') # overlay object map onto the image\n",
+ "# out = remover.process(img, type='samples/background.jpg') # use another image as a background\n",
+ "\n",
+ "# out = remover.process(img, threshold=0.5) # use threshold parameter for hard prediction.\n",
+ "\n",
+ "out.save(\"output.png\") # save result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "img_df = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
+ "img_df[\"full_path\"] = img_df.apply(lambda row: os.path.join(row[\"root\"], row[\"file\"]), axis=1)\n",
+ "img_df[\"new_root\"] = img_df[\"root\"].apply(\n",
+ " lambda x: x.replace(\"data/raw/images/\", \"data/processed/OM_images_white/\")\n",
+ ")\n",
+ "img_df[\"new_full_path\"] = img_df.apply(lambda row: os.path.join(row[\"new_root\"], row[\"file\"]), axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>file</th>\n",
+ " <th>root</th>\n",
+ " <th>obj_num</th>\n",
+ " <th>full_path</th>\n",
+ " <th>new_root</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>1985.15.68.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.15.68</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.15.68.jpg</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1985.52.37.ff2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.f...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>1985.81.4496 d2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.81.4496</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.81.4496...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>1985.9.1.1-d4.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.9.1</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.9.1.1-d...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>1985.52.37.sf2.jpg</td>\n",
+ " <td>data/raw/images/fulling_mill/1985</td>\n",
+ " <td>durma.1985.52.37</td>\n",
+ " <td>data/raw/images/fulling_mill/1985/1985.52.37.s...</td>\n",
+ " <td>data/processed/OM_images_white/fulling_mill/1985</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39239</th>\n",
+ " <td>2014.1.2 bb.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 bb.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39240</th>\n",
+ " <td>2014.1.71 ll.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.71</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.71 ll.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39241</th>\n",
+ " <td>2014.1.2 rr.jpg</td>\n",
+ " <td>data/raw/images/egyptian/2014</td>\n",
+ " <td>durom.2014.1.2</td>\n",
+ " <td>data/raw/images/egyptian/2014/2014.1.2 rr.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/2014</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39242</th>\n",
+ " <td>1963.4.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/1963</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39243</th>\n",
+ " <td>1963.4.2.jpg</td>\n",
+ " <td>data/raw/images/egyptian/1963</td>\n",
+ " <td>durom.1963.4</td>\n",
+ " <td>data/raw/images/egyptian/1963/1963.4.2.jpg</td>\n",
+ " <td>data/processed/OM_images_white/egyptian/1963</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>39244 rows × 5 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " file root \\\n",
+ "0 1985.15.68.jpg data/raw/images/fulling_mill/1985 \n",
+ "1 1985.52.37.ff2.jpg data/raw/images/fulling_mill/1985 \n",
+ "2 1985.81.4496 d2.jpg data/raw/images/fulling_mill/1985 \n",
+ "3 1985.9.1.1-d4.jpg data/raw/images/fulling_mill/1985 \n",
+ "4 1985.52.37.sf2.jpg data/raw/images/fulling_mill/1985 \n",
+ "... ... ... \n",
+ "39239 2014.1.2 bb.jpg data/raw/images/egyptian/2014 \n",
+ "39240 2014.1.71 ll.jpg data/raw/images/egyptian/2014 \n",
+ "39241 2014.1.2 rr.jpg data/raw/images/egyptian/2014 \n",
+ "39242 1963.4.jpg data/raw/images/egyptian/1963 \n",
+ "39243 1963.4.2.jpg data/raw/images/egyptian/1963 \n",
+ "\n",
+ " obj_num full_path \\\n",
+ "0 durma.1985.15.68 data/raw/images/fulling_mill/1985/1985.15.68.jpg \n",
+ "1 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.f... \n",
+ "2 durma.1985.81.4496 data/raw/images/fulling_mill/1985/1985.81.4496... \n",
+ "3 durma.1985.9.1 data/raw/images/fulling_mill/1985/1985.9.1.1-d... \n",
+ "4 durma.1985.52.37 data/raw/images/fulling_mill/1985/1985.52.37.s... \n",
+ "... ... ... \n",
+ "39239 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 bb.jpg \n",
+ "39240 durom.2014.1.71 data/raw/images/egyptian/2014/2014.1.71 ll.jpg \n",
+ "39241 durom.2014.1.2 data/raw/images/egyptian/2014/2014.1.2 rr.jpg \n",
+ "39242 durom.1963.4 data/raw/images/egyptian/1963/1963.4.jpg \n",
+ "39243 durom.1963.4 data/raw/images/egyptian/1963/1963.4.2.jpg \n",
+ "\n",
+ " new_root \n",
+ "0 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "1 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "2 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "3 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "4 data/processed/OM_images_white/fulling_mill/1985 \n",
+ "... ... \n",
+ "39239 data/processed/OM_images_white/egyptian/2014 \n",
+ "39240 data/processed/OM_images_white/egyptian/2014 \n",
+ "39241 data/processed/OM_images_white/egyptian/2014 \n",
+ "39242 data/processed/OM_images_white/egyptian/1963 \n",
+ "39243 data/processed/OM_images_white/egyptian/1963 \n",
+ "\n",
+ "[39244 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "img_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 0%| | 84/39244 [00:06<52:59, 12.32it/s] \n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[26], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, row \u001b[38;5;129;01min\u001b[39;00m tqdm(img_df\u001b[38;5;241m.\u001b[39miterrows(), total\u001b[38;5;241m=\u001b[39mimg_df\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]):\n\u001b[1;32m 2\u001b[0m img \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mopen(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfull_path\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;66;03m# read image\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mremover\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwhite\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# change backround with white color\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# make sure the directory exists\u001b[39;00m\n\u001b[1;32m 5\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m+\u001b[39mrow[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnew_root\u001b[39m\u001b[38;5;124m'\u001b[39m], exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/Remover.py:154\u001b[0m, in \u001b[0;36mRemover.process\u001b[0;34m(self, img, type, threshold)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;124;03m img (PIL.Image): input image as PIL.Image type\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 151\u001b[0m \n\u001b[1;32m 152\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 153\u001b[0m shape \u001b[38;5;241m=\u001b[39m img\u001b[38;5;241m.\u001b[39msize[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m--> 154\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 155\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 156\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/torchvision/transforms/transforms.py:95\u001b[0m, in \u001b[0;36mCompose.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, img):\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransforms:\n\u001b[0;32m---> 95\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transparent_background/utils.py:105\u001b[0m, in \u001b[0;36mnormalize.__call__\u001b[0;34m(self, img)\u001b[0m\n\u001b[1;32m 103\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdiv\n\u001b[1;32m 104\u001b[0m img \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmean\n\u001b[0;32m--> 105\u001b[0m img \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstd\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m img\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "for index, row in tqdm(img_df.iterrows(), total=img_df.shape[0]):\n",
+ " img = Image.open('../' + row['full_path']).convert('RGB') # read image\n",
+ " out = remover.process(img, type='white') # change background with white color\n",
+ " # make sure the directory exists\n",
+ " os.makedirs('../' + row['new_root'], exist_ok=True)\n",
+ " out.save('../' + row['new_full_path']) # save result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ArtifactClassification",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
0.7Mahnaz-efficientnet.ipynb ADDED
@@ -0,0 +1,492 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Finetuning efficientNet"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "Lets try the model that is trending for image classification on HuggingFace: efficientnet_b2.ra_in1k"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "data": {
24
+ "application/vnd.jupyter.widget-view+json": {
25
+ "model_id": "0a87f155dc5d480c8b68caf0c69f69cd",
26
+ "version_major": 2,
27
+ "version_minor": 0
28
+ },
29
+ "text/plain": [
30
+ "Downloading readme: 0%| | 0.00/5.16k [00:00<?, ?B/s]"
31
+ ]
32
+ },
33
+ "metadata": {},
34
+ "output_type": "display_data"
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "8b705267629c4028bc48465ab583337b",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Downloading data: 0%| | 0.00/120M [00:00<?, ?B/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "data": {
52
+ "application/vnd.jupyter.widget-view+json": {
53
+ "model_id": "c825ef39efb24233acabac80abb960fa",
54
+ "version_major": 2,
55
+ "version_minor": 0
56
+ },
57
+ "text/plain": [
58
+ "Downloading data: 0%| | 0.00/23.9M [00:00<?, ?B/s]"
59
+ ]
60
+ },
61
+ "metadata": {},
62
+ "output_type": "display_data"
63
+ },
64
+ {
65
+ "data": {
66
+ "application/vnd.jupyter.widget-view+json": {
67
+ "model_id": "d41e4e130864414dba3e419eada3941d",
68
+ "version_major": 2,
69
+ "version_minor": 0
70
+ },
71
+ "text/plain": [
72
+ "Generating train split: 0%| | 0/50000 [00:00<?, ? examples/s]"
73
+ ]
74
+ },
75
+ "metadata": {},
76
+ "output_type": "display_data"
77
+ },
78
+ {
79
+ "data": {
80
+ "application/vnd.jupyter.widget-view+json": {
81
+ "model_id": "7e5cfe4b7b7244beb6a2b19d24fa9c63",
82
+ "version_major": 2,
83
+ "version_minor": 0
84
+ },
85
+ "text/plain": [
86
+ "Generating test split: 0%| | 0/10000 [00:00<?, ? examples/s]"
87
+ ]
88
+ },
89
+ "metadata": {},
90
+ "output_type": "display_data"
91
+ }
92
+ ],
93
+ "source": [
94
+ "from datasets import load_dataset\n",
95
+ "\n",
96
+ "cifar10dataset = load_dataset(\"cifar10\", split=\"train\")"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 2,
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "cifar10dataset = cifar10dataset.train_test_split(test_size=0.2)"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 3,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "labels = cifar10dataset[\"train\"].features[\"label\"].names\n",
115
+ "label2id, id2label = dict(), dict()\n",
116
+ "for i, label in enumerate(labels):\n",
117
+ " label2id[label] = str(i)\n",
118
+ " id2label[str(i)] = label"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 4,
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "from transformers import AutoImageProcessor\n",
128
+ "\n",
129
+ "# import timm\n",
130
+ "# model = timm.create_model(\"hf_hub:timm/efficientnet_b2.ra_in1k\", pretrained=True)\n",
131
+ "\n",
132
+ "checkpoint = \"google/efficientnet-b3\"\n",
133
+ "image_processor = AutoImageProcessor.from_pretrained(checkpoint)"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 5,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
143
+ "\n",
144
+ "normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
145
+ "size = (\n",
146
+ " image_processor.size[\"shortest_edge\"]\n",
147
+ " if \"shortest_edge\" in image_processor.size\n",
148
+ " else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
149
+ ")\n",
150
+ "_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 6,
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "def transforms(examples):\n",
160
+ " examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"img\"]]\n",
161
+ " del examples[\"img\"]\n",
162
+ " return examples"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 14,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "import pandas as pd\n",
172
+ "import os\n",
173
+ "\n",
174
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
175
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
176
+ "\n",
177
+ "# Group by 'obj_num' and count occurrences\n",
178
+ "obj_num_counts = file2obj[\"obj_num\"].value_counts()\n",
179
+ "\n",
180
+ "# Filter rows where 'obj_num' appears more than twice\n",
181
+ "file2obj_3 = file2obj[file2obj[\"obj_num\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": 15,
187
+ "metadata": {},
188
+ "outputs": [
189
+ {
190
+ "data": {
191
+ "application/vnd.jupyter.widget-view+json": {
192
+ "model_id": "352630377c4f42adad3b161fd95e7545",
193
+ "version_major": 2,
194
+ "version_minor": 0
195
+ },
196
+ "text/plain": [
197
+ "Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
198
+ ]
199
+ },
200
+ "metadata": {},
201
+ "output_type": "display_data"
202
+ }
203
+ ],
204
+ "source": [
205
+ "from datasets import Dataset, Image, DatasetDict\n",
206
+ "\n",
207
+ "\n",
208
+ "ds = Dataset.from_pandas(file2obj_3[[\"image\", \"obj_num\"]], preserve_index=False).cast_column(\n",
209
+ " \"image\", Image()\n",
210
+ ")\n",
211
+ "ds = ds.class_encode_column(\"obj_num\")\n",
212
+ "trainval_test = ds.train_test_split(stratify_by_column=\"obj_num\", test_size=0.16)\n",
213
+ "train_val = trainval_test[\"train\"].train_test_split(\n",
214
+ " stratify_by_column=\"obj_num\", test_size=16 / 84\n",
215
+ ")\n",
216
+ "ds = DatasetDict(\n",
217
+ " {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
218
+ ")"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 17,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "cifar10dataset = cifar10dataset.with_transform(transforms)\n",
228
+ "# cifar10dataset = ds.map(transforms)"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "from transformers import DefaultDataCollator\n",
238
+ "\n",
239
+ "data_collator = DefaultDataCollator()"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "import evaluate\n",
249
+ "\n",
250
+ "accuracy = evaluate.load(\"accuracy\")"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": [
259
+ "import numpy as np\n",
260
+ "\n",
261
+ "\n",
262
+ "def compute_metrics(eval_pred):\n",
263
+ " predictions, labels = eval_pred\n",
264
+ " predictions = np.argmax(predictions, axis=1)\n",
265
+ " return accuracy.compute(predictions=predictions, references=labels)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stderr",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
278
+ "- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([10, 1536]) in the model instantiated\n",
279
+ "- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated\n",
280
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
281
+ ]
282
+ }
283
+ ],
284
+ "source": [
285
+ "from transformers import AutoModelForImageClassification, TrainingArguments, Trainer\n",
286
+ "\n",
287
+ "model = AutoModelForImageClassification.from_pretrained(\n",
288
+ " checkpoint,\n",
289
+ " num_labels=len(labels),\n",
290
+ " ignore_mismatched_sizes=True,\n",
291
+ " id2label=id2label,\n",
292
+ " label2id=label2id,\n",
293
+ ")"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "import torch\n",
303
+ "\n",
304
+ "torch.cuda.empty_cache()"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {},
311
+ "outputs": [
312
+ {
313
+ "name": "stderr",
314
+ "output_type": "stream",
315
+ "text": [
316
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
317
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjameswburton18\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
318
+ ]
319
+ },
320
+ {
321
+ "data": {
322
+ "text/html": [
323
+ "Tracking run with wandb version 0.16.3"
324
+ ],
325
+ "text/plain": [
326
+ "<IPython.core.display.HTML object>"
327
+ ]
328
+ },
329
+ "metadata": {},
330
+ "output_type": "display_data"
331
+ },
332
+ {
333
+ "data": {
334
+ "text/html": [
335
+ "Run data is saved locally in <code>/home/james/CodingProjects/ArcPostDoc/ArtifactClassification/notebooks/wandb/run-20240214_115817-fyblqcba</code>"
336
+ ],
337
+ "text/plain": [
338
+ "<IPython.core.display.HTML object>"
339
+ ]
340
+ },
341
+ "metadata": {},
342
+ "output_type": "display_data"
343
+ },
344
+ {
345
+ "data": {
346
+ "text/html": [
347
+ "Syncing run <strong><a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">passionate-lovebird-214</a></strong> to <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
348
+ ],
349
+ "text/plain": [
350
+ "<IPython.core.display.HTML object>"
351
+ ]
352
+ },
353
+ "metadata": {},
354
+ "output_type": "display_data"
355
+ },
356
+ {
357
+ "data": {
358
+ "text/html": [
359
+ " View project at <a href='https://wandb.ai/jameswburton18/huggingface' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface</a>"
360
+ ],
361
+ "text/plain": [
362
+ "<IPython.core.display.HTML object>"
363
+ ]
364
+ },
365
+ "metadata": {},
366
+ "output_type": "display_data"
367
+ },
368
+ {
369
+ "data": {
370
+ "text/html": [
371
+ " View run at <a href='https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba' target=\"_blank\">https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba</a>"
372
+ ],
373
+ "text/plain": [
374
+ "<IPython.core.display.HTML object>"
375
+ ]
376
+ },
377
+ "metadata": {},
378
+ "output_type": "display_data"
379
+ },
380
+ {
381
+ "data": {
382
+ "application/vnd.jupyter.widget-view+json": {
383
+ "model_id": "a8012d9b2c7b47c5aa2533983016d3a2",
384
+ "version_major": 2,
385
+ "version_minor": 0
386
+ },
387
+ "text/plain": [
388
+ " 0%| | 0/3750 [00:00<?, ?it/s]"
389
+ ]
390
+ },
391
+ "metadata": {},
392
+ "output_type": "display_data"
393
+ },
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "{'loss': 2.3286, 'learning_rate': 1.3333333333333334e-06, 'epoch': 0.01}\n"
399
+ ]
400
+ },
401
+ {
402
+ "ename": "KeyboardInterrupt",
403
+ "evalue": "",
404
+ "output_type": "error",
405
+ "traceback": [
406
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
407
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
408
+ "Cell \u001b[0;32mIn[13], line 28\u001b[0m\n\u001b[1;32m 1\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 2\u001b[0m output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcifar10_efficientnet\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m remove_unused_columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m push_to_hub\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 18\u001b[0m trainer \u001b[38;5;241m=\u001b[39m Trainer(\n\u001b[1;32m 19\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 20\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 25\u001b[0m compute_metrics\u001b[38;5;241m=\u001b[39mcompute_metrics,\n\u001b[1;32m 26\u001b[0m )\n\u001b[0;32m---> 28\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
409
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1539\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1537\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
410
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1881\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1879\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss_step\n\u001b[0;32m-> 1881\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_flos \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1883\u001b[0m is_last_step_and_steps_less_than_grad_acc \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1884\u001b[0m steps_in_epoch \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;129;01mand\u001b[39;00m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m==\u001b[39m steps_in_epoch\n\u001b[1;32m 1885\u001b[0m )\n\u001b[1;32m 1887\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1888\u001b[0m total_batched_samples \u001b[38;5;241m%\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1889\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1893\u001b[0m \u001b[38;5;66;03m# the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered\u001b[39;00m\n\u001b[1;32m 1894\u001b[0m \u001b[38;5;66;03m# in accelerate. So, explicitly enable sync gradients to True in that case.\u001b[39;00m\n",
411
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:3543\u001b[0m, in \u001b[0;36mTrainer.floating_point_ops\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 3530\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3531\u001b[0m \u001b[38;5;124;03mFor models that inherit from [`PreTrainedModel`], uses that method to compute the number of floating point\u001b[39;00m\n\u001b[1;32m 3532\u001b[0m \u001b[38;5;124;03moperations for every backward + forward pass. If using another model, either implement such a method in the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 3540\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 3541\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 3542\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloating_point_ops\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 3543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3544\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3545\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
412
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1154\u001b[0m, in \u001b[0;36mModuleUtilsMixin.floating_point_ops\u001b[0;34m(self, input_dict, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfloating_point_ops\u001b[39m(\n\u001b[1;32m 1131\u001b[0m \u001b[38;5;28mself\u001b[39m, input_dict: Dict[\u001b[38;5;28mstr\u001b[39m, Union[torch\u001b[38;5;241m.\u001b[39mTensor, Any]], exclude_embeddings: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 1132\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;124;03m Get number of (optionally, non-embeddings) floating-point operations for the forward and backward passes of a\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;124;03m batch with this transformer model. Default approximation neglects the quadratic dependency on the number of\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1151\u001b[0m \u001b[38;5;124;03m `int`: The number of floating-point operations.\u001b[39;00m\n\u001b[1;32m 1152\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_tokens(input_dict) \u001b[38;5;241m*\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_parameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexclude_embeddings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude_embeddings\u001b[49m\u001b[43m)\u001b[49m\n",
413
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1078\u001b[0m, in \u001b[0;36mModuleUtilsMixin.num_parameters\u001b[0;34m(self, only_trainable, exclude_embeddings)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[0;32m-> 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(module_type, nn\u001b[38;5;241m.\u001b[39mEmbedding)\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
414
+ "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/modeling_utils.py:1079\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1063\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;124;03mGet number of (optionally, trainable or non-embeddings) parameters in the module.\u001b[39;00m\n\u001b[1;32m 1065\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1074\u001b[0m \u001b[38;5;124;03m `int`: The number of parameters.\u001b[39;00m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1077\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exclude_embeddings:\n\u001b[1;32m 1078\u001b[0m embedding_param_names \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m-> 1079\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.weight\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, module_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_modules() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEmbedding\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1080\u001b[0m ]\n\u001b[1;32m 1081\u001b[0m total_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1082\u001b[0m parameter \u001b[38;5;28;01mfor\u001b[39;00m name, parameter \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m embedding_param_names\n\u001b[1;32m 1083\u001b[0m ]\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
415
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
416
+ ]
417
+ }
418
+ ],
419
+ "source": [
420
+ "training_args = TrainingArguments(\n",
421
+ " output_dir=\"cifar10_efficientnet\",\n",
422
+ " remove_unused_columns=False,\n",
423
+ " evaluation_strategy=\"epoch\",\n",
424
+ " save_strategy=\"epoch\",\n",
425
+ " learning_rate=5e-5,\n",
426
+ " per_device_train_batch_size=8, # memory error with 16\n",
427
+ " gradient_accumulation_steps=4,\n",
428
+ " per_device_eval_batch_size=8,\n",
429
+ " num_train_epochs=3,\n",
430
+ " warmup_ratio=0.1,\n",
431
+ " logging_steps=10,\n",
432
+ " load_best_model_at_end=True,\n",
433
+ " metric_for_best_model=\"accuracy\",\n",
434
+ " push_to_hub=False,\n",
435
+ ")\n",
436
+ "\n",
437
+ "trainer = Trainer(\n",
438
+ " model=model,\n",
439
+ " args=training_args,\n",
440
+ " data_collator=data_collator,\n",
441
+ " train_dataset=cifar10dataset[\"train\"],\n",
442
+ " eval_dataset=cifar10dataset[\"test\"],\n",
443
+ " tokenizer=image_processor,\n",
444
+ " compute_metrics=compute_metrics,\n",
445
+ ")\n",
446
+ "\n",
447
+ "trainer.train()"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "markdown",
452
+ "metadata": {},
453
+ "source": [
454
+ "### Evaluation"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": null,
460
+ "metadata": {},
461
+ "outputs": [],
462
+ "source": [
463
+ "results = trainer.evaluate()\n",
464
+ "print(results)\n",
465
+ "\n",
466
+ "test_results = trainer.predict("
467
+ ]
468
+ }
469
+ ],
470
+ "metadata": {
471
+ "kernelspec": {
472
+ "display_name": "venv_bloom-classifier",
473
+ "language": "python",
474
+ "name": "python3"
475
+ },
476
+ "language_info": {
477
+ "codemirror_mode": {
478
+ "name": "ipython",
479
+ "version": 3
480
+ },
481
+ "file_extension": ".py",
482
+ "mimetype": "text/x-python",
483
+ "name": "python",
484
+ "nbconvert_exporter": "python",
485
+ "pygments_lexer": "ipython3",
486
+ "version": "3.10.12"
487
+ },
488
+ "orig_nbformat": 4
489
+ },
490
+ "nbformat": 4,
491
+ "nbformat_minor": 2
492
+ }
0.8-testing_segmented_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
0.9-testing_om_datasets.ipynb ADDED
@@ -0,0 +1,459 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import logging\n",
10
+ "import os\n",
11
+ "from pathlib import Path\n",
12
+ "import click\n",
13
+ "from dotenv import find_dotenv, load_dotenv\n",
14
+ "\n",
15
+ "from datasets import load_dataset, ClassLabel\n",
16
+ "import numpy as np\n",
17
+ "import wandb\n",
18
+ "import yaml\n",
19
+ "from transformers.trainer_callback import EarlyStoppingCallback\n",
20
+ "from artifact_classification.utils import ConfigLoader\n",
21
+ "from torchvision.transforms import (\n",
22
+ " Compose,\n",
23
+ " Normalize,\n",
24
+ " ToTensor,\n",
25
+ " CenterCrop,\n",
26
+ " Resize,\n",
27
+ ")\n",
28
+ "from transformers import (\n",
29
+ " AutoImageProcessor,\n",
30
+ " AutoModelForImageClassification,\n",
31
+ " TrainingArguments,\n",
32
+ " Trainer,\n",
33
+ " DefaultDataCollator,\n",
34
+ " AutoModelForSequenceClassification,\n",
35
+ " DataCollatorWithPadding,\n",
36
+ " AutoTokenizer,\n",
37
+ ")\n",
38
+ "from sklearn.metrics import top_k_accuracy_score\n",
39
+ "import evaluate"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 2,
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "name": "stdout",
49
+ "output_type": "stream",
50
+ "text": [
51
+ "Updating with:\n",
52
+ "{'config': 'om3txt_name', 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'model_base': 'microsoft/deberta-v3-base', 'problem_type': 'text'}\n",
53
+ "\n",
54
+ "\n",
55
+ "{'config': 'om3txt_name', 'fast_dev_run': False, 'do_train': True, 'do_predict': True, 'batch_size': 16, 'model_base': 'microsoft/deberta-v3-base', 'output_root': 'models/', 'num_epochs': 100, 'early_stopping_patience': 5, 'grad_accumulation_steps': 1, 'seed': 42, 'logging_steps': 10, 'lr_scheduler': 'linear', 'warmup_ratio': 0, 'weight_decay': 0, 'device': 'cuda', 'num_workers': 1, 'resume_from_checkpoint': False, 'predict_batch_size': 16, 'save_total_limit': 1, 'lr': 5e-05, 'pytorch2_0': False, 'max_length': 512, 'text_column': 'description', 'fp16': True, 'dataset': 'james-burton/OrientalMuseum_min3-name-text', 'wandb_proj_name': 'OrientalMuesumText', 'problem_type': 'text'}\n",
56
+ "\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "config = \"om3txt_name\"\n",
62
+ "\n",
63
+ "# Training args\n",
64
+ "args = ConfigLoader(config, \"../configs/train_configs.yaml\", \"../configs/train_default.yaml\")\n",
65
+ "\n",
66
+ "# # Load dataset, filter out na inputs and labels and encode labels (as label column can change)\n",
67
+ "# dataset = load_dataset(args.dataset) # , download_mode=\"force_redownload\")\n",
68
+ "# dataset = dataset.filter(lambda example: example[args.label_column] is not None)\n",
69
+ "# if args.problem_type == \"text\":\n",
70
+ "# dataset = dataset.filter(lambda example: example[args.text_column] is not None)\n",
71
+ "# dataset = dataset.rename_column(args.label_column, \"label\")\n",
72
+ "# if not isinstance(dataset[\"train\"].features[\"label\"], ClassLabel):\n",
73
+ "# dataset = dataset.class_encode_column(\"label\")"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 2,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "testing om3_num om3_material om3_name om3txt_material om3txt_name om3-white_num om3-white_material om3-white_name om3-3Dwhite_num om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_num om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_num om4_material om4_name om4txt_material om4txt_name om4-white_num om4-white_material om4-white_name om4-3Dwhite_num om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_num om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_num om5_material om5_name om5txt_material om5txt_name om5-white_num om5-white_material om5-white_name om5-3Dwhite_num om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_num om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_num om6_material om6_name om6txt_material om6txt_name om6-white_num om6-white_material om6-white_name om6-3Dwhite_num om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_num om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_num om3-3DwhiteTVT_material om3-3DwhiteTVT_name\n"
86
+ ]
87
+ }
88
+ ],
89
+ "source": [
90
+ "import yaml\n",
91
+ "\n",
92
+ "with open(\"../configs/train_configs.yaml\", \"r\") as file:\n",
93
+ " configs = list(yaml.safe_load_all(file))\n",
94
+ "\n",
95
+ "config_names = \" \".join([cfg[\"config\"] for cfg in configs])\n",
96
+ "print(config_names)"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 4,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "data": {
106
+ "text/plain": [
107
+ "'testing om3_material om3_name om3-white_material om3-white_name om3-3Dwhite_material om3-3Dwhite_name om3-3Dwhite-1frame_material om3-3Dwhite-1frame_name om4_material om4_name om4-white_material om4-white_name om4-3Dwhite_material om4-3Dwhite_name om4-3Dwhite-1frame_material om4-3Dwhite-1frame_name om5_material om5_name om5-white_material om5-white_name om5-3Dwhite_material om5-3Dwhite_name om5-3Dwhite-1frame_material om5-3Dwhite-1frame_name om6_material om6_name om6-white_material om6-white_name om6-3Dwhite_material om6-3Dwhite_name om6-3Dwhite-1frame_material om6-3Dwhite-1frame_name om3-3DwhiteTVT_material om3-3DwhiteTVT_name'"
108
+ ]
109
+ },
110
+ "execution_count": 4,
111
+ "metadata": {},
112
+ "output_type": "execute_result"
113
+ }
114
+ ],
115
+ "source": [
116
+ "\" \".join(\n",
117
+ " [cfg[\"config\"] for cfg in configs if not (\"txt\" in cfg[\"config\"] or \"num\" in cfg[\"config\"])]\n",
118
+ ")\n",
119
+ "# \" \".join([cfg[\"config\"] for cfg in configs if \"1frame\" in cfg[\"config\"]])"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 37,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "l2i = {\n",
129
+ " \"Album Painting\": 0,\n",
130
+ " \"Animal Figurine\": 1,\n",
131
+ " \"Animal Mummy\": 2,\n",
132
+ " \"Animal bone\": 3,\n",
133
+ " \"Belt Hook\": 4,\n",
134
+ " \"Blouse\": 5,\n",
135
+ " \"Bolt\": 6,\n",
136
+ " \"Box\": 7,\n",
137
+ " \"Brush Pot\": 8,\n",
138
+ " \"Cap\": 9,\n",
139
+ " \"Case\": 10,\n",
140
+ " \"Clay pipe (smoking)\": 11,\n",
141
+ " \"Cosmetic and Medical Equipment and Implements\": 12,\n",
142
+ " \"Cup And Saucer\": 13,\n",
143
+ " \"DVDs\": 14,\n",
144
+ " \"Dagger\": 15,\n",
145
+ " \"Disc\": 16,\n",
146
+ " \"Domestic Equipment and Utensils\": 17,\n",
147
+ " \"Earring\": 18,\n",
148
+ " \"Finger Ring\": 19,\n",
149
+ " \"Funerary Cone\": 20,\n",
150
+ " \"Funerary goods\": 21,\n",
151
+ " \"Funerary money\": 22,\n",
152
+ " \"Hanging\": 23,\n",
153
+ " \"Heart Scarab\": 24,\n",
154
+ " \"Human Figurine\": 25,\n",
155
+ " \"Inkstick\": 26,\n",
156
+ " \"Kite\": 27,\n",
157
+ " \"Kohl Pot\": 28,\n",
158
+ " \"Letter\": 29,\n",
159
+ " \"Manuscript Page\": 30,\n",
160
+ " \"Mat\": 31,\n",
161
+ " \"Mica Painting\": 32,\n",
162
+ " \"Miniature Painting\": 33,\n",
163
+ " \"Mortar\": 34,\n",
164
+ " \"Mummy Label\": 35,\n",
165
+ " \"Oracle Bone\": 36,\n",
166
+ " \"Ostraka\": 37,\n",
167
+ " \"Palette\": 38,\n",
168
+ " \"Panel\": 39,\n",
169
+ " \"Part\": 40,\n",
170
+ " \"Pendant\": 41,\n",
171
+ " \"Pipe\": 42,\n",
172
+ " \"Pith Painting\": 43,\n",
173
+ " \"Plaque\": 44,\n",
174
+ " \"Plate\": 45,\n",
175
+ " \"Scarab Seal\": 46,\n",
176
+ " \"Scarf\": 47,\n",
177
+ " \"Screen\": 48,\n",
178
+ " \"Seal\": 49,\n",
179
+ " \"Slide\": 50,\n",
180
+ " \"Stand\": 51,\n",
181
+ " \"Thangka\": 52,\n",
182
+ " \"Water Dropper\": 53,\n",
183
+ " \"Water Pot\": 54,\n",
184
+ " \"Woodblock Print\": 55,\n",
185
+ " \"accessories\": 56,\n",
186
+ " \"albums\": 57,\n",
187
+ " \"amulets\": 58,\n",
188
+ " \"animation cels\": 59,\n",
189
+ " \"animation drawings\": 60,\n",
190
+ " \"armor\": 61,\n",
191
+ " \"arrowheads\": 62,\n",
192
+ " \"axes: woodworking tools\": 63,\n",
193
+ " \"badges\": 64,\n",
194
+ " \"bags\": 65,\n",
195
+ " \"bandages\": 66,\n",
196
+ " \"baskets\": 67,\n",
197
+ " \"beads\": 68,\n",
198
+ " \"bells\": 69,\n",
199
+ " \"belts\": 70,\n",
200
+ " \"blades\": 71,\n",
201
+ " \"books\": 72,\n",
202
+ " \"bottles\": 73,\n",
203
+ " \"bowls\": 74,\n",
204
+ " \"boxes\": 75,\n",
205
+ " \"bracelets\": 76,\n",
206
+ " \"brick\": 77,\n",
207
+ " \"brooches\": 78,\n",
208
+ " \"brush washers\": 79,\n",
209
+ " \"buckets\": 80,\n",
210
+ " \"buckles\": 81,\n",
211
+ " \"calligraphy\": 82,\n",
212
+ " \"canopic jars\": 83,\n",
213
+ " \"cards\": 84,\n",
214
+ " \"carvings\": 85,\n",
215
+ " \"chains\": 86,\n",
216
+ " \"chessmen\": 87,\n",
217
+ " \"chopsticks\": 88,\n",
218
+ " \"claypipe\": 89,\n",
219
+ " \"cloth\": 90,\n",
220
+ " \"clothing\": 91,\n",
221
+ " \"coats\": 92,\n",
222
+ " \"coins\": 93,\n",
223
+ " \"collar\": 94,\n",
224
+ " \"compact discs\": 95,\n",
225
+ " \"containers\": 96,\n",
226
+ " \"coverings\": 97,\n",
227
+ " \"covers\": 98,\n",
228
+ " \"cups\": 99,\n",
229
+ " \"deity figurine\": 100,\n",
230
+ " \"diagrams\": 101,\n",
231
+ " \"dishes\": 102,\n",
232
+ " \"dolls\": 103,\n",
233
+ " \"drawings\": 104,\n",
234
+ " \"dresses\": 105,\n",
235
+ " \"drums\": 106,\n",
236
+ " \"earrings\": 107,\n",
237
+ " \"embroidery\": 108,\n",
238
+ " \"ensembles\": 109,\n",
239
+ " \"envelopes\": 110,\n",
240
+ " \"equipment for personal use: grooming, hygiene and health care\": 111,\n",
241
+ " \"ewers\": 112,\n",
242
+ " \"fans\": 113,\n",
243
+ " \"figures\": 114,\n",
244
+ " \"figurines\": 115,\n",
245
+ " \"flags\": 116,\n",
246
+ " \"flasks\": 117,\n",
247
+ " \"furniture components\": 118,\n",
248
+ " \"gaming counters\": 119,\n",
249
+ " \"glassware\": 120,\n",
250
+ " \"hairpins\": 121,\n",
251
+ " \"handles\": 122,\n",
252
+ " \"harnesses\": 123,\n",
253
+ " \"hats\": 124,\n",
254
+ " \"headdresses\": 125,\n",
255
+ " \"heads\": 126,\n",
256
+ " \"incense burners\": 127,\n",
257
+ " \"inlays\": 128,\n",
258
+ " \"jackets\": 129,\n",
259
+ " \"jars\": 130,\n",
260
+ " \"jewelry\": 131,\n",
261
+ " \"juglets\": 132,\n",
262
+ " \"jugs\": 133,\n",
263
+ " \"keys\": 134,\n",
264
+ " \"kimonos\": 135,\n",
265
+ " \"knives\": 136,\n",
266
+ " \"lamps\": 137,\n",
267
+ " \"lanterns\": 138,\n",
268
+ " \"lids\": 139,\n",
269
+ " \"maces\": 140,\n",
270
+ " \"masks\": 141,\n",
271
+ " \"medals\": 142,\n",
272
+ " \"mirrors\": 143,\n",
273
+ " \"models\": 144,\n",
274
+ " \"mounts\": 145,\n",
275
+ " \"nails\": 146,\n",
276
+ " \"necklaces\": 147,\n",
277
+ " \"needles\": 148,\n",
278
+ " \"netsukes\": 149,\n",
279
+ " \"ornaments\": 150,\n",
280
+ " \"pages\": 151,\n",
281
+ " \"paintings\": 152,\n",
282
+ " \"paper money\": 153,\n",
283
+ " \"pendants\": 154,\n",
284
+ " \"petticoats\": 155,\n",
285
+ " \"photographs\": 156,\n",
286
+ " \"pictures\": 157,\n",
287
+ " \"pins\": 158,\n",
288
+ " \"playing cards\": 159,\n",
289
+ " \"poker\": 160,\n",
290
+ " \"postage stamps\": 161,\n",
291
+ " \"postcards\": 162,\n",
292
+ " \"posters\": 163,\n",
293
+ " \"pots\": 164,\n",
294
+ " \"pottery\": 165,\n",
295
+ " \"prints\": 166,\n",
296
+ " \"puppets\": 167,\n",
297
+ " \"purses\": 168,\n",
298
+ " \"reliefs\": 169,\n",
299
+ " \"rings\": 170,\n",
300
+ " \"robes\": 171,\n",
301
+ " \"rubbings\": 172,\n",
302
+ " \"rugs\": 173,\n",
303
+ " \"sandals\": 174,\n",
304
+ " \"saris\": 175,\n",
305
+ " \"sarongs\": 176,\n",
306
+ " \"scabbards\": 177,\n",
307
+ " \"scaraboids\": 178,\n",
308
+ " \"scarabs\": 179,\n",
309
+ " \"scrolls\": 180,\n",
310
+ " \"seed\": 181,\n",
311
+ " \"seppa\": 182,\n",
312
+ " \"shadow puppets\": 183,\n",
313
+ " \"shawls\": 184,\n",
314
+ " \"shell\": 185,\n",
315
+ " \"sherds\": 186,\n",
316
+ " \"shields\": 187,\n",
317
+ " \"shoes\": 188,\n",
318
+ " \"sketches\": 189,\n",
319
+ " \"skirts\": 190,\n",
320
+ " \"snuff bottles\": 191,\n",
321
+ " \"socks\": 192,\n",
322
+ " \"spatulas\": 193,\n",
323
+ " \"spoons\": 194,\n",
324
+ " \"statues\": 195,\n",
325
+ " \"statuettes\": 196,\n",
326
+ " \"stelae\": 197,\n",
327
+ " \"straps\": 198,\n",
328
+ " \"studs\": 199,\n",
329
+ " \"swords\": 200,\n",
330
+ " \"tablets\": 201,\n",
331
+ " \"tacks\": 202,\n",
332
+ " \"tea bowls\": 203,\n",
333
+ " \"teapots\": 204,\n",
334
+ " \"tiles\": 205,\n",
335
+ " \"tools\": 206,\n",
336
+ " \"toys\": 207,\n",
337
+ " \"trays\": 208,\n",
338
+ " \"tubes\": 209,\n",
339
+ " \"tweezers\": 210,\n",
340
+ " \"underwear\": 211,\n",
341
+ " \"unidentified\": 212,\n",
342
+ " \"ushabti\": 213,\n",
343
+ " \"utensils\": 214,\n",
344
+ " \"vases\": 215,\n",
345
+ " \"vessels\": 216,\n",
346
+ " \"weight\": 217,\n",
347
+ " \"weights\": 218,\n",
348
+ " \"whorls\": 219,\n",
349
+ " \"wood blocks\": 220,\n",
350
+ "}"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": 38,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": [
359
+ "import json"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": 39,
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "# json dump\n",
369
+ "with open(\"l2i.json\", \"w\") as f:\n",
370
+ " json.dump({str(v): k for k, v in l2i.items()}, f)\n",
371
+ "# {str(v): k for k, v in l2i.items()}"
372
+ ]
373
+ },
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": 7,
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": [
380
+ "from transformers import AutoConfig"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 45,
386
+ "metadata": {},
387
+ "outputs": [
388
+ {
389
+ "data": {
390
+ "application/vnd.jupyter.widget-view+json": {
391
+ "model_id": "005c080fdcf141acaa30ba191a8c8f3c",
392
+ "version_major": 2,
393
+ "version_minor": 0
394
+ },
395
+ "text/plain": [
396
+ "config.json: 0%| | 0.00/10.9k [00:00<?, ?B/s]"
397
+ ]
398
+ },
399
+ "metadata": {},
400
+ "output_type": "display_data"
401
+ }
402
+ ],
403
+ "source": [
404
+ "config = AutoConfig.from_pretrained(\"james-burton/om6txt_name\")"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 46,
410
+ "metadata": {},
411
+ "outputs": [
412
+ {
413
+ "name": "stderr",
414
+ "output_type": "stream",
415
+ "text": [
416
+ "/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)\n",
417
+ "Failed to load module: /home/james/snap/code/common/.cache/gio-modules/libgiolibproxy.so\n",
418
+ "eog: symbol lookup error: /snap/core20/current/lib/x86_64-linux-gnu/libpthread.so.0: undefined symbol: __libc_pthread_init, version GLIBC_PRIVATE\n"
419
+ ]
420
+ }
421
+ ],
422
+ "source": [
423
+ "from PIL import Image\n",
424
+ "\n",
425
+ "image_path = \"../data/processed/OM_3Dimages_white/egyptian/1951/1951.42-tt_2.png\"\n",
426
+ "image = Image.open(image_path)\n",
427
+ "image.show()"
428
+ ]
429
+ },
430
+ {
431
+ "cell_type": "code",
432
+ "execution_count": null,
433
+ "metadata": {},
434
+ "outputs": [],
435
+ "source": []
436
+ }
437
+ ],
438
+ "metadata": {
439
+ "kernelspec": {
440
+ "display_name": "ArtifactClassification",
441
+ "language": "python",
442
+ "name": "python3"
443
+ },
444
+ "language_info": {
445
+ "codemirror_mode": {
446
+ "name": "ipython",
447
+ "version": 3
448
+ },
449
+ "file_extension": ".py",
450
+ "mimetype": "text/x-python",
451
+ "name": "python",
452
+ "nbconvert_exporter": "python",
453
+ "pygments_lexer": "ipython3",
454
+ "version": "3.10.12"
455
+ }
456
+ },
457
+ "nbformat": 4,
458
+ "nbformat_minor": 2
459
+ }
1.0-checking_dataset_size.ipynb ADDED
@@ -0,0 +1,559 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import os\n",
11
+ "import re"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 4,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# Loaded variable 'df' from URI: /home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\n",
21
+ "df = pd.read_excel(\n",
22
+ " r\"/home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch/Durham_University_Museums_data (1).xlsx\"\n",
23
+ ")"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 5,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/html": [
34
+ "<div>\n",
35
+ "<style scoped>\n",
36
+ " .dataframe tbody tr th:only-of-type {\n",
37
+ " vertical-align: middle;\n",
38
+ " }\n",
39
+ "\n",
40
+ " .dataframe tbody tr th {\n",
41
+ " vertical-align: top;\n",
42
+ " }\n",
43
+ "\n",
44
+ " .dataframe thead th {\n",
45
+ " text-align: right;\n",
46
+ " }\n",
47
+ "</style>\n",
48
+ "<table border=\"1\" class=\"dataframe\">\n",
49
+ " <thead>\n",
50
+ " <tr style=\"text-align: right;\">\n",
51
+ " <th></th>\n",
52
+ " <th>object_number</th>\n",
53
+ " <th>object_name</th>\n",
54
+ " <th>other_name</th>\n",
55
+ " <th>reproduction.reference</th>\n",
56
+ " <th>description</th>\n",
57
+ " <th>label.text</th>\n",
58
+ " <th>material</th>\n",
59
+ " <th>technique</th>\n",
60
+ " <th>physical_description</th>\n",
61
+ " <th>number_of_parts</th>\n",
62
+ " <th>...</th>\n",
63
+ " <th>Unnamed: 25</th>\n",
64
+ " <th>Unnamed: 26</th>\n",
65
+ " <th>Unnamed: 27</th>\n",
66
+ " <th>Unnamed: 28</th>\n",
67
+ " <th>Unnamed: 29</th>\n",
68
+ " <th>Unnamed: 30</th>\n",
69
+ " <th>Unnamed: 31</th>\n",
70
+ " <th>Unnamed: 32</th>\n",
71
+ " <th>Unnamed: 33</th>\n",
72
+ " <th>Unnamed: 34</th>\n",
73
+ " </tr>\n",
74
+ " </thead>\n",
75
+ " <tbody>\n",
76
+ " <tr>\n",
77
+ " <th>0</th>\n",
78
+ " <td>EG1</td>\n",
79
+ " <td>Kohl Pot</td>\n",
80
+ " <td>jar</td>\n",
81
+ " <td>NaN</td>\n",
82
+ " <td>Shouldered, squat, incised kohl jar, blackened...</td>\n",
83
+ " <td>NaN</td>\n",
84
+ " <td>limestone</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>NaN</td>\n",
88
+ " <td>...</td>\n",
89
+ " <td>NaN</td>\n",
90
+ " <td>NaN</td>\n",
91
+ " <td>NaN</td>\n",
92
+ " <td>NaN</td>\n",
93
+ " <td>NaN</td>\n",
94
+ " <td>NaN</td>\n",
95
+ " <td>NaN</td>\n",
96
+ " <td>NaN</td>\n",
97
+ " <td>NaN</td>\n",
98
+ " <td>NaN</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>1</th>\n",
102
+ " <td>EG2</td>\n",
103
+ " <td>cups</td>\n",
104
+ " <td>beaker</td>\n",
105
+ " <td>NaN</td>\n",
106
+ " <td>slightly concave beaker with flaring rim and c...</td>\n",
107
+ " <td>NaN</td>\n",
108
+ " <td>travertine</td>\n",
109
+ " <td>NaN</td>\n",
110
+ " <td>NaN</td>\n",
111
+ " <td>NaN</td>\n",
112
+ " <td>...</td>\n",
113
+ " <td>NaN</td>\n",
114
+ " <td>NaN</td>\n",
115
+ " <td>NaN</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " <td>NaN</td>\n",
118
+ " <td>NaN</td>\n",
119
+ " <td>NaN</td>\n",
120
+ " <td>NaN</td>\n",
121
+ " <td>NaN</td>\n",
122
+ " <td>NaN</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>2</th>\n",
126
+ " <td>EG3</td>\n",
127
+ " <td>bowls</td>\n",
128
+ " <td>bowl</td>\n",
129
+ " <td>../images/egyptian/eg/eg3-409-d1.jpg</td>\n",
130
+ " <td>squat shouldered jar, no rim</td>\n",
131
+ " <td>&lt;SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-...</td>\n",
132
+ " <td>limestone</td>\n",
133
+ " <td>NaN</td>\n",
134
+ " <td>NaN</td>\n",
135
+ " <td>NaN</td>\n",
136
+ " <td>...</td>\n",
137
+ " <td>NaN</td>\n",
138
+ " <td>NaN</td>\n",
139
+ " <td>NaN</td>\n",
140
+ " <td>NaN</td>\n",
141
+ " <td>NaN</td>\n",
142
+ " <td>NaN</td>\n",
143
+ " <td>NaN</td>\n",
144
+ " <td>NaN</td>\n",
145
+ " <td>NaN</td>\n",
146
+ " <td>NaN</td>\n",
147
+ " </tr>\n",
148
+ " <tr>\n",
149
+ " <th>3</th>\n",
150
+ " <td>EG4</td>\n",
151
+ " <td>bottles</td>\n",
152
+ " <td>jar</td>\n",
153
+ " <td>NaN</td>\n",
154
+ " <td>necked, globular jar with flared rim</td>\n",
155
+ " <td>NaN</td>\n",
156
+ " <td>travertine</td>\n",
157
+ " <td>NaN</td>\n",
158
+ " <td>NaN</td>\n",
159
+ " <td>NaN</td>\n",
160
+ " <td>...</td>\n",
161
+ " <td>NaN</td>\n",
162
+ " <td>NaN</td>\n",
163
+ " <td>NaN</td>\n",
164
+ " <td>NaN</td>\n",
165
+ " <td>NaN</td>\n",
166
+ " <td>NaN</td>\n",
167
+ " <td>NaN</td>\n",
168
+ " <td>NaN</td>\n",
169
+ " <td>NaN</td>\n",
170
+ " <td>NaN</td>\n",
171
+ " </tr>\n",
172
+ " <tr>\n",
173
+ " <th>4</th>\n",
174
+ " <td>EG5</td>\n",
175
+ " <td>bottles</td>\n",
176
+ " <td>jar</td>\n",
177
+ " <td>NaN</td>\n",
178
+ " <td>necked, globular jar with narrow rim, plus sto...</td>\n",
179
+ " <td>NaN</td>\n",
180
+ " <td>travertine</td>\n",
181
+ " <td>NaN</td>\n",
182
+ " <td>NaN</td>\n",
183
+ " <td>NaN</td>\n",
184
+ " <td>...</td>\n",
185
+ " <td>NaN</td>\n",
186
+ " <td>NaN</td>\n",
187
+ " <td>NaN</td>\n",
188
+ " <td>NaN</td>\n",
189
+ " <td>NaN</td>\n",
190
+ " <td>NaN</td>\n",
191
+ " <td>NaN</td>\n",
192
+ " <td>NaN</td>\n",
193
+ " <td>NaN</td>\n",
194
+ " <td>NaN</td>\n",
195
+ " </tr>\n",
196
+ " <tr>\n",
197
+ " <th>...</th>\n",
198
+ " <td>...</td>\n",
199
+ " <td>...</td>\n",
200
+ " <td>...</td>\n",
201
+ " <td>...</td>\n",
202
+ " <td>...</td>\n",
203
+ " <td>...</td>\n",
204
+ " <td>...</td>\n",
205
+ " <td>...</td>\n",
206
+ " <td>...</td>\n",
207
+ " <td>...</td>\n",
208
+ " <td>...</td>\n",
209
+ " <td>...</td>\n",
210
+ " <td>...</td>\n",
211
+ " <td>...</td>\n",
212
+ " <td>...</td>\n",
213
+ " <td>...</td>\n",
214
+ " <td>...</td>\n",
215
+ " <td>...</td>\n",
216
+ " <td>...</td>\n",
217
+ " <td>...</td>\n",
218
+ " <td>...</td>\n",
219
+ " </tr>\n",
220
+ " <tr>\n",
221
+ " <th>60081</th>\n",
222
+ " <td>DURMA.2020.3.2072</td>\n",
223
+ " <td>coins</td>\n",
224
+ " <td>NaN</td>\n",
225
+ " <td>../images/fulling_mill/2020/DURMA.2020.3.2072-...</td>\n",
226
+ " <td>A silver Roman coin which is a part of the Pie...</td>\n",
227
+ " <td>NaN</td>\n",
228
+ " <td>metal</td>\n",
229
+ " <td>hammering</td>\n",
230
+ " <td>A silver denarius of Elagabalus dating to the ...</td>\n",
231
+ " <td>1</td>\n",
232
+ " <td>...</td>\n",
233
+ " <td>NaN</td>\n",
234
+ " <td>NaN</td>\n",
235
+ " <td>NaN</td>\n",
236
+ " <td>NaN</td>\n",
237
+ " <td>NaN</td>\n",
238
+ " <td>NaN</td>\n",
239
+ " <td>NaN</td>\n",
240
+ " <td>NaN</td>\n",
241
+ " <td>NaN</td>\n",
242
+ " <td>NaN</td>\n",
243
+ " </tr>\n",
244
+ " <tr>\n",
245
+ " <th>60082</th>\n",
246
+ " <td>DUROM.2021.286</td>\n",
247
+ " <td>postcards</td>\n",
248
+ " <td>NaN</td>\n",
249
+ " <td>NaN</td>\n",
250
+ " <td>Portrait orientation postcard for the 1996 Ind...</td>\n",
251
+ " <td>NaN</td>\n",
252
+ " <td>paper</td>\n",
253
+ " <td>printing</td>\n",
254
+ " <td>Digital printed onto paper/card</td>\n",
255
+ " <td>1</td>\n",
256
+ " <td>...</td>\n",
257
+ " <td>NaN</td>\n",
258
+ " <td>NaN</td>\n",
259
+ " <td>NaN</td>\n",
260
+ " <td>NaN</td>\n",
261
+ " <td>NaN</td>\n",
262
+ " <td>NaN</td>\n",
263
+ " <td>NaN</td>\n",
264
+ " <td>NaN</td>\n",
265
+ " <td>NaN</td>\n",
266
+ " <td>NaN</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>60083</th>\n",
270
+ " <td>DUROM.2021.287</td>\n",
271
+ " <td>postcards</td>\n",
272
+ " <td>NaN</td>\n",
273
+ " <td>NaN</td>\n",
274
+ " <td>Pair of landscape orientation postcard sized p...</td>\n",
275
+ " <td>NaN</td>\n",
276
+ " <td>paper</td>\n",
277
+ " <td>printing</td>\n",
278
+ " <td>Digital print on card</td>\n",
279
+ " <td>2</td>\n",
280
+ " <td>...</td>\n",
281
+ " <td>NaN</td>\n",
282
+ " <td>NaN</td>\n",
283
+ " <td>NaN</td>\n",
284
+ " <td>NaN</td>\n",
285
+ " <td>NaN</td>\n",
286
+ " <td>NaN</td>\n",
287
+ " <td>NaN</td>\n",
288
+ " <td>NaN</td>\n",
289
+ " <td>NaN</td>\n",
290
+ " <td>NaN</td>\n",
291
+ " </tr>\n",
292
+ " <tr>\n",
293
+ " <th>60084</th>\n",
294
+ " <td>DUROM.2021.289</td>\n",
295
+ " <td>posters</td>\n",
296
+ " <td>NaN</td>\n",
297
+ " <td>NaN</td>\n",
298
+ " <td>Portrait orientation poster for the 1996 India...</td>\n",
299
+ " <td>NaN</td>\n",
300
+ " <td>paper</td>\n",
301
+ " <td>printing</td>\n",
302
+ " <td>digital print on gloss paper</td>\n",
303
+ " <td>1</td>\n",
304
+ " <td>...</td>\n",
305
+ " <td>NaN</td>\n",
306
+ " <td>NaN</td>\n",
307
+ " <td>NaN</td>\n",
308
+ " <td>NaN</td>\n",
309
+ " <td>NaN</td>\n",
310
+ " <td>NaN</td>\n",
311
+ " <td>NaN</td>\n",
312
+ " <td>NaN</td>\n",
313
+ " <td>NaN</td>\n",
314
+ " <td>NaN</td>\n",
315
+ " </tr>\n",
316
+ " <tr>\n",
317
+ " <th>60085</th>\n",
318
+ " <td>DUROM.2021.288</td>\n",
319
+ " <td>posters</td>\n",
320
+ " <td>NaN</td>\n",
321
+ " <td>NaN</td>\n",
322
+ " <td>Portrait orientation poster for the 1996 India...</td>\n",
323
+ " <td>NaN</td>\n",
324
+ " <td>paper</td>\n",
325
+ " <td>printing</td>\n",
326
+ " <td>Digital print on paper</td>\n",
327
+ " <td>1</td>\n",
328
+ " <td>...</td>\n",
329
+ " <td>NaN</td>\n",
330
+ " <td>NaN</td>\n",
331
+ " <td>NaN</td>\n",
332
+ " <td>NaN</td>\n",
333
+ " <td>NaN</td>\n",
334
+ " <td>NaN</td>\n",
335
+ " <td>NaN</td>\n",
336
+ " <td>NaN</td>\n",
337
+ " <td>NaN</td>\n",
338
+ " <td>NaN</td>\n",
339
+ " </tr>\n",
340
+ " </tbody>\n",
341
+ "</table>\n",
342
+ "<p>60086 rows × 35 columns</p>\n",
343
+ "</div>"
344
+ ],
345
+ "text/plain": [
346
+ " object_number object_name other_name \\\n",
347
+ "0 EG1 Kohl Pot jar \n",
348
+ "1 EG2 cups beaker \n",
349
+ "2 EG3 bowls bowl \n",
350
+ "3 EG4 bottles jar \n",
351
+ "4 EG5 bottles jar \n",
352
+ "... ... ... ... \n",
353
+ "60081 DURMA.2020.3.2072 coins NaN \n",
354
+ "60082 DUROM.2021.286 postcards NaN \n",
355
+ "60083 DUROM.2021.287 postcards NaN \n",
356
+ "60084 DUROM.2021.289 posters NaN \n",
357
+ "60085 DUROM.2021.288 posters NaN \n",
358
+ "\n",
359
+ " reproduction.reference \\\n",
360
+ "0 NaN \n",
361
+ "1 NaN \n",
362
+ "2 ../images/egyptian/eg/eg3-409-d1.jpg \n",
363
+ "3 NaN \n",
364
+ "4 NaN \n",
365
+ "... ... \n",
366
+ "60081 ../images/fulling_mill/2020/DURMA.2020.3.2072-... \n",
367
+ "60082 NaN \n",
368
+ "60083 NaN \n",
369
+ "60084 NaN \n",
370
+ "60085 NaN \n",
371
+ "\n",
372
+ " description \\\n",
373
+ "0 Shouldered, squat, incised kohl jar, blackened... \n",
374
+ "1 slightly concave beaker with flaring rim and c... \n",
375
+ "2 squat shouldered jar, no rim \n",
376
+ "3 necked, globular jar with flared rim \n",
377
+ "4 necked, globular jar with narrow rim, plus sto... \n",
378
+ "... ... \n",
379
+ "60081 A silver Roman coin which is a part of the Pie... \n",
380
+ "60082 Portrait orientation postcard for the 1996 Ind... \n",
381
+ "60083 Pair of landscape orientation postcard sized p... \n",
382
+ "60084 Portrait orientation poster for the 1996 India... \n",
383
+ "60085 Portrait orientation poster for the 1996 India... \n",
384
+ "\n",
385
+ " label.text material \\\n",
386
+ "0 NaN limestone \n",
387
+ "1 NaN travertine \n",
388
+ "2 <SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-... limestone \n",
389
+ "3 NaN travertine \n",
390
+ "4 NaN travertine \n",
391
+ "... ... ... \n",
392
+ "60081 NaN metal \n",
393
+ "60082 NaN paper \n",
394
+ "60083 NaN paper \n",
395
+ "60084 NaN paper \n",
396
+ "60085 NaN paper \n",
397
+ "\n",
398
+ " technique physical_description \\\n",
399
+ "0 NaN NaN \n",
400
+ "1 NaN NaN \n",
401
+ "2 NaN NaN \n",
402
+ "3 NaN NaN \n",
403
+ "4 NaN NaN \n",
404
+ "... ... ... \n",
405
+ "60081 hammering A silver denarius of Elagabalus dating to the ... \n",
406
+ "60082 printing Digital printed onto paper/card \n",
407
+ "60083 printing Digital print on card \n",
408
+ "60084 printing digital print on gloss paper \n",
409
+ "60085 printing Digital print on paper \n",
410
+ "\n",
411
+ " number_of_parts ... Unnamed: 25 Unnamed: 26 Unnamed: 27 Unnamed: 28 \\\n",
412
+ "0 NaN ... NaN NaN NaN NaN \n",
413
+ "1 NaN ... NaN NaN NaN NaN \n",
414
+ "2 NaN ... NaN NaN NaN NaN \n",
415
+ "3 NaN ... NaN NaN NaN NaN \n",
416
+ "4 NaN ... NaN NaN NaN NaN \n",
417
+ "... ... ... ... ... ... ... \n",
418
+ "60081 1 ... NaN NaN NaN NaN \n",
419
+ "60082 1 ... NaN NaN NaN NaN \n",
420
+ "60083 2 ... NaN NaN NaN NaN \n",
421
+ "60084 1 ... NaN NaN NaN NaN \n",
422
+ "60085 1 ... NaN NaN NaN NaN \n",
423
+ "\n",
424
+ " Unnamed: 29 Unnamed: 30 Unnamed: 31 Unnamed: 32 Unnamed: 33 Unnamed: 34 \n",
425
+ "0 NaN NaN NaN NaN NaN NaN \n",
426
+ "1 NaN NaN NaN NaN NaN NaN \n",
427
+ "2 NaN NaN NaN NaN NaN NaN \n",
428
+ "3 NaN NaN NaN NaN NaN NaN \n",
429
+ "4 NaN NaN NaN NaN NaN NaN \n",
430
+ "... ... ... ... ... ... ... \n",
431
+ "60081 NaN NaN NaN NaN NaN NaN \n",
432
+ "60082 NaN NaN NaN NaN NaN NaN \n",
433
+ "60083 NaN NaN NaN NaN NaN NaN \n",
434
+ "60084 NaN NaN NaN NaN NaN NaN \n",
435
+ "60085 NaN NaN NaN NaN NaN NaN \n",
436
+ "\n",
437
+ "[60086 rows x 35 columns]"
438
+ ]
439
+ },
440
+ "execution_count": 5,
441
+ "metadata": {},
442
+ "output_type": "execute_result"
443
+ }
444
+ ],
445
+ "source": [
446
+ "df"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": 6,
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "data": {
456
+ "text/plain": [
457
+ "6625"
458
+ ]
459
+ },
460
+ "execution_count": 6,
461
+ "metadata": {},
462
+ "output_type": "execute_result"
463
+ }
464
+ ],
465
+ "source": [
466
+ "df[\"description\"].isna().sum()"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": 7,
472
+ "metadata": {},
473
+ "outputs": [
474
+ {
475
+ "name": "stdout",
476
+ "output_type": "stream",
477
+ "text": [
478
+ "There are 60086 records in the dataset\n",
479
+ "26809 records have an image location\n",
480
+ "53461 records have an description\n",
481
+ "There are 60067 unique museum numbers\n",
482
+ "There are 46166 unique descriptions\n"
483
+ ]
484
+ }
485
+ ],
486
+ "source": [
487
+ "print(f\"There are {len(df)} records in the dataset\")\n",
488
+ "print(f\"{df['reproduction.reference'].notna().sum()} records have an image location\")\n",
489
+ "print(f\"{df['description'].notna().sum()} records have an description\")\n",
490
+ "print(f\"There are {len(df['object_number'].unique())} unique museum numbers\")\n",
491
+ "print(f\"There are {len(df['description'].unique())} unique descriptions\")"
492
+ ]
493
+ },
494
+ {
495
+ "cell_type": "code",
496
+ "execution_count": 8,
497
+ "metadata": {},
498
+ "outputs": [
499
+ {
500
+ "name": "stdout",
501
+ "output_type": "stream",
502
+ "text": [
503
+ "Total number of images in ../data/raw/images/: 39200\n",
504
+ "Total number of files in ../data/raw/images/: 39244\n"
505
+ ]
506
+ }
507
+ ],
508
+ "source": [
509
+ "import os\n",
510
+ "\n",
511
+ "image_count = 0\n",
512
+ "file_count = 0\n",
513
+ "\n",
514
+ "# Define the root directory\n",
515
+ "root_dir = \"../data/raw/images/\"\n",
516
+ "\n",
517
+ "# Iterate through all subdirectories and files\n",
518
+ "for root, dirs, files in os.walk(root_dir):\n",
519
+ " for file in files:\n",
520
+ " file_count += 1\n",
521
+ " # Check if the file is an image file\n",
522
+ " if file.endswith((\".jpg\", \".jpeg\", \".png\", \".gif\", \".JPG\", \".JPEG\", \".PNG\", \".GIF\")):\n",
523
+ " # Increment the image count\n",
524
+ " image_count += 1\n",
525
+ "\n",
526
+ "print(f\"Total number of images in {root_dir}: {image_count}\")\n",
527
+ "print(f\"Total number of files in {root_dir}: {file_count}\")"
528
+ ]
529
+ },
530
+ {
531
+ "cell_type": "code",
532
+ "execution_count": null,
533
+ "metadata": {},
534
+ "outputs": [],
535
+ "source": []
536
+ }
537
+ ],
538
+ "metadata": {
539
+ "kernelspec": {
540
+ "display_name": "env",
541
+ "language": "python",
542
+ "name": "python3"
543
+ },
544
+ "language_info": {
545
+ "codemirror_mode": {
546
+ "name": "ipython",
547
+ "version": 3
548
+ },
549
+ "file_extension": ".py",
550
+ "mimetype": "text/x-python",
551
+ "name": "python",
552
+ "nbconvert_exporter": "python",
553
+ "pygments_lexer": "ipython3",
554
+ "version": "3.10.12"
555
+ }
556
+ },
557
+ "nbformat": 4,
558
+ "nbformat_minor": 2
559
+ }
1.1-exploring_OM_image_matching.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
2.0-assessing_OM_dataset.ipynb ADDED
@@ -0,0 +1,1468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 89,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import matplotlib.pyplot as plt\n",
11
+ "import numpy as np"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\")\n",
21
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "file_counts = file2obj[\"obj_num\"].value_counts()\n",
31
+ "# file2obj"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "data": {
41
+ "text/plain": [
42
+ "obj_num\n",
43
+ "durom.1969.406 249\n",
44
+ "durom.1973.47 191\n",
45
+ "DUROM.1954.Spalding29.W 112\n",
46
+ "durom.1960.2332 101\n",
47
+ "durom.2014.1 76\n",
48
+ " ... \n",
49
+ "durom.2006.46.32 1\n",
50
+ "durom.2006.44.16 1\n",
51
+ "durom.2006.45.194 1\n",
52
+ "durom.2006.46.13 1\n",
53
+ "durom.1964.183 1\n",
54
+ "Name: count, Length: 12642, dtype: int64"
55
+ ]
56
+ },
57
+ "execution_count": 4,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "file_counts"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 5,
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "data": {
73
+ "text/html": [
74
+ "<div>\n",
75
+ "<style scoped>\n",
76
+ " .dataframe tbody tr th:only-of-type {\n",
77
+ " vertical-align: middle;\n",
78
+ " }\n",
79
+ "\n",
80
+ " .dataframe tbody tr th {\n",
81
+ " vertical-align: top;\n",
82
+ " }\n",
83
+ "\n",
84
+ " .dataframe thead th {\n",
85
+ " text-align: right;\n",
86
+ " }\n",
87
+ "</style>\n",
88
+ "<table border=\"1\" class=\"dataframe\">\n",
89
+ " <thead>\n",
90
+ " <tr style=\"text-align: right;\">\n",
91
+ " <th></th>\n",
92
+ " <th>Images per instance</th>\n",
93
+ " <th>Number of instances</th>\n",
94
+ " <th>Number of images</th>\n",
95
+ " </tr>\n",
96
+ " </thead>\n",
97
+ " <tbody>\n",
98
+ " <tr>\n",
99
+ " <th>0</th>\n",
100
+ " <td>3</td>\n",
101
+ " <td>696</td>\n",
102
+ " <td>2088</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>1</th>\n",
106
+ " <td>4</td>\n",
107
+ " <td>703</td>\n",
108
+ " <td>2812</td>\n",
109
+ " </tr>\n",
110
+ " <tr>\n",
111
+ " <th>2</th>\n",
112
+ " <td>5</td>\n",
113
+ " <td>360</td>\n",
114
+ " <td>1800</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>3</th>\n",
118
+ " <td>6</td>\n",
119
+ " <td>853</td>\n",
120
+ " <td>5118</td>\n",
121
+ " </tr>\n",
122
+ " <tr>\n",
123
+ " <th>4</th>\n",
124
+ " <td>7</td>\n",
125
+ " <td>471</td>\n",
126
+ " <td>3297</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>5</th>\n",
130
+ " <td>8</td>\n",
131
+ " <td>223</td>\n",
132
+ " <td>1784</td>\n",
133
+ " </tr>\n",
134
+ " <tr>\n",
135
+ " <th>6</th>\n",
136
+ " <td>9</td>\n",
137
+ " <td>110</td>\n",
138
+ " <td>990</td>\n",
139
+ " </tr>\n",
140
+ " <tr>\n",
141
+ " <th>7</th>\n",
142
+ " <td>10+</td>\n",
143
+ " <td>456</td>\n",
144
+ " <td>7836</td>\n",
145
+ " </tr>\n",
146
+ " <tr>\n",
147
+ " <th>8</th>\n",
148
+ " <td>Total</td>\n",
149
+ " <td>3872</td>\n",
150
+ " <td>25725</td>\n",
151
+ " </tr>\n",
152
+ " </tbody>\n",
153
+ "</table>\n",
154
+ "</div>"
155
+ ],
156
+ "text/plain": [
157
+ " Images per instance Number of instances Number of images\n",
158
+ "0 3 696 2088\n",
159
+ "1 4 703 2812\n",
160
+ "2 5 360 1800\n",
161
+ "3 6 853 5118\n",
162
+ "4 7 471 3297\n",
163
+ "5 8 223 1784\n",
164
+ "6 9 110 990\n",
165
+ "7 10+ 456 7836\n",
166
+ "8 Total 3872 25725"
167
+ ]
168
+ },
169
+ "execution_count": 5,
170
+ "metadata": {},
171
+ "output_type": "execute_result"
172
+ }
173
+ ],
174
+ "source": [
175
+ "distribution_df = pd.DataFrame()\n",
176
+ "distribution_df[\"Images per instance\"] = file_counts.value_counts().sort_index().index\n",
177
+ "distribution_df[\"Number of instances\"] = file_counts.value_counts().sort_index().values\n",
178
+ "distribution_df[\"Number of images\"] = (\n",
179
+ " distribution_df[\"Images per instance\"] * distribution_df[\"Number of instances\"]\n",
180
+ ")\n",
181
+ "num_instances_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
182
+ " \"Number of instances\"\n",
183
+ "].sum()\n",
184
+ "num_images_10plus = distribution_df[distribution_df[\"Images per instance\"] >= 10][\n",
185
+ " \"Number of images\"\n",
186
+ "].sum()\n",
187
+ "distribution_df = distribution_df[\n",
188
+ " (distribution_df[\"Images per instance\"] < 10) & (distribution_df[\"Images per instance\"] > 2)\n",
189
+ "]\n",
190
+ "\n",
191
+ "distribution_df = pd.concat(\n",
192
+ " [\n",
193
+ " distribution_df,\n",
194
+ " pd.DataFrame(\n",
195
+ " {\n",
196
+ " \"Images per instance\": [\"10+\"],\n",
197
+ " \"Number of instances\": [num_instances_10plus],\n",
198
+ " \"Number of images\": [num_images_10plus],\n",
199
+ " }\n",
200
+ " ),\n",
201
+ " ],\n",
202
+ " ignore_index=True,\n",
203
+ ")\n",
204
+ "\n",
205
+ "# append total\n",
206
+ "distribution_df = pd.concat(\n",
207
+ " [\n",
208
+ " distribution_df,\n",
209
+ " pd.DataFrame(\n",
210
+ " {\n",
211
+ " \"Images per instance\": [\"Total\"],\n",
212
+ " \"Number of instances\": [distribution_df[\"Number of instances\"].sum()],\n",
213
+ " \"Number of images\": [distribution_df[\"Number of images\"].sum()],\n",
214
+ " }\n",
215
+ " ),\n",
216
+ " ],\n",
217
+ " ignore_index=True,\n",
218
+ ")\n",
219
+ "# distribution_df = distribution_df[['Images per instance', 'Number of images', 'Number of instances']]\n",
220
+ "distribution_df"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "markdown",
225
+ "metadata": {},
226
+ "source": [
227
+ "This distribution broadly follows that from Winterbottom's paper, with a few minor differences. \n",
228
+ "\n",
229
+ "I am not expecting it to be exactly the same, as winterbottom did not use the database at all, instead just looked at the images"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "markdown",
234
+ "metadata": {},
235
+ "source": [
236
+ "## Assessing for alternative text labels"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 61,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "full_df = pd.read_excel(\"../data/raw/Durham_University_Museums_data.xlsx\")\n",
246
+ "full_df = full_df.filter(regex=r\"^(?!Unnamed).*$\")\n",
247
+ "full_df = full_df.dropna(subset=[\"description\"])"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 69,
253
+ "metadata": {},
254
+ "outputs": [
255
+ {
256
+ "data": {
257
+ "text/html": [
258
+ "<div>\n",
259
+ "<style scoped>\n",
260
+ " .dataframe tbody tr th:only-of-type {\n",
261
+ " vertical-align: middle;\n",
262
+ " }\n",
263
+ "\n",
264
+ " .dataframe tbody tr th {\n",
265
+ " vertical-align: top;\n",
266
+ " }\n",
267
+ "\n",
268
+ " .dataframe thead th {\n",
269
+ " text-align: right;\n",
270
+ " }\n",
271
+ "</style>\n",
272
+ "<table border=\"1\" class=\"dataframe\">\n",
273
+ " <thead>\n",
274
+ " <tr style=\"text-align: right;\">\n",
275
+ " <th></th>\n",
276
+ " <th>Column</th>\n",
277
+ " <th>Null Percentage</th>\n",
278
+ " <th>unique_values</th>\n",
279
+ " </tr>\n",
280
+ " </thead>\n",
281
+ " <tbody>\n",
282
+ " <tr>\n",
283
+ " <th>0</th>\n",
284
+ " <td>object_number</td>\n",
285
+ " <td>0.00</td>\n",
286
+ " <td>53460</td>\n",
287
+ " </tr>\n",
288
+ " <tr>\n",
289
+ " <th>4</th>\n",
290
+ " <td>description</td>\n",
291
+ " <td>0.00</td>\n",
292
+ " <td>1191</td>\n",
293
+ " </tr>\n",
294
+ " <tr>\n",
295
+ " <th>6</th>\n",
296
+ " <td>material</td>\n",
297
+ " <td>4.43</td>\n",
298
+ " <td>6442</td>\n",
299
+ " </tr>\n",
300
+ " <tr>\n",
301
+ " <th>1</th>\n",
302
+ " <td>object_name</td>\n",
303
+ " <td>8.96</td>\n",
304
+ " <td>26163</td>\n",
305
+ " </tr>\n",
306
+ " <tr>\n",
307
+ " <th>22</th>\n",
308
+ " <td>alternative_number</td>\n",
309
+ " <td>18.30</td>\n",
310
+ " <td>46165</td>\n",
311
+ " </tr>\n",
312
+ " <tr>\n",
313
+ " <th>13</th>\n",
314
+ " <td>production.place</td>\n",
315
+ " <td>34.42</td>\n",
316
+ " <td>3234</td>\n",
317
+ " </tr>\n",
318
+ " <tr>\n",
319
+ " <th>12</th>\n",
320
+ " <td>production.period</td>\n",
321
+ " <td>40.90</td>\n",
322
+ " <td>414</td>\n",
323
+ " </tr>\n",
324
+ " <tr>\n",
325
+ " <th>3</th>\n",
326
+ " <td>reproduction.reference</td>\n",
327
+ " <td>50.23</td>\n",
328
+ " <td>76</td>\n",
329
+ " </tr>\n",
330
+ " <tr>\n",
331
+ " <th>11</th>\n",
332
+ " <td>production.date.end</td>\n",
333
+ " <td>50.90</td>\n",
334
+ " <td>6923</td>\n",
335
+ " </tr>\n",
336
+ " <tr>\n",
337
+ " <th>10</th>\n",
338
+ " <td>production.date.start</td>\n",
339
+ " <td>51.04</td>\n",
340
+ " <td>127</td>\n",
341
+ " </tr>\n",
342
+ " <tr>\n",
343
+ " <th>2</th>\n",
344
+ " <td>other_name</td>\n",
345
+ " <td>58.72</td>\n",
346
+ " <td>968</td>\n",
347
+ " </tr>\n",
348
+ " <tr>\n",
349
+ " <th>9</th>\n",
350
+ " <td>number_of_parts</td>\n",
351
+ " <td>62.08</td>\n",
352
+ " <td>949</td>\n",
353
+ " </tr>\n",
354
+ " <tr>\n",
355
+ " <th>8</th>\n",
356
+ " <td>physical_description</td>\n",
357
+ " <td>73.54</td>\n",
358
+ " <td>485</td>\n",
359
+ " </tr>\n",
360
+ " <tr>\n",
361
+ " <th>14</th>\n",
362
+ " <td>field_coll.place</td>\n",
363
+ " <td>77.88</td>\n",
364
+ " <td>812</td>\n",
365
+ " </tr>\n",
366
+ " <tr>\n",
367
+ " <th>16</th>\n",
368
+ " <td>field_coll.method</td>\n",
369
+ " <td>83.38</td>\n",
370
+ " <td>546</td>\n",
371
+ " </tr>\n",
372
+ " <tr>\n",
373
+ " <th>18</th>\n",
374
+ " <td>content.subject</td>\n",
375
+ " <td>87.25</td>\n",
376
+ " <td>1449</td>\n",
377
+ " </tr>\n",
378
+ " <tr>\n",
379
+ " <th>7</th>\n",
380
+ " <td>technique</td>\n",
381
+ " <td>87.58</td>\n",
382
+ " <td>22</td>\n",
383
+ " </tr>\n",
384
+ " <tr>\n",
385
+ " <th>21</th>\n",
386
+ " <td>association.subject</td>\n",
387
+ " <td>88.35</td>\n",
388
+ " <td>516</td>\n",
389
+ " </tr>\n",
390
+ " <tr>\n",
391
+ " <th>15</th>\n",
392
+ " <td>field_coll.notes</td>\n",
393
+ " <td>91.09</td>\n",
394
+ " <td>773</td>\n",
395
+ " </tr>\n",
396
+ " <tr>\n",
397
+ " <th>5</th>\n",
398
+ " <td>label.text</td>\n",
399
+ " <td>91.69</td>\n",
400
+ " <td>78</td>\n",
401
+ " </tr>\n",
402
+ " <tr>\n",
403
+ " <th>20</th>\n",
404
+ " <td>association.person</td>\n",
405
+ " <td>95.54</td>\n",
406
+ " <td>289</td>\n",
407
+ " </tr>\n",
408
+ " <tr>\n",
409
+ " <th>17</th>\n",
410
+ " <td>content.person.name</td>\n",
411
+ " <td>95.89</td>\n",
412
+ " <td>247</td>\n",
413
+ " </tr>\n",
414
+ " <tr>\n",
415
+ " <th>19</th>\n",
416
+ " <td>association.period</td>\n",
417
+ " <td>97.70</td>\n",
418
+ " <td>36718</td>\n",
419
+ " </tr>\n",
420
+ " </tbody>\n",
421
+ "</table>\n",
422
+ "</div>"
423
+ ],
424
+ "text/plain": [
425
+ " Column Null Percentage unique_values\n",
426
+ "0 object_number 0.00 53460\n",
427
+ "4 description 0.00 1191\n",
428
+ "6 material 4.43 6442\n",
429
+ "1 object_name 8.96 26163\n",
430
+ "22 alternative_number 18.30 46165\n",
431
+ "13 production.place 34.42 3234\n",
432
+ "12 production.period 40.90 414\n",
433
+ "3 reproduction.reference 50.23 76\n",
434
+ "11 production.date.end 50.90 6923\n",
435
+ "10 production.date.start 51.04 127\n",
436
+ "2 other_name 58.72 968\n",
437
+ "9 number_of_parts 62.08 949\n",
438
+ "8 physical_description 73.54 485\n",
439
+ "14 field_coll.place 77.88 812\n",
440
+ "16 field_coll.method 83.38 546\n",
441
+ "18 content.subject 87.25 1449\n",
442
+ "7 technique 87.58 22\n",
443
+ "21 association.subject 88.35 516\n",
444
+ "15 field_coll.notes 91.09 773\n",
445
+ "5 label.text 91.69 78\n",
446
+ "20 association.person 95.54 289\n",
447
+ "17 content.person.name 95.89 247\n",
448
+ "19 association.period 97.70 36718"
449
+ ]
450
+ },
451
+ "execution_count": 69,
452
+ "metadata": {},
453
+ "output_type": "execute_result"
454
+ }
455
+ ],
456
+ "source": [
457
+ "null_percentage = (full_df.isnull().sum() / len(full_df)) * 100\n",
458
+ "desc_df = pd.DataFrame(\n",
459
+ " {\"Column\": null_percentage.index, \"Null Percentage\": null_percentage.values}\n",
460
+ ")\n",
461
+ "desc_df[\"Null Percentage\"] = desc_df[\"Null Percentage\"].round(2)\n",
462
+ "desc_df = desc_df.sort_values(by=\"Null Percentage\")\n",
463
+ "desc_df[\"unique_values\"] = full_df.nunique().values\n",
464
+ "desc_df"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 97,
470
+ "metadata": {},
471
+ "outputs": [],
472
+ "source": [
473
+ "def get_distribution(df, column, lower_bound=2):\n",
474
+ " distribution = pd.DataFrame()\n",
475
+ " col_counts = df[column].value_counts()\n",
476
+ " distribution[f\"{column}s per instance\"] = col_counts.value_counts().sort_index().index\n",
477
+ " distribution[\"Number of instances\"] = col_counts.value_counts().sort_index().values\n",
478
+ " distribution[f\"Number of {column}s\"] = (\n",
479
+ " distribution[f\"{column}s per instance\"] * distribution[\"Number of instances\"]\n",
480
+ " )\n",
481
+ " num_instances_10_50 = distribution[\n",
482
+ " (distribution[f\"{column}s per instance\"] >= 10)\n",
483
+ " & (distribution[f\"{column}s per instance\"] < 50)\n",
484
+ " ][\"Number of instances\"].sum()\n",
485
+ " num_images_10_50 = distribution[\n",
486
+ " (distribution[f\"{column}s per instance\"] >= 10)\n",
487
+ " & (distribution[f\"{column}s per instance\"] < 50)\n",
488
+ " ][f\"Number of {column}s\"].sum()\n",
489
+ " num_instances_50_100 = distribution[\n",
490
+ " (distribution[f\"{column}s per instance\"] >= 50)\n",
491
+ " & (distribution[f\"{column}s per instance\"] < 100)\n",
492
+ " ][\"Number of instances\"].sum()\n",
493
+ " num_images_50_100 = distribution[\n",
494
+ " (distribution[f\"{column}s per instance\"] >= 50)\n",
495
+ " & (distribution[f\"{column}s per instance\"] < 100)\n",
496
+ " ][f\"Number of {column}s\"].sum()\n",
497
+ " num_instances_100_1000 = distribution[\n",
498
+ " (distribution[f\"{column}s per instance\"] >= 100)\n",
499
+ " & (distribution[f\"{column}s per instance\"] < 1000)\n",
500
+ " ][\"Number of instances\"].sum()\n",
501
+ " num_images_100_1000 = distribution[\n",
502
+ " (distribution[f\"{column}s per instance\"] >= 100)\n",
503
+ " & (distribution[f\"{column}s per instance\"] < 1000)\n",
504
+ " ][f\"Number of {column}s\"].sum()\n",
505
+ " num_instances_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
506
+ " \"Number of instances\"\n",
507
+ " ].sum()\n",
508
+ " num_images_1000plus = distribution[distribution[f\"{column}s per instance\"] >= 1000][\n",
509
+ " f\"Number of {column}s\"\n",
510
+ " ].sum()\n",
511
+ "\n",
512
+ " distribution = distribution[\n",
513
+ " (distribution[f\"{column}s per instance\"] < 10)\n",
514
+ " & (distribution[f\"{column}s per instance\"] > lower_bound)\n",
515
+ " ]\n",
516
+ "\n",
517
+ " distribution = pd.concat(\n",
518
+ " [\n",
519
+ " distribution,\n",
520
+ " pd.DataFrame(\n",
521
+ " {\n",
522
+ " f\"{column}s per instance\": [\"10-50\"],\n",
523
+ " \"Number of instances\": [num_instances_10_50],\n",
524
+ " f\"Number of {column}s\": [num_images_10_50],\n",
525
+ " }\n",
526
+ " ),\n",
527
+ " pd.DataFrame(\n",
528
+ " {\n",
529
+ " f\"{column}s per instance\": [\"50-100\"],\n",
530
+ " \"Number of instances\": [num_instances_50_100],\n",
531
+ " f\"Number of {column}s\": [num_images_50_100],\n",
532
+ " }\n",
533
+ " ),\n",
534
+ " pd.DataFrame(\n",
535
+ " {\n",
536
+ " f\"{column}s per instance\": [\"100-1000\"],\n",
537
+ " \"Number of instances\": [num_instances_100_1000],\n",
538
+ " f\"Number of {column}s\": [num_images_100_1000],\n",
539
+ " }\n",
540
+ " ),\n",
541
+ " pd.DataFrame(\n",
542
+ " {\n",
543
+ " f\"{column}s per instance\": [\"1000+\"],\n",
544
+ " \"Number of instances\": [num_instances_1000plus],\n",
545
+ " f\"Number of {column}s\": [num_images_1000plus],\n",
546
+ " }\n",
547
+ " ),\n",
548
+ " ],\n",
549
+ " ignore_index=True,\n",
550
+ " )\n",
551
+ "\n",
552
+ " distribution = pd.concat(\n",
553
+ " [\n",
554
+ " distribution,\n",
555
+ " pd.DataFrame(\n",
556
+ " {\n",
557
+ " f\"{column}s per instance\": [\"Total\"],\n",
558
+ " \"Number of instances\": [distribution[\"Number of instances\"].sum()],\n",
559
+ " f\"Number of {column}s\": [distribution[f\"Number of {column}s\"].sum()],\n",
560
+ " }\n",
561
+ " ),\n",
562
+ " ],\n",
563
+ " ignore_index=True,\n",
564
+ " )\n",
565
+ " # rename columns\n",
566
+ " return distribution"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "execution_count": 107,
572
+ "metadata": {},
573
+ "outputs": [
574
+ {
575
+ "data": {
576
+ "text/html": [
577
+ "<div>\n",
578
+ "<style scoped>\n",
579
+ " .dataframe tbody tr th:only-of-type {\n",
580
+ " vertical-align: middle;\n",
581
+ " }\n",
582
+ "\n",
583
+ " .dataframe tbody tr th {\n",
584
+ " vertical-align: top;\n",
585
+ " }\n",
586
+ "\n",
587
+ " .dataframe thead th {\n",
588
+ " text-align: right;\n",
589
+ " }\n",
590
+ "</style>\n",
591
+ "<table border=\"1\" class=\"dataframe\">\n",
592
+ " <thead>\n",
593
+ " <tr style=\"text-align: right;\">\n",
594
+ " <th></th>\n",
595
+ " <th>object_names per instance</th>\n",
596
+ " <th>Number of instances</th>\n",
597
+ " <th>Number of object_names</th>\n",
598
+ " </tr>\n",
599
+ " </thead>\n",
600
+ " <tbody>\n",
601
+ " <tr>\n",
602
+ " <th>0</th>\n",
603
+ " <td>3</td>\n",
604
+ " <td>93</td>\n",
605
+ " <td>279</td>\n",
606
+ " </tr>\n",
607
+ " <tr>\n",
608
+ " <th>1</th>\n",
609
+ " <td>4</td>\n",
610
+ " <td>57</td>\n",
611
+ " <td>228</td>\n",
612
+ " </tr>\n",
613
+ " <tr>\n",
614
+ " <th>2</th>\n",
615
+ " <td>5</td>\n",
616
+ " <td>53</td>\n",
617
+ " <td>265</td>\n",
618
+ " </tr>\n",
619
+ " <tr>\n",
620
+ " <th>3</th>\n",
621
+ " <td>6</td>\n",
622
+ " <td>32</td>\n",
623
+ " <td>192</td>\n",
624
+ " </tr>\n",
625
+ " <tr>\n",
626
+ " <th>4</th>\n",
627
+ " <td>7</td>\n",
628
+ " <td>27</td>\n",
629
+ " <td>189</td>\n",
630
+ " </tr>\n",
631
+ " <tr>\n",
632
+ " <th>5</th>\n",
633
+ " <td>8</td>\n",
634
+ " <td>24</td>\n",
635
+ " <td>192</td>\n",
636
+ " </tr>\n",
637
+ " <tr>\n",
638
+ " <th>6</th>\n",
639
+ " <td>9</td>\n",
640
+ " <td>27</td>\n",
641
+ " <td>243</td>\n",
642
+ " </tr>\n",
643
+ " <tr>\n",
644
+ " <th>7</th>\n",
645
+ " <td>10-50</td>\n",
646
+ " <td>227</td>\n",
647
+ " <td>4921</td>\n",
648
+ " </tr>\n",
649
+ " <tr>\n",
650
+ " <th>8</th>\n",
651
+ " <td>50-100</td>\n",
652
+ " <td>51</td>\n",
653
+ " <td>3683</td>\n",
654
+ " </tr>\n",
655
+ " <tr>\n",
656
+ " <th>9</th>\n",
657
+ " <td>100-1000</td>\n",
658
+ " <td>65</td>\n",
659
+ " <td>17027</td>\n",
660
+ " </tr>\n",
661
+ " <tr>\n",
662
+ " <th>10</th>\n",
663
+ " <td>1000+</td>\n",
664
+ " <td>7</td>\n",
665
+ " <td>20758</td>\n",
666
+ " </tr>\n",
667
+ " <tr>\n",
668
+ " <th>11</th>\n",
669
+ " <td>Total</td>\n",
670
+ " <td>663</td>\n",
671
+ " <td>47977</td>\n",
672
+ " </tr>\n",
673
+ " </tbody>\n",
674
+ "</table>\n",
675
+ "</div>"
676
+ ],
677
+ "text/plain": [
678
+ " object_names per instance Number of instances Number of object_names\n",
679
+ "0 3 93 279\n",
680
+ "1 4 57 228\n",
681
+ "2 5 53 265\n",
682
+ "3 6 32 192\n",
683
+ "4 7 27 189\n",
684
+ "5 8 24 192\n",
685
+ "6 9 27 243\n",
686
+ "7 10-50 227 4921\n",
687
+ "8 50-100 51 3683\n",
688
+ "9 100-1000 65 17027\n",
689
+ "10 1000+ 7 20758\n",
690
+ "11 Total 663 47977"
691
+ ]
692
+ },
693
+ "execution_count": 107,
694
+ "metadata": {},
695
+ "output_type": "execute_result"
696
+ }
697
+ ],
698
+ "source": [
699
+ "get_distribution(full_df, \"object_name\")"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 100,
705
+ "metadata": {},
706
+ "outputs": [
707
+ {
708
+ "data": {
709
+ "text/html": [
710
+ "<div>\n",
711
+ "<style scoped>\n",
712
+ " .dataframe tbody tr th:only-of-type {\n",
713
+ " vertical-align: middle;\n",
714
+ " }\n",
715
+ "\n",
716
+ " .dataframe tbody tr th {\n",
717
+ " vertical-align: top;\n",
718
+ " }\n",
719
+ "\n",
720
+ " .dataframe thead th {\n",
721
+ " text-align: right;\n",
722
+ " }\n",
723
+ "</style>\n",
724
+ "<table border=\"1\" class=\"dataframe\">\n",
725
+ " <thead>\n",
726
+ " <tr style=\"text-align: right;\">\n",
727
+ " <th></th>\n",
728
+ " <th>materials per instance</th>\n",
729
+ " <th>Number of instances</th>\n",
730
+ " <th>Number of materials</th>\n",
731
+ " </tr>\n",
732
+ " </thead>\n",
733
+ " <tbody>\n",
734
+ " <tr>\n",
735
+ " <th>0</th>\n",
736
+ " <td>3</td>\n",
737
+ " <td>30</td>\n",
738
+ " <td>90</td>\n",
739
+ " </tr>\n",
740
+ " <tr>\n",
741
+ " <th>1</th>\n",
742
+ " <td>4</td>\n",
743
+ " <td>16</td>\n",
744
+ " <td>64</td>\n",
745
+ " </tr>\n",
746
+ " <tr>\n",
747
+ " <th>2</th>\n",
748
+ " <td>5</td>\n",
749
+ " <td>14</td>\n",
750
+ " <td>70</td>\n",
751
+ " </tr>\n",
752
+ " <tr>\n",
753
+ " <th>3</th>\n",
754
+ " <td>6</td>\n",
755
+ " <td>9</td>\n",
756
+ " <td>54</td>\n",
757
+ " </tr>\n",
758
+ " <tr>\n",
759
+ " <th>4</th>\n",
760
+ " <td>7</td>\n",
761
+ " <td>10</td>\n",
762
+ " <td>70</td>\n",
763
+ " </tr>\n",
764
+ " <tr>\n",
765
+ " <th>5</th>\n",
766
+ " <td>8</td>\n",
767
+ " <td>6</td>\n",
768
+ " <td>48</td>\n",
769
+ " </tr>\n",
770
+ " <tr>\n",
771
+ " <th>6</th>\n",
772
+ " <td>9</td>\n",
773
+ " <td>5</td>\n",
774
+ " <td>45</td>\n",
775
+ " </tr>\n",
776
+ " <tr>\n",
777
+ " <th>7</th>\n",
778
+ " <td>10-50</td>\n",
779
+ " <td>88</td>\n",
780
+ " <td>1975</td>\n",
781
+ " </tr>\n",
782
+ " <tr>\n",
783
+ " <th>8</th>\n",
784
+ " <td>50-100</td>\n",
785
+ " <td>21</td>\n",
786
+ " <td>1409</td>\n",
787
+ " </tr>\n",
788
+ " <tr>\n",
789
+ " <th>9</th>\n",
790
+ " <td>100-1000</td>\n",
791
+ " <td>43</td>\n",
792
+ " <td>13030</td>\n",
793
+ " </tr>\n",
794
+ " <tr>\n",
795
+ " <th>10</th>\n",
796
+ " <td>1000+</td>\n",
797
+ " <td>12</td>\n",
798
+ " <td>34036</td>\n",
799
+ " </tr>\n",
800
+ " <tr>\n",
801
+ " <th>11</th>\n",
802
+ " <td>Total</td>\n",
803
+ " <td>254</td>\n",
804
+ " <td>50891</td>\n",
805
+ " </tr>\n",
806
+ " </tbody>\n",
807
+ "</table>\n",
808
+ "</div>"
809
+ ],
810
+ "text/plain": [
811
+ " materials per instance Number of instances Number of materials\n",
812
+ "0 3 30 90\n",
813
+ "1 4 16 64\n",
814
+ "2 5 14 70\n",
815
+ "3 6 9 54\n",
816
+ "4 7 10 70\n",
817
+ "5 8 6 48\n",
818
+ "6 9 5 45\n",
819
+ "7 10-50 88 1975\n",
820
+ "8 50-100 21 1409\n",
821
+ "9 100-1000 43 13030\n",
822
+ "10 1000+ 12 34036\n",
823
+ "11 Total 254 50891"
824
+ ]
825
+ },
826
+ "execution_count": 100,
827
+ "metadata": {},
828
+ "output_type": "execute_result"
829
+ }
830
+ ],
831
+ "source": [
832
+ "get_distribution(full_df, \"material\")"
833
+ ]
834
+ },
835
+ {
836
+ "cell_type": "markdown",
837
+ "metadata": {},
838
+ "source": [
839
+ "Production date could be used for a regression task, and the other fields could be used for a classification task."
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "markdown",
844
+ "metadata": {},
845
+ "source": [
846
+ "### Year"
847
+ ]
848
+ },
849
+ {
850
+ "cell_type": "code",
851
+ "execution_count": 101,
852
+ "metadata": {},
853
+ "outputs": [
854
+ {
855
+ "data": {
856
+ "text/html": [
857
+ "<div>\n",
858
+ "<style scoped>\n",
859
+ " .dataframe tbody tr th:only-of-type {\n",
860
+ " vertical-align: middle;\n",
861
+ " }\n",
862
+ "\n",
863
+ " .dataframe tbody tr th {\n",
864
+ " vertical-align: top;\n",
865
+ " }\n",
866
+ "\n",
867
+ " .dataframe thead th {\n",
868
+ " text-align: right;\n",
869
+ " }\n",
870
+ "</style>\n",
871
+ "<table border=\"1\" class=\"dataframe\">\n",
872
+ " <thead>\n",
873
+ " <tr style=\"text-align: right;\">\n",
874
+ " <th></th>\n",
875
+ " <th>production.date.starts per instance</th>\n",
876
+ " <th>Number of instances</th>\n",
877
+ " <th>Number of production.date.starts</th>\n",
878
+ " </tr>\n",
879
+ " </thead>\n",
880
+ " <tbody>\n",
881
+ " <tr>\n",
882
+ " <th>0</th>\n",
883
+ " <td>1</td>\n",
884
+ " <td>275</td>\n",
885
+ " <td>275</td>\n",
886
+ " </tr>\n",
887
+ " <tr>\n",
888
+ " <th>1</th>\n",
889
+ " <td>2</td>\n",
890
+ " <td>129</td>\n",
891
+ " <td>258</td>\n",
892
+ " </tr>\n",
893
+ " <tr>\n",
894
+ " <th>2</th>\n",
895
+ " <td>3</td>\n",
896
+ " <td>75</td>\n",
897
+ " <td>225</td>\n",
898
+ " </tr>\n",
899
+ " <tr>\n",
900
+ " <th>3</th>\n",
901
+ " <td>4</td>\n",
902
+ " <td>72</td>\n",
903
+ " <td>288</td>\n",
904
+ " </tr>\n",
905
+ " <tr>\n",
906
+ " <th>4</th>\n",
907
+ " <td>5</td>\n",
908
+ " <td>45</td>\n",
909
+ " <td>225</td>\n",
910
+ " </tr>\n",
911
+ " <tr>\n",
912
+ " <th>5</th>\n",
913
+ " <td>6</td>\n",
914
+ " <td>32</td>\n",
915
+ " <td>192</td>\n",
916
+ " </tr>\n",
917
+ " <tr>\n",
918
+ " <th>6</th>\n",
919
+ " <td>7</td>\n",
920
+ " <td>20</td>\n",
921
+ " <td>140</td>\n",
922
+ " </tr>\n",
923
+ " <tr>\n",
924
+ " <th>7</th>\n",
925
+ " <td>8</td>\n",
926
+ " <td>16</td>\n",
927
+ " <td>128</td>\n",
928
+ " </tr>\n",
929
+ " <tr>\n",
930
+ " <th>8</th>\n",
931
+ " <td>9</td>\n",
932
+ " <td>21</td>\n",
933
+ " <td>189</td>\n",
934
+ " </tr>\n",
935
+ " <tr>\n",
936
+ " <th>9</th>\n",
937
+ " <td>10-50</td>\n",
938
+ " <td>199</td>\n",
939
+ " <td>4226</td>\n",
940
+ " </tr>\n",
941
+ " <tr>\n",
942
+ " <th>10</th>\n",
943
+ " <td>50-100</td>\n",
944
+ " <td>39</td>\n",
945
+ " <td>2661</td>\n",
946
+ " </tr>\n",
947
+ " <tr>\n",
948
+ " <th>11</th>\n",
949
+ " <td>100-1000</td>\n",
950
+ " <td>41</td>\n",
951
+ " <td>10259</td>\n",
952
+ " </tr>\n",
953
+ " <tr>\n",
954
+ " <th>12</th>\n",
955
+ " <td>1000+</td>\n",
956
+ " <td>4</td>\n",
957
+ " <td>7110</td>\n",
958
+ " </tr>\n",
959
+ " <tr>\n",
960
+ " <th>13</th>\n",
961
+ " <td>Total</td>\n",
962
+ " <td>968</td>\n",
963
+ " <td>26176</td>\n",
964
+ " </tr>\n",
965
+ " </tbody>\n",
966
+ "</table>\n",
967
+ "</div>"
968
+ ],
969
+ "text/plain": [
970
+ " production.date.starts per instance Number of instances \\\n",
971
+ "0 1 275 \n",
972
+ "1 2 129 \n",
973
+ "2 3 75 \n",
974
+ "3 4 72 \n",
975
+ "4 5 45 \n",
976
+ "5 6 32 \n",
977
+ "6 7 20 \n",
978
+ "7 8 16 \n",
979
+ "8 9 21 \n",
980
+ "9 10-50 199 \n",
981
+ "10 50-100 39 \n",
982
+ "11 100-1000 41 \n",
983
+ "12 1000+ 4 \n",
984
+ "13 Total 968 \n",
985
+ "\n",
986
+ " Number of production.date.starts \n",
987
+ "0 275 \n",
988
+ "1 258 \n",
989
+ "2 225 \n",
990
+ "3 288 \n",
991
+ "4 225 \n",
992
+ "5 192 \n",
993
+ "6 140 \n",
994
+ "7 128 \n",
995
+ "8 189 \n",
996
+ "9 4226 \n",
997
+ "10 2661 \n",
998
+ "11 10259 \n",
999
+ "12 7110 \n",
1000
+ "13 26176 "
1001
+ ]
1002
+ },
1003
+ "execution_count": 101,
1004
+ "metadata": {},
1005
+ "output_type": "execute_result"
1006
+ }
1007
+ ],
1008
+ "source": [
1009
+ "get_distribution(full_df, \"production.date.start\", lower_bound=0)"
1010
+ ]
1011
+ },
1012
+ {
1013
+ "cell_type": "code",
1014
+ "execution_count": 102,
1015
+ "metadata": {},
1016
+ "outputs": [
1017
+ {
1018
+ "data": {
1019
+ "text/html": [
1020
+ "<div>\n",
1021
+ "<style scoped>\n",
1022
+ " .dataframe tbody tr th:only-of-type {\n",
1023
+ " vertical-align: middle;\n",
1024
+ " }\n",
1025
+ "\n",
1026
+ " .dataframe tbody tr th {\n",
1027
+ " vertical-align: top;\n",
1028
+ " }\n",
1029
+ "\n",
1030
+ " .dataframe thead th {\n",
1031
+ " text-align: right;\n",
1032
+ " }\n",
1033
+ "</style>\n",
1034
+ "<table border=\"1\" class=\"dataframe\">\n",
1035
+ " <thead>\n",
1036
+ " <tr style=\"text-align: right;\">\n",
1037
+ " <th></th>\n",
1038
+ " <th>production.date.ends per instance</th>\n",
1039
+ " <th>Number of instances</th>\n",
1040
+ " <th>Number of production.date.ends</th>\n",
1041
+ " </tr>\n",
1042
+ " </thead>\n",
1043
+ " <tbody>\n",
1044
+ " <tr>\n",
1045
+ " <th>0</th>\n",
1046
+ " <td>1</td>\n",
1047
+ " <td>285</td>\n",
1048
+ " <td>285</td>\n",
1049
+ " </tr>\n",
1050
+ " <tr>\n",
1051
+ " <th>1</th>\n",
1052
+ " <td>2</td>\n",
1053
+ " <td>120</td>\n",
1054
+ " <td>240</td>\n",
1055
+ " </tr>\n",
1056
+ " <tr>\n",
1057
+ " <th>2</th>\n",
1058
+ " <td>3</td>\n",
1059
+ " <td>63</td>\n",
1060
+ " <td>189</td>\n",
1061
+ " </tr>\n",
1062
+ " <tr>\n",
1063
+ " <th>3</th>\n",
1064
+ " <td>4</td>\n",
1065
+ " <td>46</td>\n",
1066
+ " <td>184</td>\n",
1067
+ " </tr>\n",
1068
+ " <tr>\n",
1069
+ " <th>4</th>\n",
1070
+ " <td>5</td>\n",
1071
+ " <td>32</td>\n",
1072
+ " <td>160</td>\n",
1073
+ " </tr>\n",
1074
+ " <tr>\n",
1075
+ " <th>5</th>\n",
1076
+ " <td>6</td>\n",
1077
+ " <td>37</td>\n",
1078
+ " <td>222</td>\n",
1079
+ " </tr>\n",
1080
+ " <tr>\n",
1081
+ " <th>6</th>\n",
1082
+ " <td>7</td>\n",
1083
+ " <td>26</td>\n",
1084
+ " <td>182</td>\n",
1085
+ " </tr>\n",
1086
+ " <tr>\n",
1087
+ " <th>7</th>\n",
1088
+ " <td>8</td>\n",
1089
+ " <td>20</td>\n",
1090
+ " <td>160</td>\n",
1091
+ " </tr>\n",
1092
+ " <tr>\n",
1093
+ " <th>8</th>\n",
1094
+ " <td>9</td>\n",
1095
+ " <td>19</td>\n",
1096
+ " <td>171</td>\n",
1097
+ " </tr>\n",
1098
+ " <tr>\n",
1099
+ " <th>9</th>\n",
1100
+ " <td>10-50</td>\n",
1101
+ " <td>210</td>\n",
1102
+ " <td>4562</td>\n",
1103
+ " </tr>\n",
1104
+ " <tr>\n",
1105
+ " <th>10</th>\n",
1106
+ " <td>50-100</td>\n",
1107
+ " <td>41</td>\n",
1108
+ " <td>2588</td>\n",
1109
+ " </tr>\n",
1110
+ " <tr>\n",
1111
+ " <th>11</th>\n",
1112
+ " <td>100-1000</td>\n",
1113
+ " <td>47</td>\n",
1114
+ " <td>11609</td>\n",
1115
+ " </tr>\n",
1116
+ " <tr>\n",
1117
+ " <th>12</th>\n",
1118
+ " <td>1000+</td>\n",
1119
+ " <td>3</td>\n",
1120
+ " <td>5696</td>\n",
1121
+ " </tr>\n",
1122
+ " <tr>\n",
1123
+ " <th>13</th>\n",
1124
+ " <td>Total</td>\n",
1125
+ " <td>949</td>\n",
1126
+ " <td>26248</td>\n",
1127
+ " </tr>\n",
1128
+ " </tbody>\n",
1129
+ "</table>\n",
1130
+ "</div>"
1131
+ ],
1132
+ "text/plain": [
1133
+ " production.date.ends per instance Number of instances \\\n",
1134
+ "0 1 285 \n",
1135
+ "1 2 120 \n",
1136
+ "2 3 63 \n",
1137
+ "3 4 46 \n",
1138
+ "4 5 32 \n",
1139
+ "5 6 37 \n",
1140
+ "6 7 26 \n",
1141
+ "7 8 20 \n",
1142
+ "8 9 19 \n",
1143
+ "9 10-50 210 \n",
1144
+ "10 50-100 41 \n",
1145
+ "11 100-1000 47 \n",
1146
+ "12 1000+ 3 \n",
1147
+ "13 Total 949 \n",
1148
+ "\n",
1149
+ " Number of production.date.ends \n",
1150
+ "0 285 \n",
1151
+ "1 240 \n",
1152
+ "2 189 \n",
1153
+ "3 184 \n",
1154
+ "4 160 \n",
1155
+ "5 222 \n",
1156
+ "6 182 \n",
1157
+ "7 160 \n",
1158
+ "8 171 \n",
1159
+ "9 4562 \n",
1160
+ "10 2588 \n",
1161
+ "11 11609 \n",
1162
+ "12 5696 \n",
1163
+ "13 26248 "
1164
+ ]
1165
+ },
1166
+ "execution_count": 102,
1167
+ "metadata": {},
1168
+ "output_type": "execute_result"
1169
+ }
1170
+ ],
1171
+ "source": [
1172
+ "get_distribution(full_df, \"production.date.end\", lower_bound=0)"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "cell_type": "code",
1177
+ "execution_count": 91,
1178
+ "metadata": {},
1179
+ "outputs": [
1180
+ {
1181
+ "data": {
1182
+ "text/html": [
1183
+ "<div>\n",
1184
+ "<style scoped>\n",
1185
+ " .dataframe tbody tr th:only-of-type {\n",
1186
+ " vertical-align: middle;\n",
1187
+ " }\n",
1188
+ "\n",
1189
+ " .dataframe tbody tr th {\n",
1190
+ " vertical-align: top;\n",
1191
+ " }\n",
1192
+ "\n",
1193
+ " .dataframe thead th {\n",
1194
+ " text-align: right;\n",
1195
+ " }\n",
1196
+ "</style>\n",
1197
+ "<table border=\"1\" class=\"dataframe\">\n",
1198
+ " <thead>\n",
1199
+ " <tr style=\"text-align: right;\">\n",
1200
+ " <th></th>\n",
1201
+ " <th>start_year</th>\n",
1202
+ " <th>end_year</th>\n",
1203
+ " <th>year_diff</th>\n",
1204
+ " <th>mid_year</th>\n",
1205
+ " </tr>\n",
1206
+ " </thead>\n",
1207
+ " <tbody>\n",
1208
+ " <tr>\n",
1209
+ " <th>2</th>\n",
1210
+ " <td>-3000</td>\n",
1211
+ " <td>-3000</td>\n",
1212
+ " <td>0</td>\n",
1213
+ " <td>-3000</td>\n",
1214
+ " </tr>\n",
1215
+ " <tr>\n",
1216
+ " <th>142</th>\n",
1217
+ " <td>-600</td>\n",
1218
+ " <td>-332</td>\n",
1219
+ " <td>268</td>\n",
1220
+ " <td>-466</td>\n",
1221
+ " </tr>\n",
1222
+ " <tr>\n",
1223
+ " <th>143</th>\n",
1224
+ " <td>-1069</td>\n",
1225
+ " <td>-716</td>\n",
1226
+ " <td>353</td>\n",
1227
+ " <td>-893</td>\n",
1228
+ " </tr>\n",
1229
+ " <tr>\n",
1230
+ " <th>147</th>\n",
1231
+ " <td>-716</td>\n",
1232
+ " <td>-332</td>\n",
1233
+ " <td>384</td>\n",
1234
+ " <td>-524</td>\n",
1235
+ " </tr>\n",
1236
+ " <tr>\n",
1237
+ " <th>148</th>\n",
1238
+ " <td>-716</td>\n",
1239
+ " <td>-332</td>\n",
1240
+ " <td>384</td>\n",
1241
+ " <td>-524</td>\n",
1242
+ " </tr>\n",
1243
+ " <tr>\n",
1244
+ " <th>...</th>\n",
1245
+ " <td>...</td>\n",
1246
+ " <td>...</td>\n",
1247
+ " <td>...</td>\n",
1248
+ " <td>...</td>\n",
1249
+ " </tr>\n",
1250
+ " <tr>\n",
1251
+ " <th>60081</th>\n",
1252
+ " <td>218</td>\n",
1253
+ " <td>222</td>\n",
1254
+ " <td>4</td>\n",
1255
+ " <td>220</td>\n",
1256
+ " </tr>\n",
1257
+ " <tr>\n",
1258
+ " <th>60082</th>\n",
1259
+ " <td>1996</td>\n",
1260
+ " <td>1996</td>\n",
1261
+ " <td>0</td>\n",
1262
+ " <td>1996</td>\n",
1263
+ " </tr>\n",
1264
+ " <tr>\n",
1265
+ " <th>60083</th>\n",
1266
+ " <td>2016</td>\n",
1267
+ " <td>2016</td>\n",
1268
+ " <td>0</td>\n",
1269
+ " <td>2016</td>\n",
1270
+ " </tr>\n",
1271
+ " <tr>\n",
1272
+ " <th>60084</th>\n",
1273
+ " <td>1996</td>\n",
1274
+ " <td>1996</td>\n",
1275
+ " <td>0</td>\n",
1276
+ " <td>1996</td>\n",
1277
+ " </tr>\n",
1278
+ " <tr>\n",
1279
+ " <th>60085</th>\n",
1280
+ " <td>1996</td>\n",
1281
+ " <td>1996</td>\n",
1282
+ " <td>0</td>\n",
1283
+ " <td>1996</td>\n",
1284
+ " </tr>\n",
1285
+ " </tbody>\n",
1286
+ "</table>\n",
1287
+ "<p>26016 rows × 4 columns</p>\n",
1288
+ "</div>"
1289
+ ],
1290
+ "text/plain": [
1291
+ " start_year end_year year_diff mid_year\n",
1292
+ "2 -3000 -3000 0 -3000\n",
1293
+ "142 -600 -332 268 -466\n",
1294
+ "143 -1069 -716 353 -893\n",
1295
+ "147 -716 -332 384 -524\n",
1296
+ "148 -716 -332 384 -524\n",
1297
+ "... ... ... ... ...\n",
1298
+ "60081 218 222 4 220\n",
1299
+ "60082 1996 1996 0 1996\n",
1300
+ "60083 2016 2016 0 2016\n",
1301
+ "60084 1996 1996 0 1996\n",
1302
+ "60085 1996 1996 0 1996\n",
1303
+ "\n",
1304
+ "[26016 rows x 4 columns]"
1305
+ ]
1306
+ },
1307
+ "execution_count": 91,
1308
+ "metadata": {},
1309
+ "output_type": "execute_result"
1310
+ }
1311
+ ],
1312
+ "source": [
1313
+ "year_df = pd.DataFrame()\n",
1314
+ "year_df[\"start_year\"] = full_df[\"production.date.start\"]\n",
1315
+ "year_df[\"end_year\"] = full_df[\"production.date.end\"]\n",
1316
+ "year_df = year_df.dropna()\n",
1317
+ "\n",
1318
+ "non_numeric_instances = year_df[\n",
1319
+ " pd.to_numeric(year_df[\"start_year\"], errors=\"coerce\").isna()\n",
1320
+ " | pd.to_numeric(year_df[\"end_year\"], errors=\"coerce\").isna()\n",
1321
+ "]\n",
1322
+ "# get non-numeric instances\n",
1323
+ "year_df = year_df[~year_df.index.isin(non_numeric_instances.index)]\n",
1324
+ "year_df[\"start_year\"] = year_df[\"start_year\"].astype(int)\n",
1325
+ "year_df[\"end_year\"] = year_df[\"end_year\"].astype(int)\n",
1326
+ "year_df[\"year_diff\"] = year_df[\"end_year\"] - year_df[\"start_year\"]\n",
1327
+ "\n",
1328
+ "year_df[\"mid_year\"] = year_df[\"start_year\"] + year_df[\"year_diff\"] / 2\n",
1329
+ "year_df[\"mid_year\"] = year_df[\"mid_year\"].apply(lambda x: int(np.floor(x)))\n",
1330
+ "year_df"
1331
+ ]
1332
+ },
1333
+ {
1334
+ "cell_type": "code",
1335
+ "execution_count": 93,
1336
+ "metadata": {},
1337
+ "outputs": [
1338
+ {
1339
+ "data": {
1340
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEHUlEQVR4nO3df3zP9f7/8ft7s723sZ/YZjWzIhQdoTQRsTasRE4dUaFFTtvJIp2UJCpZ0RDJOZkU+XUcdRCWn6Xlxwr5kVJ+xraKmZ/bbK/vH332+nrb8DJv29vcrpeLS72er8f79Xq83k/b7l7v5/s9m2EYhgAAAHBBbhXdAAAAwNWA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEVGLDhw+XzWYrl3O1bdtWbdu2NbdXrVolm82mefPmlcv5e/furTp16pTLucrq+PHjevLJJxUaGiqbzaakpCSnn+NS5txms2n48OFO7wGorAhNwFVi2rRpstls5h8vLy+FhYUpNjZW48eP17Fjx5xynoMHD2r48OHatGmTU47nTK7cmxVvvPGGpk2bpr///e/66KOP9Nhjj523tk6dOrLZbIqOji51/7/+9S/z78LGjRuvSL9vvvmmbDabli5dWur+Tp06yd/fXwcPHrwi5wdcjgHgqpCammpIMkaMGGF89NFHxtSpU4033njDiImJMWw2mxEREWFs3rzZ4TEFBQXGqVOnLuk8GzZsMCQZqampl/S4vLw8Iy8vz9xeuXKlIcmYO3fuJR2nrL3l5+cbp0+fdtq5roQWLVoYd911l6XaiIgIw8vLy3BzczMOHTpUYn+bNm0MLy8vQ5KxYcMGc/xS5lyS8corr5x3f35+vtG4cWPjhhtuME6ePOmwb86cOYYkY+LEiZbOBVQG3GkCrjIdO3bUo48+qj59+mjIkCFaunSpvvjiC2VnZ6tz5846deqUWVulShV5eXld0X5OnjwpSfL09JSnp+cVPdeFeHh4yG63V9j5rcjOzlZAQIDl+rvuukvVqlXT7NmzHcYPHDigL7/8UnFxcSUe48w59/Dw0JQpU7Rnzx6NHDnSHD927JiSkpJ05513qn///k4514UUFRXp9OnTV/w8wMUQmoBKoF27dnr55Ze1d+9effzxx+Z4aetb0tLS1KpVKwUEBKhatWqqX7++XnzxRUl/rkO6/fbbJUl9+vQxX/6ZNm2apD/XLTVq1EgZGRm6++675ePjYz723DVNxQoLC/Xiiy8qNDRUVatWVefOnbV//36Hmjp16qh3794lHnv2MS/WW2lrmk6cOKFBgwYpPDxcdrtd9evX19tvvy3DMBzqbDabEhMTtWDBAjVq1Eh2u1233HKLlixZUvoTfo7s7GzFx8crJCREXl5e+stf/qIPP/zQ3F+8vmv37t1atGiR2fuePXsueFwvLy89+OCDmjlzpsP4J598osDAQMXGxpZ4TGlznpeXp2effVY1a9aUr6+vOnfurAMHDli6tuJg9Pbbb2v79u2SpKFDhyo7O1tTpkyRm5ubcnJylJSUZD7PdevW1ejRo1VUVORwrLffflstW7ZU9erV5e3trWbNmpW65q14PmbMmKFbbrlFdrvd8lwAV1KVim4AgHM89thjevHFF7Vs2TL17du31Jpt27bpvvvu06233qoRI0bIbrdr165dWrt2rSSpYcOGGjFihIYNG6Z+/fqpdevWkqSWLVuax/jjjz/UsWNHde/eXY8++qhCQkIu2Nfrr78um82mf/7zn8rOzlZKSoqio6O1adMmeXt7W74+K72dzTAMde7cWStXrlR8fLyaNGmipUuXavDgwfr111/1zjvvONR/9dVXmj9/vp5++mn5+vpq/Pjx6tatm/bt26fq1auft69Tp06pbdu22rVrlxITExUZGam5c+eqd+/eysnJ0YABA9SwYUN99NFHevbZZ3X99ddr0KBBkqSaNWte9Lp79OihmJgY/fzzz7rxxhslSTNnztRf//pXeXh4WHrunnzySX388cfq0aOHWrZsqRUrVpR6l+p8Ro0apQULFuipp55SSkqKJk6cqMGDB6tx48Y6efKk2rRpo19//VVPPfWUateura+//lpDhgzRoUOHlJKSYh5n3Lhx6ty5s3r27Kn8/HzNmjVLDz30kBYuXFiinxUrVmjOnDlKTExUjRo1XH6RP64RFf36IABritc0nb1+5Vz+/v7GbbfdZm6/8sorxtlf5u+8844hyfjtt9/Oe4wLrRtq06aNIcmYPHlyqfvatGljbhevabruuuuM3Nxcc7x4Lcy4cePMsYiICKNXr14XPeaFeuvVq5cRERFhbi9YsMCQZLz22msOdX/9618Nm81m7Nq1yxyTZHh6ejqMbd682ZBkTJgwocS5zpaSkmJIMj7++GNzLD8/34iKijKqVavmcO0RERFGXFzcBY93bu2ZM2eM0NBQY+TIkYZhGMb27dsNScbq1atL/Ttx7pxv2rTJkGQ8/fTTDsfv0aPHRdc0nW3evHmGJCMoKMhhjdPIkSONqlWrGj/++KND/QsvvGC4u7sb+/btM8fOXReVn59vNGrUyGjXrp3DuCTDzc3N2LZtm6XegPLCy3NAJVKtWrULvouueD3Np59+WuKlE6vsdrv69Oljuf7xxx+Xr6+vuf3Xv/5VtWrV0uLFi8t0fqsWL14sd3d3PfPMMw7jgwYNkmEY+vzzzx3Go6OjzTs5knTrrbfKz89Pv/zyy0XPExoaqkceecQc8/Dw0DPPPKPjx49r9erVl3Ud7u7uevjhh/XJJ59IkmbMmKHw8HDzTtvFFD/P5z4Pl/pxB926dVOnTp10+PBhTZw40bxLOHfuXLVu3VqBgYH6/fffzT/R0dEqLCzUmjVrzGOcfWfxyJEjOnr0qFq3bq1vv/22xPnatGmjm2+++ZJ6BK40QhNQiRw/ftwhoJzrb3/7m+666y49+eSTCgkJUffu3TVnzpxLClDXXXfdJS34rlevnsO2zWZT3bp1L7qe53Lt3btXYWFhJZ6Phg0bmvvPVrt27RLHCAwM1JEjRy56nnr16snNzfHb6fnOUxY9evTQ9u3btXnzZs2cOVPdu3e3/FlMe/fulZubm0MglKT69etfch/Fa8qaN29ujv30009asmSJatas6fCn+KMSsrOzzdqFCxfqzjvvlJeXl4KCglSzZk299957Onr0aIlzRUZGXnJ/wJXGmiagkjhw4ICOHj2qunXrnrfG29tba9as0cqVK7Vo0SItWbJEs2fPVrt27bRs2TK5u7tf9DyXsg7JqvMFgMLCQks9OcP5zmOcs2i8IrRo0UI33nijkpKStHv3bvXo0aOiWzIVFRXp3nvv1fPPP1/q/ptuukmS9OWXX6pz5866++67NWnSJNWqVUseHh5KTU0tsdBdujJ/z4DLRWgCKomPPvpIkkp9R9XZ3Nzc1L59e7Vv315jx47VG2+8oZdeekkrV65UdHS00z9B/KeffnLYNgxDu3bt0q233mqOBQYGKicnp8R
j9+7dqxtuuMHcvpTeIiIi9MUXX+jYsWMOd5t++OEHc78zREREaMuWLSoqKnK42+Ts8zzyyCN67bXX1LBhQzVp0uSS+isqKtLPP//scHdp586dTunrxhtv1PHjx8/7IZzF/vOf/8jLy0tLly51+GiI1NRUp/QBlAdengMqgRUrVmjkyJGKjIxUz549z1t3+PDhEmPFP4Dz8vIkSVWrVpWkUkNMWUyfPt1hndW8efN06NAhdezY0Ry78cYb9c033yg/P98cW7hwYYmPJriU3jp16qTCwkK9++67DuPvvPOObDabw/kvR6dOnZSZmenwWUpnzpzRhAkTVK1aNbVp08Yp53nyySf1yiuvaMyYMZf0uOLrHD9+vMP42e9quxwPP/yw0tPTS/3U8JycHJ05c0bSn3fybDabCgsLzf179uzRggULnNIHUB640wRcZT7//HP98MMPOnPmjLKysrRixQqlpaUpIiJCn3322QU/2HDEiBFas2aN4uLiFBERoezsbE2aNEnXX3+9WrVqJenPABMQEKDJkyfL19dXVatWVYsWLcq8xiQoKEitWrVSnz59lJWVpZSUFNWtW9fhYxGefPJJzZs3Tx06dNDDDz+sn3/+WR9//HGJdTiX0tv999+ve+65Ry+99JL27Nmjv/zlL1q2bJk+/fRTJSUllTh2WfXr10/vv/++evfurYyMDNWpU0fz5s3T2rVrlZKScsE1ZpciIiKiTL8nrkmTJnrkkUc0adIkHT16VC1bttTy5cu1a9cup/Q1ePBgffbZZ7rvvvvUu3dvNWvWTCdOnND333+vefPmac+ePapRo4bi4uI0duxYdejQQT169FB2drYmTpyounXrasuWLU7pBbjSCE3AVWbYsGGS/vwE7qCgIDVu3FgpKSnq06fPRX9Ad+7cWXv27NHUqVP1+++/q0aNGmrTpo1effVV+fv7S/rznV8ffvihhgwZov79++vMmTNKTU0tc2h68cUXtWXLFo0aNUrHjh1T+/btNWnSJPn4+Jg1sbGxGjNmjMaOHaukpCQ1b95cCxcuND/PqNil9Obm5qbPPvtMw4YN0+zZs5Wamqo6derorbfeKnHcy+Ht7a1Vq1bphRde0Icffqjc3FzVr19fqamppX5gZ0WYOnWqatasqRkzZmjBggVq166dFi1apPDw8Ms+to+Pj1avXq033nhDc+fO1fTp0+Xn56ebbrrJ4e9Vu3bt9MEHH+jNN99UUlKSIiMjNXr0aO3Zs4fQhKuGzXCFVY4AAAAujjVNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAI+p8lJioqKdPDgQfn6+jr911AAAIArwzAMHTt2TGFhYSV+8fa5CE1OcvDgQad8UBwAACh/+/fv1/XXX3/BGkKTkxR/EvP+/fvl5+dXwd04T0FBgZYtW6aYmBh5eHhUdDs4C3Pj2pgf18b8uK7ynpvc3FyFh4db+pVHhCYnKX5Jzs/Pr9KFJh8fH/n5+fGNxcUwN66N+XFtzI/rqqi5sbK0hoXgAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFVSq6AQAAULnVeWHRRWv2vBlXDp1cHu40AQAAWFChoWnNmjW6//77FRYWJpvNpgULFjjsNwxDw4YNU61ateTt7a3o6Gj99NNPDjWHDx9Wz5495efnp4CAAMXHx+v48eMONVu2bFHr1q3l5eWl8PBwJScnl+hl7ty5atCggby8vNS4cWMtXrzY6dcLAACuXhUamk6cOKG//OUvmjhxYqn7k5OTNX78eE2ePFnr1q1T1apVFRsbq9OnT5s1PXv21LZt25SWlqaFCxdqzZo16tevn7k/NzdXMTExioiIUEZGht566y0NHz5cU6ZMMWu+/vprPfLII4qPj9d3332nLl26qEuXLtq6deuVu3gAAHBVqdA1TR07dlTHjh1L3WcYhlJSUjR06FA98MADkqTp06crJCRECxYsUPfu3bVjxw4tWbJEGzZsUPPmzSVJEyZMUKdOnfT2228rLCxMM2bMUH5+vqZOnSpPT0/dcsst2rRpk8aOHWuGq3HjxqlDhw4aPHiwJGnkyJFKS0vTu+++q8mTJ5fDMwEAAFydyy4E3717tzIzMxUdHW2O+fv7q0WLFkpPT1f37t2Vnp6ugIAAMzBJUnR0tNzc3LRu3Tp17dpV6enpuvvuu+Xp6WnWxMbGavTo0Tpy5IgCAwOVnp6ugQMHOpw/Nja2xMuFZ8vLy1NeXp65nZubK0kqKChQQUHB5V6+yyi+lsp0TZUFc+PamB/XxvyUL7u7cdGac+ekvObmUs7jsqEpMzNTkhQSEuIwHhISYu7LzMxUcHCww/4qVaooKCjIoSYyMrLEMYr3BQYGKjMz84LnKc2oUaP06quvlhhftmyZfHx8rFziVSUtLa2iW8B5MDeujflxbcxP+Ui+4+I1564lLq+5OXnypOValw1Nrm7IkCEOd6dyc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCZUNTaGioJCkrK0u1atUyx7OystSkSROzJjs72+FxZ86c0eHDh83Hh4aGKisry6GmePtiNcX7S2O322W320uMe3h4VMovwMp6XZUBc+PamB/XxvyUj7xC20Vrzp2H8pqbSzmHy35OU2RkpEJDQ7V8+XJzLDc3V+vWrVNUVJQkKSoqSjk5OcrIyDBrVqxYoaKiIrVo0cKsWbNmjcNrlmlpaapfv74CAwPNmrPPU1xTfB4AAIAKDU3Hjx/Xpk2btGnTJkl/Lv7etGmT9u3bJ5vNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DS2oABA7RkyRKNGTNGP/zwg4YPH66NGzcqMTGxvJ8SAADgoir05bmNGzfqnnvuMbeLg0yvXr00bdo0Pf/88zpx4oT69eunnJwctWrVSkuWLJGXl5f5mBkzZigxMVHt27eXm5ubunXrpvHjx5v7/f39tWzZMiUkJKhZs2aqUaOGhg0b5vBZTi1bttTMmTM1dOhQvfjii6pXr54WLFigRo0alcOzAAAArgYVGpratm0rwzj/2xBtNptGjBihESNGnLcmKChIM2fOvOB5br31Vn355ZcXrHnooYf00EMPXbhhAABwzXLZNU0AAACuhNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAa
EJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYIFLh6bCwkK9/PLLioyMlLe3t2688UaNHDlShmGYNYZhaNiwYapVq5a8vb0VHR2tn376yeE4hw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ5fLNQIAgKuDS4em0aNH67333tO7776rHTt2aPTo0UpOTtaECRPMmuTkZI0fP16TJ0/WunXrVLVqVcXGxur06dNmTc+ePbVt2zalpaVp4cKFWrNmjfr162fuz83NVUxMjCIiIpSRkaG33npLw4cP15QpU8r1egEAgOuqUtENXMjXX3+tBx54QHFxcZKkOnXq6JNPPtH69esl/XmXKSUlRUOHDtUDDzwgSZo+fbpCQkK0YMECde/eXTt27NCSJUu0YcMGNW/eXJI0YcIEderUSW+//bbCwsI0Y8YM5efna+rUqfL09NQtt9yiTZs2aezYsQ7hCgAAXLtcOjS1bNlSU6ZM0Y8//qibbrpJmzdv1ldffaWxY8dKknbv3q3MzExFR0ebj/H391eLFi2Unp6u7t27Kz09XQEBAWZgkqTo6Gi5ublp3bp16tq1q9LT03X33XfL09PTrImNjdXo0aN15MgRBQYGlugtLy9PeXl55nZubq4kqaCgQAUFBU5/LipK8bVUpmuqLJgb18b8uDbmp3zZ3Y2L1pw7J+U1N5dyHpcOTS+88IJyc3PVoEEDubu7q7CwUK+//rp69uwpScrMzJQkhYSEODwuJCTE3JeZmang4GCH/VWqVFFQUJBDTWRkZIljFO8rLTSNGjVKr776aonxZcuWycfHpyyX69LS0tIqugWcB3Pj2pgf18b8lI/kOy5es3jxYoft8pqbkydPWq516dA0Z84czZgxQzNnzjRfMktKSlJYWJh69epVob0NGTJEAwcONLdzc3MVHh6umJgY+fn5VWBnzlVQUKC0tDTde++98vDwqOh2cBbmxrUxP66N+SlfjYYvvWjN1uGxksp/bopfKbLCpUPT4MGD9cILL6h79+6SpMaNG2vv3r0aNWqUevXqpdDQUElSVlaWatWqZT4uKytLTZo0kSSFhoYqOzvb4bhnzpzR4cOHzceHhoYqKyvLoaZ4u7jmXHa7XXa7vcS4h4dHpfwCrKzXVRkwN66N+XFtzE/5yCu0XbTm3Hkor7m5lHO49LvnTp48KTc3xxbd3d1VVFQkSYqMjFRoaKiWL19u7s/NzdW6desUFRUlSYqKilJOTo4yMjLMmhUrVqioqEgtWrQwa9asWePwumZaWprq169f6ktzAADg2uPSoen+++/X66+/rkWLFmnPnj3673//q7Fjx6pr166SJJvNpqSkJL322mv67LPP9P333+vxxx9XWFiYunTpIklq2LChOnTooL59+2r9+vVau3atEhMT1b17d4WFhUmSevToIU9PT8XHx2vbtm2aPXu2xo0b5/DyGwAAuLa59MtzEyZM0Msvv6ynn35a2dnZCgsL01NPPaVhw4aZNc8//7xOnDihfv36KScnR61atdKSJUvk5eVl1syYMUOJiYlq37693Nzc1K1bN40fP97c7+/vr2XLlikhIUHNmjVTjRo1NGzYMD5uAAAAmFw6NPn6+iolJUUpKSnnrbHZbBoxYoRGjBhx3pqgoCDNnDnzgue69dZb9eWXX5a1VQAAUMm59MtzAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAVlCk2//PKLs/sAAABwaWUKTXXr1tU999yjjz/+WKdPn3Z2TwAAAC6nTKHp22+/1a233qqBAwcqNDRUTz31lNavX+/s3gAAAFxGmUJTkyZNNG7cOB08eFBTp07VoUOH1KpVKzVq1Ehjx47Vb7/95uw+AQAAKtRlLQSvUqWKHnzwQc2dO1ejR4/Wrl279Nxzzyk8PFyPP/64Dh065Kw+AQAAKtRlhaaNGzfq6aefVq1atTR27Fg999xz+vnnn5WWlqaDBw/qgQcecFafAAAAFapMoWns2LFq3LixWrZsqYMHD2r69Onau3evXnvtNUVGRqp169aaNm2avv3228tu8Ndff9Wjjz6q6tWry9vbW40bN9bGjRvN/YZhaNiwYapVq5a8vb0VHR2tn376yeEYhw8fVs+ePeXn56eAgADFx8fr+PHjDjVbtmxR69at5eXlpfDwcCUnJ1927wAAoPIoU2h677331KNHD+3du1cLFizQfffdJzc3x0MFBwfrgw8+uKzmjhw5orvuukseHh76/PPPtX37do0ZM0aBgYFmTXJyssaPH6/Jkydr3bp1qlq1qmJjYx3e1dezZ09t27ZNaWlpWrhwodasWaN+/fqZ+3NzcxUTE6OIiAhlZGTorbfe0vDhwzVlypTL6h8AAFQeVcryoHPv5JTG09NTvXr1KsvhTaNHj1Z4eLhSU1PNscjISPP/DcNQSkqKhg4dar4UOH36dIWEhGjBggXq3r27duzYoSVLlmjDhg1q3ry5JGnChAnq1KmT3n77bYWFhWnGjBnKz8/X1KlT5enpqVtuuUWbNm3S2LFjHcIVAAC4dpUpNKWmpqpatWp66KGHHMbnzp2rkydPXnZYKvbZZ58pNjZWDz30kFavXq3rrrtOTz/9tPr27StJ2r17tzIzMxUdHW0+xt/fXy1atFB6erq6d++u9PR0BQQEmIFJkqKjo+Xm5qZ169apa9euSk9P19133y1PT0+zJjY2VqNHj9aRI0cc7mwVy8vLU15enrmdm5srSSooKFBBQYFTrt8VFF9LZbqmyoK5cW3Mj2tjfsqX3d24aM25c1Jec3Mp5ylTaBo1apTef//9EuPBwcHq16+f00LTL7/8ovfee08DBw7Uiy++qA0bNuiZZ54x72JlZmZKkkJCQhweFxISYu7LzMxUcHCww/4qVaooKCjIoebsO1hnHzMzM7PU0DRq1Ci9+
uqrJcaXLVsmHx+fMl6x60pLS6voFnAezI1rY35cG/NTPpLvuHjN4sWLHbbLa25OnjxpubZMoWnfvn0lQoYkRUREaN++fWU5ZKmKiorUvHlzvfHGG5Kk2267TVu3btXkyZOdFszKasiQIRo4cKC5nZubq/DwcMXExMjPz68CO3OugoICpaWl6d5775WHh0dFt4OzMDeujflxbcxP+Wo0fOlFa7YOj5VU/nNT/EqRFWUKTcHBwdqyZYvq1KnjML5582ZVr169LIcsVa1atXTzzTc7jDVs2FD/+c9/JEmhoaGSpKysLNWqVcusycrKUpMmTcya7Oxsh2OcOXNGhw8fNh8fGhqqrKwsh5ri7eKac9ntdtnt9hLjHh4elfILsLJeV2XA3Lg25se1MT/lI6/QdtGac+ehvObmUs5RpnfPPfLII3rmmWe0cuVKFRYWqrCwUCtWrNCAAQPUvXv3shyyVHfddZd27tzpMPbjjz8qIiJC0p+LwkNDQ7V8+XJzf25urtatW6eoqChJUlRUlHJycpSRkWHWrFixQkVFRWrRooVZs2bNGofXNdPS0lS/fv1SX5oDAADXnjKFppEjR6pFixZq3769vL295e3trZiYGLVr1858Kc0Znn32WX3zzTd64403tGvXLs2cOVNTpkxRQkKCJMlmsykpKUmvvfaaPvvsM33//fd6/PHHFRYWpi5dukj6885Uhw4d1LdvX61fv15r165VYmKiunfvrrCwMElSjx495Onpqfj4eG3btk2zZ8/WuHHjHF5+AwAA17YyvTzn6emp2bNna+TIkdq8ebP5oZPFd4Cc5fbbb9d///tfDRkyRCNGjFBkZKRSUlLUs2dPs+b555/XiRMn1K9fP+Xk5KhVq1ZasmSJvLy8zJoZM2YoMTFR7du3l5ubm7p166bx48eb+/39/bVs2TIlJCSoWbNmqlGjhoYNG8bHDQAAAFOZQlOxm266STfddJOzeinVfffdp/vuu++8+202m0aMGKERI0actyYoKEgzZ8684HluvfVWffnll2XuEwAAVG5lCk2FhYWaNm2ali9fruzsbBUVFTnsX7FihVOaAwAAcBVlCk0DBgzQtGnTFBcXp0aNGslmu/iqeAAAgKtZmULTrFmzNGfOHHXq1MnZ/QAAALikMr17ztPTU3Xr1nV2LwAAAC6rTKFp0KBBGjdunAzj4r9LBgAAoDIo08tzX331lVauXKnPP/9ct9xyS4lP05w/f75TmgMAAHAVZQpNAQEB6tq1q7N7AQAAcFllCk2pqanO7gMAAMCllWlNk/TnL7394osv9P777+vYsWOSpIMHD+r48eNOaw4AAMBVlOlO0969e9WhQwft27dPeXl5uvfee+Xr66vRo0crLy9PkydPdnafAAAAFapMd5oGDBig5s2b68iRI/L29jbHu3btquXLlzutOQAAAFdRpjtNX375pb7++mt5eno6jNepU0e//vqrUxoDAABwJWW601RUVKTCwsIS4wcOHJCvr+9lNwUAAOBqyhSaYmJilJKSYm7bbDYdP35cr7zyCr9aBQAAVEplenluzJgxio2N1c0336zTp0+rR48e+umnn1SjRg198sknzu4RAACgwpUpNF1//fXavHmzZs2apS1btuj48eOKj49Xz549HRaGAwAAVBZlCk2SVKVKFT366KPO7AUAAMBllSk0TZ8+/YL7H3/88TI1AwAA4KrKFJoGDBjgsF1QUKCTJ0/K09NTPj4+hCYAAFDplOndc0eOHHH4c/z4ce3cuVOtWrViITgAAKiUyvy7585Vr149vfnmmyXuQgEAAFQGTgtN0p+Lww8ePOjMQwIAALiEMq1p+uyzzxy2DcPQoUOH9O677+quu+5ySmMAAACupEyhqUuXLg7bNptNNWvWVLt27TRmzBhn9AUAAOBSyhSaioqKnN0HAACAS3PqmiYAAIDKqkx3mgYOHGi5duzYsWU5BQAAgEspU2j67rvv9N1336mgoED169eXJP34449yd3dX06ZNzTqbzeacLgEAACpYmULT/fffL19fX3344YcKDAyU9OcHXvbp00etW7fWoEGDnNokAABARSvTmqYxY8Zo1KhRZmCSpMDAQL322mu8ew4AAFRKZQpNubm5+u2330qM//bbbzp27NhlNwUAAOBqyhSaunbtqj59+mj+/Pk6cOCADhw4oP/85z+Kj4/Xgw8+6OweAQAAKlyZ1jRNnjxZzz33nHr06KGCgoI/D1SliuLj4/XWW285tUEAAABXUKbQ5OPjo0mTJumtt97Szz//LEm68cYbVbVqVac2BwAA4Cou68MtDx06pEOHDqlevXqqWrWqDMNwVl8AAAAupUx3mv744w89/PDDWrlypWw2m3766SfdcMMNio+PV2BgIO+gAwAAl6TOC4skSXZ3Q8l3SI2GL1VeoePnPe55M64iWjOV6U7Ts88+Kw8PD+3bt08+Pj7m+N/+9jctWbLEac0BAAC4ijLdaVq2bJmWLl2q66+/3mG8Xr162rt3r1MaAwAAcCVlutN04sQJhztMxQ4fPiy73X7ZTQEAALiaMoWm1q1ba/r06ea2zWZTUVGRkpOTdc899zitOQAAAFdRppfnkpOT1b59e23cuFH5+fl6/vnntW3bNh0+fFhr1651do8AAAAVrkx3mho1aqQff/xRrVq10gMPPKATJ07owQcf1Hfffacbb7zR2T0CAABUuEu+01RQUKAOHTpo8uTJeumll65ETwAAAC7nku80eXh4aMuWLVeiFwAAAJdVppfnHn30UX3wwQfO7gUAAMBllWkh+JkzZzR16lR98cUXatasWYnfOTd27FinNAcAAOAqLik0/fLLL6pTp462bt2qpk2bSpJ+/PFHhxqbzVbaQwEAAK5qlxSa6tWrp0OHDmnlypWS/vy1KePHj1dISMgVaQ4AAMBVXNKaJsMwHLY///xznThxwqkNAQAAuKIyLQQvdm6IAgAAqKwuKTTZbLYSa5ZYwwQAAK4Fl7SmyTAM9e7d2/ylvKdPn1b//v1LvHtu/vz5zusQAADABVxSaOrVq5fD9qOPPurUZgAAAFzVJYWm1NTUK9UHAACAS7usheAAAADXCkITAACABYQmAAAACwhNAAAAFlxVoenNN9+UzWZTUlKSOXb69GklJCSoevXqqlatmrp166asrCyHx+3bt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0crgiAABwtbhqQtOGDRv0/vvv69Zbb3UYf/bZZ/W///1Pc+fO1erVq3Xw4EE9+OCD5v7CwkLFxcUpPz9fX3/9tT788ENNmzZNw4YNM2t2796tuLg43XPPPdq0aZOSkpL05JNPaunSpeV2fQAAwLVdFaHp+PHj6tmzp/71r38pMDDQHD969Kg++OADjR07Vu3atVOzZs2Umpqqr7/+Wt98840kadmyZdq+fbs+/vhjNWnSRB07dtTIkSM1ceJE5efnS5ImT56syMhIjRkzRg0bNlRiYqL++te/6p133qmQ6wUAAK7nqghNCQkJiouLU3R0tMN4
RkaGCgoKHMYbNGig2rVrKz09XZKUnp6uxo0bKyQkxKyJjY1Vbm6utm3bZtace+zY2FjzGAAAAJf04ZYVYdasWfr222+1YcOGEvsyMzPl6empgIAAh/GQkBBlZmaaNWcHpuL9xfsuVJObm6tTp07J29u7xLnz8vKUl5dnbufm5kqSCgoKVFBQcIlX6bqKr6UyXVNlwdy4NubHtTE/5cvublivdTMc/nu2KzFfl3JMlw5N+/fv14ABA5SWliYvL6+KbsfBqFGj9Oqrr5YYX7ZsmXx8fCqgoysrLS2tolvAeTA3ro35cW3MT/lIvuPSHzOyeVGJscWLFzuhG0cnT560XOvSoSkjI0PZ2dlq2rSpOVZYWKg1a9bo3Xff1dKlS5Wfn6+cnByHu01ZWVkKDQ2VJIWGhmr9+vUOxy1+d93ZNee+4y4rK0t+fn6l3mWSpCFDhmjgwIHmdm5ursLDwxUTEyM/P7+yX7SLKSgoUFpamu699155eHhUdDs4C3Pj2pgf18b8lK9Gw62/scruZmhk8yK9vNFNeUU2h31bh8c6uzXzlSIrXDo0tW/fXt9//73DWJ8+fdSgQQP985//VHh4uDw8PLR8+XJ169ZNkrRz507t27dPUVFRkqSoqCi9/vrrys7OVnBwsKQ//2Xh5+enm2++2aw5N72mpaWZxyiN3W6X3W4vMe7h4VEpvwAr63VVBsyNa2N+XBvzUz7yCm0XLzr3MUW2Eo+7EnN1Kcd06dDk6+urRo0aOYxVrVpV1atXN8fj4+M1cOBABQUFyc/PT//4xz8UFRWlO++8U5IUExOjm2++WY899piSk5OVmZmpoUOHKiEhwQw9/fv317vvvqvnn39eTzzxhFasWKE5c+Zo0aJF5XvBAADAZbl0aLLinXfekZubm7p166a8vDzFxsZq0qRJ5n53d3ctXLhQf//73xUVFaWqVauqV69eGjFihFkTGRmpRYsW6dlnn9W4ceN0/fXX69///rdiY51/GxAAAFydrrrQtGrVKodtLy8vTZw4URMnTjzvYyIiIi66eKxt27b67rvvnNEiAACohK6Kz2kCAACoaIQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMCCKhXdAAAArqrOC4suWrPnzbhy6ASugDtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGBBlYpuAACAitJo+FLlFdoqug1cJbjTBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAUuHZpGjRql22+/Xb6+vgoODlaXLl20c+dOh5rTp08rISFB1atXV7Vq1dStWzdlZWU51Ozbt09xcXHy8fFRcHCwBg8erDNnzjjUrFq1Sk2bNpXdblfdunU1bdq0K315AADgKuLSoWn16tVKSEjQN998o7S0NBUUFCgmJkYnTpwwa5599ln973//09y5c7V69WodPHhQDz74oLm/sLBQcXFxys/P19dff60PP/xQ06ZN07Bhw8ya3bt3Ky4uTvfcc482bdqkpKQkPfnkk1q6dGm5Xi8AAHBdVSq6gQtZsmSJw/a0adMUHBysjIwM3X333Tp69Kg++OADzZw5U+3atZMkpaamqmHDhvrmm2905513atmyZdq+fbu++OILhYSEqEmTJho5cqT++c9/avjw4fL09NTkyZMVGRmpMWPGSJIaNmyor776Su+8845iY2PL/boBAIDrcek7Tec6evSoJCkoKEiSlJGRoYKCAkVHR5s1DRo0UO3atZWeni5JSk9PV+PGjRUSEmLWxMbGKjc3V9u2bTNrzj5GcU3xMQAAAFz6TtPZioqKlJSUpLvuukuNGjWSJGVmZsrT01MBAQEOtSEhIcrMzDRrzg5MxfuL912oJjc3V6dOnZK3t3eJfvLy8pSXl2du5+bmSpIKCgpUUFBwGVfqWoqvpTJdU2XB3Lg25se1Fc+L3c1w2rFwfnZ3689z8ZyUNjdX4rm+lGNeNaEpISFBW7du1VdffVXRrUj6c5H6q6++WmJ82bJl8vHxqYCOrqy0tLSKbgHnwdy4NubHtY1sXnTZx1i8eLETOqncku+49MeUNjdX4rk+efKk5dqrIjQlJiZq4cKFWrNmja6//npzPDQ0VPn5+crJyXG425SVlaXQ0FCzZv369Q7HK3533dk1577jLisrS35+fqXeZZKkIUOGaODAgeZ2bm6uwsPDFRMTIz8/v7JfrIspKChQWlqa7r33Xnl4eFR0OzgLc+PamB/XVjw/L290U16R7bKOtXU4a18vptFw62+ssrsZGtm8qNS5uRLPdfErRVa4dGgyDEP/+Mc/9N///lerVq1SZGSkw/5mzZrJw8NDy5cvV7du3SRJO3fu1L59+xQVFSVJioqK0uuvv67s7GwFBwdL+vNffn5+frr55pvNmnPTa1pamnmM0tjtdtnt9hLjHh4elfIbZGW9rsqAuXFtzI9ryyuyKa/w8kIT83txZXmOS5ubK/FcX8oxXTo0JSQkaObMmfr000/l6+trrkHy9/eXt7e3/P39FR8fr4EDByooKEh+fn76xz/+oaioKN15552SpJiYGN1888167LHHlJycrMzMTA0dOlQJCQlm6Onfv7/effddPf/883riiSe0YsUKzZkzR4sWLaqwawcAAK7Fpd8999577+no0aNq27atatWqZf6ZPXu2WfPOO+/ovvvuU7du3XT33XcrNDRU8+fPN/e7u7tr4cKFcnd3V1RUlB599FE9/vjjGjFihFkTGRmpRYsWKS0tTX/5y180ZswY/fvf/+bjBgAAgMml7zQZxsVX23t5eWnixImaOHHieWsiIiIuunisbdu2+u677y65RwAAcG1w6TtNAAAAroLQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACxw6c9pAgAArq3OC9fOb8/gThMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgAaEJAADAAkITAACABYQmAAAACwhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCAACwgNAEAABgQZWKbgAAADhPnRcWWarb82bcFe6k8uFOEwAAgAWEJgAAAAsITQA
AABYQmgAAACwgNAEAAFhAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYUKWiGwAAAOWvzguLLlqz5824cujk6kFoAgDgKmEl6ODKITQBQCXFnQTAuQhNAAC4AFe8i+SKPVUkQhOuGfyrG7h2XOzr3e5uKPmOcmoGlQbvnjvHxIkTVadOHXl5ealFixZav359RbcEAABcAHeazjJ79mwNHDhQkydPVosWLZSSkqLY2Fjt3LlTwcHBFd0eAFzVuNuLqx2h6Sxjx45V37591adPH0nS5MmTtWjRIk2dOlUvvPBCBXcHoCJV1h/4rrZmxdX6saKy/t1ASYSm/5Ofn6+MjAwNGTLEHHNzc1N0dLTS09MrsDMAl8PVfgg76wesq10XLoz5qhwITf/n999/V2FhoUJCQhzGQ0JC9MMPP5Soz8vLU15enrl99OhRSdLhw4dVUFBwZZstRwUFBTp58qT++OMPeXh4VHQ7l6XKmRMXrfnjjz/KoRPnqIi5aTFq+UVr1g1pXw6d/MlKP+X5Ta7uc3PM/7e7GRp6W5GavDRfeUW2S+rn7OOcD9+8L0+VIkMnTxapSoGbCs+aH1S8C83NlfgefezYMUmSYRgX783pZ79GjBo1Sq+++mqJ8cjIyAroBs5SY0xFd3D14zn8/3pUdAO4IObHdZ1vbq7k95djx47J39//gjWEpv9To0YNubu7Kysry2E8KytLoaGhJeqHDBmigQMHmttFRUU6fPiwqlevLput8vyrJTc3V+Hh4dq/f7/8/Pwquh2chblxbcyPa2N+XFd5z41hGDp27JjCwsIuWkto+j+enp5q1qyZli9fri5dukj6MwgtX75ciYmJJertdrvsdrvDWEBAQDl0WjH8/Pz4xuKimBvXxvy4NubHdZXn3FzsDlMxQtNZBg4cqF69eql58+a64447lJKSohMnTpjvpgMAANcuQtNZ/va3v+m3337TsGHDlJmZqSZNmmjJkiUlFocDAIBrD6HpHImJiaW+HHetstvteuWVV0q8FImKx9y4NubHtTE/rsuV58ZmWHmPHQAAwDWO3z0HAABgAaEJAADAAkITAACABYQmAAAACwhN17BFixapRYsW8vb2VmBgoPmhnsX27dunuLg4+fj4KDg4WIMHD9aZM2ccalatWqWmTZvKbrerbt26mjZtWonzTJw4UXXq1JGXl5datGih9evXX8Grqlzy8vLUpEkT2Ww2bdq0yWHfli1b1Lp1a3l5eSk8PFzJycklHj937lw1aNBAXl5eaty4sRYvXuyw3zAMDRs2TLVq1ZK3t7eio6P1008/XclLuqrt2bNH8fHxioyMlLe3t2688Ua98sorys/Pd6hjblwb35OuvFGjRun222+Xr6+vgoOD1aVLF+3cudOh5vTp00pISFD16tVVrVo1devWrcRv5XDWzyGnMXBNmjdvnhEYGGi89957xs6dO41t27YZs2fPNvefOXPGaNSokREdHW189913xuLFi40aNWoYQ4YMMWt++eUXw8fHxxg4cKCxfft2Y8KECYa7u7uxZMkSs2bWrFmGp6enMXXqVGPbtm1G3759jYCAACMrK6tcr/dq9cwzzxgdO3Y0JBnfffedOX706FEjJCTE6Nmzp7F161bjk08+Mby9vY3333/frFm7dq3h7u5uJCcnG9u3bzeGDh1qeHh4GN9//71Z8+abbxr+/v7GggULjM2bNxudO3c2IiMjjVOnTpXnZV41Pv/8c6N3797G0qVLjZ9//tn49NNPjeDgYGPQoEFmDXPj2vieVD5iY2ON1NRUY+vWrcamTZuMTp06GbVr1zaOHz9u1vTv398IDw83li9fbmzcuNG48847jZYtW5r7nfVzyJkITdeggoIC47rrrjP+/e9/n7dm8eLFhpubm5GZmWmOvffee4afn5+Rl5dnGIZhPP/888Ytt9zi8Li//e1vRmxsrLl9xx13GAkJCeZ2YWGhERYWZowaNcpZl1NpLV682GjQoIGxbdu2EqFp0qRJRmBgoDkXhmEY//znP4369eub2w8//LARFxfncMwWLVoYTz31lGEYhlFUVGSEhoYab731lrk/JyfHsNvtxieffHKFrqrySU5ONiIjI81t5sa18T2pYmRnZxuSjNWrVxuG8effZw8PD2Pu3LlmzY4dOwxJRnp6umEYzvs55Ey8PHcN+vbbb/Xrr7/Kzc1Nt912m2rVqqWOHTtq69atZk16eroaN27s8GnosbGxys3N1bZt28ya6Ohoh2PHxsYqPT1dkpSfn6+MjAyHGjc3N0VHR5s1KF1WVpb69u2rjz76SD4+PiX2p6en6+6775anp6c5Fhsbq507d+rIkSNmzYXmZ/fu3crMzHSo8ff3V4sWLZifS3D06FEFBQWZ28yN6+J7UsU5evSoJJlfKxkZGSooKHCYiwYNGqh27drmXDjj55CzEZquQb/88oskafjw4Ro6dKgWLlyowMBAtW3bVocPH5YkZWZmlvj1McXbmZmZF6zJzc3VqVOn9Pvvv6uwsLDUmuJjoCTDMNS7d2/1799fzZs3L7Xmcubn7P1nP660GlzYrl27NGHCBD311FPmGHPjuvieVDGKioqUlJSku+66S40aNZL0599xT0/PEr/o/tyvg8v9OeRshKZK5IUXXpDNZrvgnx9++EFFRUWSpJdeekndunVTs2bNlJqaKpvNprlz51bwVVReVudnwoQJOnbsmIYMGVLRLV8zrM7N2X799Vd16NBBDz30kPr27VtBnQOuLyEhQVu3btWsWbMqupXLxu+eq0QGDRqk3r17X7Dmhhtu0KFDhyRJN998szlut9t1ww03aN++fZKk0NDQEu8oKX5XQ2hoqPnfc9/pkJWVJT8/P3l7e8vd3V3u7u6l1hQf41pidX5WrFih9PT0Er93qXnz5urZs6c+/PDD8z730sXn5+z9xWO1atVyqGnSpMklX9/VzOrcFDt48KDuuecetWzZUlOmTHGoY25cV40aNfieVM4SExO1cOFCrVmzRtdff705Hhoaqvz8fOXk5DjcbTr36+Byfw453RVZKQWXdvToUcNutzssBM/PzzeCg4PNd/gUL8A7+x0l77//vuHn52ecPn3aMIw/F+A1atTI4diPPPJIiYXgiYmJ5nZhYaFx3XXXsejyAvbu3Wt8//335p+lS5cakox58+YZ+/fvNwzj/y82zs/PNx83ZMiQEouN77vvPodjR0VFlVhs/Pbbb5v7i/9usNj4/A4cOGDUq1fP6N69u3HmzJkS+5kb18b3pPJRVFRkJCQkGGFhYcaPP/5YYn/xQvB58+aZYz/88EOpC8Ev9+eQMxGarlEDBgwwrrvuOmPp0qXGDz/8YMTHxxvBwcHG4cOHDcP4/2/1jImJMTZt2mQsWbLEqFmzZqlv9Rw8eLCxY8cOY+LEiaV+5IDdbjemTZtmbN++3ejXr58REBDg8G4IXNju3btLvHsuJyfHCAkJMR577DFj69atxqxZswwfH58Sb2uvUqWK8fbbbxs7duwwXnnllVLf1h
4QEGB8+umnxpYtW4wHHniAt7VfwIEDB4y6desa7du3Nw4cOGAcOnTI/FOMuXFtfE8qH3//+98Nf39/Y9WqVQ5fJydPnjRr+vfvb9SuXdtYsWKFsXHjRiMqKsqIiooy9zvr55AzEZquUfn5+cagQYOM4OBgw9fX14iOjja2bt3qULNnzx6jY8eOhre3t1GjRg1j0KBBRkFBgUPNypUrjSZNmhienp7GDTfcYKSmppY414QJE4zatWsbnp6exh133GF88803V/LSKp3SQpNhGMbmzZuNVq1aGXa73bjuuuuMN998s8Rj58yZY9x0002Gp6enccsttxiLFi1y2F9UVGS8/PLLRkhIiGG324327dsbO3fuvJKXc1VLTU01JJX652zMjWvje9KVd76vk7N/Rpw6dcp4+umnjcDAQMPHx8fo2rWrwz9ADMN5P4ecxfZ/FwcAAIAL4N1zAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgDXhLZt2yopKemCNXXq1FFKSkq59APg6kNoAnBV6t27t2w2m/r3719iX0JCgmw2m8Mv4Z0/f75GjhxZ5vPFx8ercePGys/PdxhfvHixPD099e2335b52ACuDoQmAFet8PBwzZo1S6dOnTLHTp8+rZkzZ6p27doOtUFBQfL19S3zud555x0dO3ZMr7zyijmWk5Ojvn376uWXX1bTpk3LfOzzKSgocPoxAZQdoQnAVatp06YKDw/X/PnzzbH58+erdu3auu222xxqz315Ljs7W/fff7+8vb0VGRmpGTNmXPBcfn5+Sk1N1ZgxY7Ru3TpJUlJSkq677joNGTJE+/fv18MPP6yAgAAFBQXpgQce0J49e8zHb9iwQffee69q1Kghf39/tWnTpsTdKZvNpvfee0+dO3dW1apV9frrr5fxmQFwJRCaAFzVnnjiCaWmpprbU6dOVZ8+fS76uN69e2v//v1auXKl5s2bp0mTJik7O/uCj7nnnnv09NNPq1evXpo7d67mzJmj6dOnyzAMxcbGytfXV19++aXWrl2ratWqqUOHDubLeceOHVOvXr301Vdf6ZtvvlG9evXUqVMnHTt2zOEcw4cPV9euXfX999/riSeeKMMzAuBKqVLRDQDA5Xj00Uc1ZMgQ7d27V5K0du1azZo1S6tWrTrvY3788Ud9/vnnWr9+vW6//XZJ0gcffKCGDRte9HyjRo3SkiVL1L17d40ZM0YNGjTQxx9/rKKiIv373/+WzWaTJKWmpiogIECrVq1STEyM2rVr53CcKVOmKCAgQKtXr9Z9991njvfo0cNS6ANQ/rjTBOCqVrNmTcXFxWnatGlKTU1VXFycatSoccHH7NixQ1WqVFGzZs3MsQYNGiggIOCi5/P29tZzzz0nHx8fDRgwQJK0efNm7dq1S76+vqpWrZqqVaumoKAgnT59Wj///LMkKSsrS3379lW9evXk7+8vPz8/HT9+XPv27XM4fvPmzS/xGQBQXrjTBOCq98QTTygxMVGSNHHixCt+vipVqsjd3d28q3T8+HE1a9as1HVRNWvWlCT16tVLf/zxh8aNG6eIiAjZ7XZFRUWVeDde1apVr3j/AMqG0ATgqle8dshmsyk2Nvai9Q0aNNCZM2eUkZFhvjy3c+dO5eTklOn8TZs21ezZsxUcHCw/P79Sa9auXatJkyapU6dOkqT9+/fr999/L9P5AFQMXp4DcNVzd3fXjh07tH37drm7u1+0vn79+urQoYOeeuoprVu3ThkZGXryySfl7e1dpvP37NlTNWrU0AMPPKAvv/xSu3fv1qpVq/TMM8/owIEDkqR69erpo48+0o4dO7Ru3Tr17NmzzOcDUDEITQAqBT8/v/Pe5SlNamqqwsLC1KZNGz344IPq16+fgoODy3RuHx8frVmzRrVr19aDDz6ohg0bKj4+XqdPnzZ7+uCDD3TkyBE1bdpUjz32mJ555pkynw9AxbAZhmFUdBMAAACujjtNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITQAAABYQmgAAACwgNAEAAFhAaAIAALDg/wGKLN6uKdIJRwAAAABJRU5ErkJggg==",
1341
+ "text/plain": [
1342
+ "<Figure size 640x480 with 1 Axes>"
1343
+ ]
1344
+ },
1345
+ "metadata": {},
1346
+ "output_type": "display_data"
1347
+ }
1348
+ ],
1349
+ "source": [
1350
+ "year_df[\"mid_year\"].hist(bins=50)\n",
1351
+ "plt.xlabel(\"Mid Year\")\n",
1352
+ "plt.ylabel(\"Frequency\")\n",
1353
+ "plt.title(\"Distribution of Mid Year\")\n",
1354
+ "plt.show()"
1355
+ ]
1356
+ },
1357
+ {
1358
+ "cell_type": "markdown",
1359
+ "metadata": {},
1360
+ "source": [
1361
+ "### Object name"
1362
+ ]
1363
+ },
1364
+ {
1365
+ "cell_type": "code",
1366
+ "execution_count": 106,
1367
+ "metadata": {},
1368
+ "outputs": [
1369
+ {
1370
+ "data": {
1371
+ "text/plain": [
1372
+ "object_name\n",
1373
+ "sherds 5068\n",
1374
+ "photographs 4729\n",
1375
+ "coins 4609\n",
1376
+ "amulets 2485\n",
1377
+ "Woodblock Print 1386\n",
1378
+ "figures 1316\n",
1379
+ "vessels 1165\n",
1380
+ "bowls 807\n",
1381
+ "Papercut 731\n",
1382
+ "pages 647\n",
1383
+ "Slide 633\n",
1384
+ "jars 566\n",
1385
+ "Seal 554\n",
1386
+ "postcards 541\n",
1387
+ "vases 518\n",
1388
+ "Name: count, dtype: int64"
1389
+ ]
1390
+ },
1391
+ "execution_count": 106,
1392
+ "metadata": {},
1393
+ "output_type": "execute_result"
1394
+ }
1395
+ ],
1396
+ "source": [
1397
+ "full_df[\"object_name\"].value_counts()[:15]"
1398
+ ]
1399
+ },
1400
+ {
1401
+ "cell_type": "markdown",
1402
+ "metadata": {},
1403
+ "source": [
1404
+ "### Material"
1405
+ ]
1406
+ },
1407
+ {
1408
+ "cell_type": "code",
1409
+ "execution_count": 104,
1410
+ "metadata": {},
1411
+ "outputs": [
1412
+ {
1413
+ "data": {
1414
+ "text/plain": [
1415
+ "material\n",
1416
+ "pottery 8844\n",
1417
+ "paper 5822\n",
1418
+ "metal 3739\n",
1419
+ "photographic paper 3396\n",
1420
+ "faience 2961\n",
1421
+ " ... \n",
1422
+ "Shell (Ostrich Egg) 1\n",
1423
+ "serpentinite 1\n",
1424
+ "balsa 1\n",
1425
+ "maple 1\n",
1426
+ "fabric art 1\n",
1427
+ "Name: count, Length: 414, dtype: int64"
1428
+ ]
1429
+ },
1430
+ "execution_count": 104,
1431
+ "metadata": {},
1432
+ "output_type": "execute_result"
1433
+ }
1434
+ ],
1435
+ "source": [
1436
+ "full_df[\"material\"].value_counts()"
1437
+ ]
1438
+ },
1439
+ {
1440
+ "cell_type": "code",
1441
+ "execution_count": null,
1442
+ "metadata": {},
1443
+ "outputs": [],
1444
+ "source": []
1445
+ }
1446
+ ],
1447
+ "metadata": {
1448
+ "kernelspec": {
1449
+ "display_name": "ArtifactClassification",
1450
+ "language": "python",
1451
+ "name": "python3"
1452
+ },
1453
+ "language_info": {
1454
+ "codemirror_mode": {
1455
+ "name": "ipython",
1456
+ "version": 3
1457
+ },
1458
+ "file_extension": ".py",
1459
+ "mimetype": "text/x-python",
1460
+ "name": "python",
1461
+ "nbconvert_exporter": "python",
1462
+ "pygments_lexer": "ipython3",
1463
+ "version": "3.10.12"
1464
+ }
1465
+ },
1466
+ "nbformat": 4,
1467
+ "nbformat_minor": 2
1468
+ }
3.0-efficientnet_example.ipynb ADDED
@@ -0,0 +1,1062 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from PIL import Image\n",
10
+ "import pandas as pd\n",
11
+ "import os\n",
12
+ "from datasets import Dataset, Image, DatasetDict\n",
13
+ "from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor\n",
14
+ "from transformers import (\n",
15
+ " AutoImageProcessor,\n",
16
+ " AutoModelForImageClassification,\n",
17
+ " TrainingArguments,\n",
18
+ " Trainer,\n",
19
+ " DefaultDataCollator,\n",
20
+ ")\n",
21
+ "import evaluate\n",
22
+ "import numpy as np"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "### Load data"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 2,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n",
39
+ "file2obj[\"image\"] = file2obj.apply(lambda x: os.path.join(\"..\", x[\"root\"], x[\"file\"]), axis=1)\n",
40
+ "file2obj.rename(columns={\"obj_num\": \"label\"}, inplace=True)\n",
41
+ "\n",
42
+ "# Group by 'obj_num' and count occurrences\n",
43
+ "obj_num_counts = file2obj[\"label\"].value_counts()\n",
44
+ "\n",
45
+ "# Filter rows where 'obj_num' appears more than twice\n",
46
+ "file2obj_3 = file2obj[file2obj[\"label\"].isin(obj_num_counts[obj_num_counts > 2].index)]"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "metadata": {},
52
+ "source": [
53
+ "### Form HF dataset"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 3,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "application/vnd.jupyter.widget-view+json": {
64
+ "model_id": "59370086a1b64dc5842d9becd9019aad",
65
+ "version_major": 2,
66
+ "version_minor": 0
67
+ },
68
+ "text/plain": [
69
+ "Casting to class labels: 0%| | 0/25725 [00:00<?, ? examples/s]"
70
+ ]
71
+ },
72
+ "metadata": {},
73
+ "output_type": "display_data"
74
+ }
75
+ ],
76
+ "source": [
77
+ "ds = Dataset.from_pandas(file2obj_3[[\"image\", \"label\"]], preserve_index=False).cast_column(\n",
78
+ " \"image\", Image()\n",
79
+ ")\n",
80
+ "ds = ds.class_encode_column(\"label\")\n",
81
+ "trainval_test = ds.train_test_split(stratify_by_column=\"label\", test_size=0.16)\n",
82
+ "train_val = trainval_test[\"train\"].train_test_split(stratify_by_column=\"label\", test_size=16 / 84)\n",
83
+ "ds = DatasetDict(\n",
84
+ " {\"train\": train_val[\"train\"], \"valid\": train_val[\"test\"], \"test\": trainval_test[\"test\"]}\n",
85
+ ")"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "markdown",
90
+ "metadata": {},
91
+ "source": [
92
+ "### Transform data"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 4,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "checkpoint = \"google/efficientnet-b3\"\n",
102
+ "image_processor = AutoImageProcessor.from_pretrained(checkpoint)\n",
103
+ "\n",
104
+ "\n",
105
+ "normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)\n",
106
+ "size = (\n",
107
+ " image_processor.size[\"shortest_edge\"]\n",
108
+ " if \"shortest_edge\" in image_processor.size\n",
109
+ " else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
110
+ ")\n",
111
+ "_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])\n",
112
+ "\n",
113
+ "\n",
114
+ "def transforms(examples):\n",
115
+ " examples[\"pixel_values\"] = [_transforms(img.convert(\"RGB\")) for img in examples[\"image\"]]\n",
116
+ " del examples[\"image\"]\n",
117
+ " return examples\n",
118
+ "\n",
119
+ "\n",
120
+ "ds = ds.with_transform(transforms)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "markdown",
125
+ "metadata": {},
126
+ "source": [
127
+ "### Set up model and metrics"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 5,
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "name": "stderr",
137
+ "output_type": "stream",
138
+ "text": [
139
+ "Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:\n",
140
+ "- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated\n",
141
+ "- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated\n",
142
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
143
+ ]
144
+ }
145
+ ],
146
+ "source": [
147
+ "labels = ds[\"train\"].features[\"label\"].names\n",
148
+ "model = AutoModelForImageClassification.from_pretrained(\n",
149
+ " checkpoint,\n",
150
+ " num_labels=len(labels),\n",
151
+ " id2label={str(i): c for i, c in enumerate(labels)},\n",
152
+ " label2id={c: str(i) for i, c in enumerate(labels)},\n",
153
+ " ignore_mismatched_sizes=True,\n",
154
+ ")\n",
155
+ "\n",
156
+ "data_collator = DefaultDataCollator()\n",
157
+ "\n",
158
+ "accuracy = evaluate.load(\"accuracy\")\n",
159
+ "\n",
160
+ "\n",
161
+ "def compute_metrics(eval_pred):\n",
162
+ " predictions, labels = eval_pred\n",
163
+ " predictions = np.argmax(predictions, axis=1)\n",
164
+ " return accuracy.compute(predictions=predictions, references=labels)"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "markdown",
169
+ "metadata": {},
170
+ "source": [
171
+ "### Train model"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 10,
177
+ "metadata": {},
178
+ "outputs": [
179
+ {
180
+ "name": "stderr",
181
+ "output_type": "stream",
182
+ "text": [
183
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n",
184
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Serializing object of type dict that is 147552 bytes\n"
185
+ ]
186
+ },
187
+ {
188
+ "data": {
189
+ "application/vnd.jupyter.widget-view+json": {
190
+ "model_id": "cb34337db5584dfbbc4a76bb7e724b26",
191
+ "version_major": 2,
192
+ "version_minor": 0
193
+ },
194
+ "text/plain": [
195
+ " 0%| | 0/274 [00:00<?, ?it/s]"
196
+ ]
197
+ },
198
+ "metadata": {},
199
+ "output_type": "display_data"
200
+ },
201
+ {
202
+ "name": "stdout",
203
+ "output_type": "stream",
204
+ "text": [
205
+ "{'loss': 8.0521, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.04}\n",
206
+ "{'loss': 8.0927, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.07}\n",
207
+ "{'loss': 8.1187, 'learning_rate': 4.959349593495935e-05, 'epoch': 0.11}\n",
208
+ "{'loss': 8.2335, 'learning_rate': 4.75609756097561e-05, 'epoch': 0.15}\n",
209
+ "{'loss': 8.2531, 'learning_rate': 4.5528455284552844e-05, 'epoch': 0.18}\n",
210
+ "{'loss': 8.2873, 'learning_rate': 4.3495934959349595e-05, 'epoch': 0.22}\n",
211
+ "{'loss': 8.2071, 'learning_rate': 4.146341463414634e-05, 'epoch': 0.26}\n",
212
+ "{'loss': 8.2287, 'learning_rate': 3.943089430894309e-05, 'epoch': 0.29}\n",
213
+ "{'loss': 8.1928, 'learning_rate': 3.739837398373984e-05, 'epoch': 0.33}\n",
214
+ "{'loss': 8.2053, 'learning_rate': 3.5365853658536584e-05, 'epoch': 0.36}\n",
215
+ "{'loss': 8.1621, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.4}\n",
216
+ "{'loss': 8.1731, 'learning_rate': 3.130081300813008e-05, 'epoch': 0.44}\n",
217
+ "{'loss': 8.1447, 'learning_rate': 2.926829268292683e-05, 'epoch': 0.47}\n",
218
+ "{'loss': 8.1161, 'learning_rate': 2.7235772357723577e-05, 'epoch': 0.51}\n",
219
+ "{'loss': 8.1081, 'learning_rate': 2.5203252032520324e-05, 'epoch': 0.55}\n",
220
+ "{'loss': 8.0828, 'learning_rate': 2.3170731707317075e-05, 'epoch': 0.58}\n",
221
+ "{'loss': 8.1312, 'learning_rate': 2.1138211382113822e-05, 'epoch': 0.62}\n",
222
+ "{'loss': 8.1627, 'learning_rate': 1.9105691056910573e-05, 'epoch': 0.66}\n",
223
+ "{'loss': 8.0692, 'learning_rate': 1.707317073170732e-05, 'epoch': 0.69}\n",
224
+ "{'loss': 7.9943, 'learning_rate': 1.5040650406504067e-05, 'epoch': 0.73}\n",
225
+ "{'loss': 8.0139, 'learning_rate': 1.3008130081300815e-05, 'epoch': 0.77}\n",
226
+ "{'loss': 8.027, 'learning_rate': 1.0975609756097562e-05, 'epoch': 0.8}\n",
227
+ "{'loss': 8.0628, 'learning_rate': 8.94308943089431e-06, 'epoch': 0.84}\n",
228
+ "{'loss': 8.1264, 'learning_rate': 6.910569105691057e-06, 'epoch': 0.88}\n",
229
+ "{'loss': 8.0408, 'learning_rate': 4.8780487804878055e-06, 'epoch': 0.91}\n",
230
+ "{'loss': 8.071, 'learning_rate': 2.8455284552845528e-06, 'epoch': 0.95}\n",
231
+ "{'loss': 8.1237, 'learning_rate': 8.130081300813009e-07, 'epoch': 0.99}\n"
232
+ ]
233
+ },
234
+ {
235
+ "data": {
236
+ "application/vnd.jupyter.widget-view+json": {
237
+ "model_id": "5915cfa41d474a399ce0f53bc8f6f947",
238
+ "version_major": 2,
239
+ "version_minor": 0
240
+ },
241
+ "text/plain": [
242
+ " 0%| | 0/65 [00:00<?, ?it/s]"
243
+ ]
244
+ },
245
+ "metadata": {},
246
+ "output_type": "display_data"
247
+ },
248
+ {
249
+ "name": "stdout",
250
+ "output_type": "stream",
251
+ "text": [
252
+ "{'eval_loss': 8.02699089050293, 'eval_accuracy': 0.02575315840621963, 'eval_runtime': 25.2001, 'eval_samples_per_second': 163.333, 'eval_steps_per_second': 2.579, 'epoch': 1.0}\n",
253
+ "{'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0}\n"
254
+ ]
255
+ },
256
+ {
257
+ "data": {
258
+ "text/plain": [
259
+ "TrainOutput(global_step=274, training_loss=8.129460439194728, metrics={'train_runtime': 236.2359, 'train_samples_per_second': 74.049, 'train_steps_per_second': 1.16, 'train_loss': 8.129460439194728, 'epoch': 1.0})"
260
+ ]
261
+ },
262
+ "execution_count": 10,
263
+ "metadata": {},
264
+ "output_type": "execute_result"
265
+ }
266
+ ],
267
+ "source": [
268
+ "training_args = TrainingArguments(\n",
269
+ " output_dir=\"../models/test\",\n",
270
+ " remove_unused_columns=False,\n",
271
+ " evaluation_strategy=\"epoch\",\n",
272
+ " save_strategy=\"epoch\",\n",
273
+ " learning_rate=5e-5,\n",
274
+ " per_device_train_batch_size=64,\n",
275
+ " # gradient_accumulation_steps=2,\n",
276
+ " per_device_eval_batch_size=64,\n",
277
+ " num_train_epochs=1,\n",
278
+ " warmup_ratio=0.1,\n",
279
+ " logging_steps=10,\n",
280
+ " load_best_model_at_end=True,\n",
281
+ " metric_for_best_model=\"accuracy\",\n",
282
+ " push_to_hub=False,\n",
283
+ "\n",
284
+ ")\n",
285
+ "\n",
286
+ "trainer = Trainer(\n",
287
+ " model=model,\n",
288
+ " args=training_args,\n",
289
+ " train_dataset=ds[\"train\"], # .select(range(100)),\n",
290
+ " eval_dataset=ds[\"valid\"], # .select(range(100)),\n",
291
+ " tokenizer=image_processor,\n",
292
+ " compute_metrics=compute_metrics,\n",
293
+ " data_collator=data_collator,\n",
294
+ ")\n",
295
+ "\n",
296
+ "trainer.train()"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "markdown",
301
+ "metadata": {},
302
+ "source": [
303
+ "### Evaluation"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 7,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "application/vnd.jupyter.widget-view+json": {
314
+ "model_id": "4979f1d5536f4a3e97ecbc36c7eebbfa",
315
+ "version_major": 2,
316
+ "version_minor": 0
317
+ },
318
+ "text/plain": [
319
+ " 0%| | 0/7 [00:00<?, ?it/s]"
320
+ ]
321
+ },
322
+ "metadata": {},
323
+ "output_type": "display_data"
324
+ },
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "{'eval_loss': 8.275933265686035, 'eval_accuracy': 0.0, 'eval_runtime': 0.6419, 'eval_samples_per_second': 155.791, 'eval_steps_per_second': 10.905, 'epoch': 0.57}\n"
330
+ ]
331
+ },
332
+ {
333
+ "data": {
334
+ "application/vnd.jupyter.widget-view+json": {
335
+ "model_id": "cf6f48e995bf427db3c86d1d988bf752",
336
+ "version_major": 2,
337
+ "version_minor": 0
338
+ },
339
+ "text/plain": [
340
+ " 0%| | 0/7 [00:00<?, ?it/s]"
341
+ ]
342
+ },
343
+ "metadata": {},
344
+ "output_type": "display_data"
345
+ }
346
+ ],
347
+ "source": [
348
+ "results = trainer.evaluate()\n",
349
+ "print(results)\n",
350
+ "\n",
351
+ "test_results = trainer.predict(ds[\"test\"].select(range(100)))"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": 12,
357
+ "metadata": {},
358
+ "outputs": [
359
+ {
360
+ "data": {
361
+ "text/plain": [
362
+ "EfficientNetForImageClassification(\n",
363
+ " (efficientnet): EfficientNetModel(\n",
364
+ " (embeddings): EfficientNetEmbeddings(\n",
365
+ " (padding): ZeroPad2d((0, 1, 0, 1))\n",
366
+ " (convolution): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=valid, bias=False)\n",
367
+ " (batchnorm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
368
+ " (activation): SiLU()\n",
369
+ " )\n",
370
+ " (encoder): EfficientNetEncoder(\n",
371
+ " (blocks): ModuleList(\n",
372
+ " (0): EfficientNetBlock(\n",
373
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
374
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
375
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=40, bias=False)\n",
376
+ " (depthwise_norm): BatchNorm2d(40, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
377
+ " (depthwise_act): SiLU()\n",
378
+ " )\n",
379
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
380
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
381
+ " (reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
382
+ " (expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
383
+ " (act_reduce): SiLU()\n",
384
+ " (act_expand): Sigmoid()\n",
385
+ " )\n",
386
+ " (projection): EfficientNetFinalBlockLayer(\n",
387
+ " (project_conv): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
388
+ " (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
389
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
390
+ " )\n",
391
+ " )\n",
392
+ " (1): EfficientNetBlock(\n",
393
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
394
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
395
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=24, bias=False)\n",
396
+ " (depthwise_norm): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
397
+ " (depthwise_act): SiLU()\n",
398
+ " )\n",
399
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
400
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
401
+ " (reduce): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
402
+ " (expand): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
403
+ " (act_reduce): SiLU()\n",
404
+ " (act_expand): Sigmoid()\n",
405
+ " )\n",
406
+ " (projection): EfficientNetFinalBlockLayer(\n",
407
+ " (project_conv): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
408
+ " (project_bn): BatchNorm2d(24, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
409
+ " (dropout): Dropout(p=0.007692307692307693, inplace=False)\n",
410
+ " )\n",
411
+ " )\n",
412
+ " (2): EfficientNetBlock(\n",
413
+ " (expansion): EfficientNetExpansionLayer(\n",
414
+ " (expand_conv): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
415
+ " (expand_bn): BatchNorm2d(144, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
416
+ " (expand_act): SiLU()\n",
417
+ " )\n",
418
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
419
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
420
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=144, bias=False)\n",
421
+ " (depthwise_norm): BatchNorm2d(144, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
422
+ " (depthwise_act): SiLU()\n",
423
+ " )\n",
424
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
425
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
426
+ " (reduce): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
427
+ " (expand): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
428
+ " (act_reduce): SiLU()\n",
429
+ " (act_expand): Sigmoid()\n",
430
+ " )\n",
431
+ " (projection): EfficientNetFinalBlockLayer(\n",
432
+ " (project_conv): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
433
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
434
+ " (dropout): Dropout(p=0.015384615384615385, inplace=False)\n",
435
+ " )\n",
436
+ " )\n",
437
+ " (3): EfficientNetBlock(\n",
438
+ " (expansion): EfficientNetExpansionLayer(\n",
439
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
440
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
441
+ " (expand_act): SiLU()\n",
442
+ " )\n",
443
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
444
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
445
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
446
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
447
+ " (depthwise_act): SiLU()\n",
448
+ " )\n",
449
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
450
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
451
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
452
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
453
+ " (act_reduce): SiLU()\n",
454
+ " (act_expand): Sigmoid()\n",
455
+ " )\n",
456
+ " (projection): EfficientNetFinalBlockLayer(\n",
457
+ " (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
458
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
459
+ " (dropout): Dropout(p=0.02307692307692308, inplace=False)\n",
460
+ " )\n",
461
+ " )\n",
462
+ " (4): EfficientNetBlock(\n",
463
+ " (expansion): EfficientNetExpansionLayer(\n",
464
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
465
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
466
+ " (expand_act): SiLU()\n",
467
+ " )\n",
468
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
469
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
470
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=192, bias=False)\n",
471
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
472
+ " (depthwise_act): SiLU()\n",
473
+ " )\n",
474
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
475
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
476
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
477
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
478
+ " (act_reduce): SiLU()\n",
479
+ " (act_expand): Sigmoid()\n",
480
+ " )\n",
481
+ " (projection): EfficientNetFinalBlockLayer(\n",
482
+ " (project_conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
483
+ " (project_bn): BatchNorm2d(32, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
484
+ " (dropout): Dropout(p=0.03076923076923077, inplace=False)\n",
485
+ " )\n",
486
+ " )\n",
487
+ " (5): EfficientNetBlock(\n",
488
+ " (expansion): EfficientNetExpansionLayer(\n",
489
+ " (expand_conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
490
+ " (expand_bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
491
+ " (expand_act): SiLU()\n",
492
+ " )\n",
493
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
494
+ " (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
495
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(192, 192, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=192, bias=False)\n",
496
+ " (depthwise_norm): BatchNorm2d(192, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
497
+ " (depthwise_act): SiLU()\n",
498
+ " )\n",
499
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
500
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
501
+ " (reduce): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
502
+ " (expand): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
503
+ " (act_reduce): SiLU()\n",
504
+ " (act_expand): Sigmoid()\n",
505
+ " )\n",
506
+ " (projection): EfficientNetFinalBlockLayer(\n",
507
+ " (project_conv): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
508
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
509
+ " (dropout): Dropout(p=0.038461538461538464, inplace=False)\n",
510
+ " )\n",
511
+ " )\n",
512
+ " (6): EfficientNetBlock(\n",
513
+ " (expansion): EfficientNetExpansionLayer(\n",
514
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
515
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
516
+ " (expand_act): SiLU()\n",
517
+ " )\n",
518
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
519
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
520
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
521
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
522
+ " (depthwise_act): SiLU()\n",
523
+ " )\n",
524
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
525
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
526
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
527
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
528
+ " (act_reduce): SiLU()\n",
529
+ " (act_expand): Sigmoid()\n",
530
+ " )\n",
531
+ " (projection): EfficientNetFinalBlockLayer(\n",
532
+ " (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
533
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
534
+ " (dropout): Dropout(p=0.04615384615384616, inplace=False)\n",
535
+ " )\n",
536
+ " )\n",
537
+ " (7): EfficientNetBlock(\n",
538
+ " (expansion): EfficientNetExpansionLayer(\n",
539
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
540
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
541
+ " (expand_act): SiLU()\n",
542
+ " )\n",
543
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
544
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
545
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=288, bias=False)\n",
546
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
547
+ " (depthwise_act): SiLU()\n",
548
+ " )\n",
549
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
550
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
551
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
552
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
553
+ " (act_reduce): SiLU()\n",
554
+ " (act_expand): Sigmoid()\n",
555
+ " )\n",
556
+ " (projection): EfficientNetFinalBlockLayer(\n",
557
+ " (project_conv): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
558
+ " (project_bn): BatchNorm2d(48, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
559
+ " (dropout): Dropout(p=0.05384615384615385, inplace=False)\n",
560
+ " )\n",
561
+ " )\n",
562
+ " (8): EfficientNetBlock(\n",
563
+ " (expansion): EfficientNetExpansionLayer(\n",
564
+ " (expand_conv): Conv2d(48, 288, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
565
+ " (expand_bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
566
+ " (expand_act): SiLU()\n",
567
+ " )\n",
568
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
569
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
570
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(288, 288, kernel_size=(3, 3), stride=(2, 2), padding=valid, groups=288, bias=False)\n",
571
+ " (depthwise_norm): BatchNorm2d(288, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
572
+ " (depthwise_act): SiLU()\n",
573
+ " )\n",
574
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
575
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
576
+ " (reduce): Conv2d(288, 12, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
577
+ " (expand): Conv2d(12, 288, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
578
+ " (act_reduce): SiLU()\n",
579
+ " (act_expand): Sigmoid()\n",
580
+ " )\n",
581
+ " (projection): EfficientNetFinalBlockLayer(\n",
582
+ " (project_conv): Conv2d(288, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
583
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
584
+ " (dropout): Dropout(p=0.06153846153846154, inplace=False)\n",
585
+ " )\n",
586
+ " )\n",
587
+ " (9): EfficientNetBlock(\n",
588
+ " (expansion): EfficientNetExpansionLayer(\n",
589
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
590
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
591
+ " (expand_act): SiLU()\n",
592
+ " )\n",
593
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
594
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
595
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
596
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
597
+ " (depthwise_act): SiLU()\n",
598
+ " )\n",
599
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
600
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
601
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
602
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
603
+ " (act_reduce): SiLU()\n",
604
+ " (act_expand): Sigmoid()\n",
605
+ " )\n",
606
+ " (projection): EfficientNetFinalBlockLayer(\n",
607
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
608
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
609
+ " (dropout): Dropout(p=0.06923076923076923, inplace=False)\n",
610
+ " )\n",
611
+ " )\n",
612
+ " (10): EfficientNetBlock(\n",
613
+ " (expansion): EfficientNetExpansionLayer(\n",
614
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
615
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
616
+ " (expand_act): SiLU()\n",
617
+ " )\n",
618
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
619
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
620
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
621
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
622
+ " (depthwise_act): SiLU()\n",
623
+ " )\n",
624
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
625
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
626
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
627
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
628
+ " (act_reduce): SiLU()\n",
629
+ " (act_expand): Sigmoid()\n",
630
+ " )\n",
631
+ " (projection): EfficientNetFinalBlockLayer(\n",
632
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
633
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
634
+ " (dropout): Dropout(p=0.07692307692307693, inplace=False)\n",
635
+ " )\n",
636
+ " )\n",
637
+ " (11): EfficientNetBlock(\n",
638
+ " (expansion): EfficientNetExpansionLayer(\n",
639
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
640
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
641
+ " (expand_act): SiLU()\n",
642
+ " )\n",
643
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
644
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
645
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
646
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
647
+ " (depthwise_act): SiLU()\n",
648
+ " )\n",
649
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
650
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
651
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
652
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
653
+ " (act_reduce): SiLU()\n",
654
+ " (act_expand): Sigmoid()\n",
655
+ " )\n",
656
+ " (projection): EfficientNetFinalBlockLayer(\n",
657
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
658
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
659
+ " (dropout): Dropout(p=0.08461538461538462, inplace=False)\n",
660
+ " )\n",
661
+ " )\n",
662
+ " (12): EfficientNetBlock(\n",
663
+ " (expansion): EfficientNetExpansionLayer(\n",
664
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
665
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
666
+ " (expand_act): SiLU()\n",
667
+ " )\n",
668
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
669
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
670
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=576, bias=False)\n",
671
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
672
+ " (depthwise_act): SiLU()\n",
673
+ " )\n",
674
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
675
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
676
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
677
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
678
+ " (act_reduce): SiLU()\n",
679
+ " (act_expand): Sigmoid()\n",
680
+ " )\n",
681
+ " (projection): EfficientNetFinalBlockLayer(\n",
682
+ " (project_conv): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
683
+ " (project_bn): BatchNorm2d(96, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
684
+ " (dropout): Dropout(p=0.09230769230769233, inplace=False)\n",
685
+ " )\n",
686
+ " )\n",
687
+ " (13): EfficientNetBlock(\n",
688
+ " (expansion): EfficientNetExpansionLayer(\n",
689
+ " (expand_conv): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
690
+ " (expand_bn): BatchNorm2d(576, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
691
+ " (expand_act): SiLU()\n",
692
+ " )\n",
693
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
694
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
695
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(576, 576, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=576, bias=False)\n",
696
+ " (depthwise_norm): BatchNorm2d(576, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
697
+ " (depthwise_act): SiLU()\n",
698
+ " )\n",
699
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
700
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
701
+ " (reduce): Conv2d(576, 24, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
702
+ " (expand): Conv2d(24, 576, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
703
+ " (act_reduce): SiLU()\n",
704
+ " (act_expand): Sigmoid()\n",
705
+ " )\n",
706
+ " (projection): EfficientNetFinalBlockLayer(\n",
707
+ " (project_conv): Conv2d(576, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
708
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
709
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
710
+ " )\n",
711
+ " )\n",
712
+ " (14): EfficientNetBlock(\n",
713
+ " (expansion): EfficientNetExpansionLayer(\n",
714
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
715
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
716
+ " (expand_act): SiLU()\n",
717
+ " )\n",
718
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
719
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
720
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
721
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
722
+ " (depthwise_act): SiLU()\n",
723
+ " )\n",
724
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
725
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
726
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
727
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
728
+ " (act_reduce): SiLU()\n",
729
+ " (act_expand): Sigmoid()\n",
730
+ " )\n",
731
+ " (projection): EfficientNetFinalBlockLayer(\n",
732
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
733
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
734
+ " (dropout): Dropout(p=0.1076923076923077, inplace=False)\n",
735
+ " )\n",
736
+ " )\n",
737
+ " (15): EfficientNetBlock(\n",
738
+ " (expansion): EfficientNetExpansionLayer(\n",
739
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
740
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
741
+ " (expand_act): SiLU()\n",
742
+ " )\n",
743
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
744
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
745
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
746
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
747
+ " (depthwise_act): SiLU()\n",
748
+ " )\n",
749
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
750
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
751
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
752
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
753
+ " (act_reduce): SiLU()\n",
754
+ " (act_expand): Sigmoid()\n",
755
+ " )\n",
756
+ " (projection): EfficientNetFinalBlockLayer(\n",
757
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
758
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
759
+ " (dropout): Dropout(p=0.11538461538461539, inplace=False)\n",
760
+ " )\n",
761
+ " )\n",
762
+ " (16): EfficientNetBlock(\n",
763
+ " (expansion): EfficientNetExpansionLayer(\n",
764
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
765
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
766
+ " (expand_act): SiLU()\n",
767
+ " )\n",
768
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
769
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
770
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
771
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
772
+ " (depthwise_act): SiLU()\n",
773
+ " )\n",
774
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
775
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
776
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
777
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
778
+ " (act_reduce): SiLU()\n",
779
+ " (act_expand): Sigmoid()\n",
780
+ " )\n",
781
+ " (projection): EfficientNetFinalBlockLayer(\n",
782
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
783
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
784
+ " (dropout): Dropout(p=0.12307692307692308, inplace=False)\n",
785
+ " )\n",
786
+ " )\n",
787
+ " (17): EfficientNetBlock(\n",
788
+ " (expansion): EfficientNetExpansionLayer(\n",
789
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
790
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
791
+ " (expand_act): SiLU()\n",
792
+ " )\n",
793
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
794
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
795
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=816, bias=False)\n",
796
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
797
+ " (depthwise_act): SiLU()\n",
798
+ " )\n",
799
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
800
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
801
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
802
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
803
+ " (act_reduce): SiLU()\n",
804
+ " (act_expand): Sigmoid()\n",
805
+ " )\n",
806
+ " (projection): EfficientNetFinalBlockLayer(\n",
807
+ " (project_conv): Conv2d(816, 136, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
808
+ " (project_bn): BatchNorm2d(136, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
809
+ " (dropout): Dropout(p=0.13076923076923078, inplace=False)\n",
810
+ " )\n",
811
+ " )\n",
812
+ " (18): EfficientNetBlock(\n",
813
+ " (expansion): EfficientNetExpansionLayer(\n",
814
+ " (expand_conv): Conv2d(136, 816, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
815
+ " (expand_bn): BatchNorm2d(816, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
816
+ " (expand_act): SiLU()\n",
817
+ " )\n",
818
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
819
+ " (depthwise_conv_pad): ZeroPad2d((2, 2, 2, 2))\n",
820
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(816, 816, kernel_size=(5, 5), stride=(2, 2), padding=valid, groups=816, bias=False)\n",
821
+ " (depthwise_norm): BatchNorm2d(816, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
822
+ " (depthwise_act): SiLU()\n",
823
+ " )\n",
824
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
825
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
826
+ " (reduce): Conv2d(816, 34, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
827
+ " (expand): Conv2d(34, 816, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
828
+ " (act_reduce): SiLU()\n",
829
+ " (act_expand): Sigmoid()\n",
830
+ " )\n",
831
+ " (projection): EfficientNetFinalBlockLayer(\n",
832
+ " (project_conv): Conv2d(816, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
833
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
834
+ " (dropout): Dropout(p=0.13846153846153847, inplace=False)\n",
835
+ " )\n",
836
+ " )\n",
837
+ " (19): EfficientNetBlock(\n",
838
+ " (expansion): EfficientNetExpansionLayer(\n",
839
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
840
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
841
+ " (expand_act): SiLU()\n",
842
+ " )\n",
843
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
844
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
845
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
846
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
847
+ " (depthwise_act): SiLU()\n",
848
+ " )\n",
849
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
850
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
851
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
852
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
853
+ " (act_reduce): SiLU()\n",
854
+ " (act_expand): Sigmoid()\n",
855
+ " )\n",
856
+ " (projection): EfficientNetFinalBlockLayer(\n",
857
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
858
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
859
+ " (dropout): Dropout(p=0.14615384615384616, inplace=False)\n",
860
+ " )\n",
861
+ " )\n",
862
+ " (20): EfficientNetBlock(\n",
863
+ " (expansion): EfficientNetExpansionLayer(\n",
864
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
865
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
866
+ " (expand_act): SiLU()\n",
867
+ " )\n",
868
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
869
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
870
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
871
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
872
+ " (depthwise_act): SiLU()\n",
873
+ " )\n",
874
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
875
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
876
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
877
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
878
+ " (act_reduce): SiLU()\n",
879
+ " (act_expand): Sigmoid()\n",
880
+ " )\n",
881
+ " (projection): EfficientNetFinalBlockLayer(\n",
882
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
883
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
884
+ " (dropout): Dropout(p=0.15384615384615385, inplace=False)\n",
885
+ " )\n",
886
+ " )\n",
887
+ " (21): EfficientNetBlock(\n",
888
+ " (expansion): EfficientNetExpansionLayer(\n",
889
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
890
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
891
+ " (expand_act): SiLU()\n",
892
+ " )\n",
893
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
894
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
895
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
896
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
897
+ " (depthwise_act): SiLU()\n",
898
+ " )\n",
899
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
900
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
901
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
902
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
903
+ " (act_reduce): SiLU()\n",
904
+ " (act_expand): Sigmoid()\n",
905
+ " )\n",
906
+ " (projection): EfficientNetFinalBlockLayer(\n",
907
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
908
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
909
+ " (dropout): Dropout(p=0.16153846153846155, inplace=False)\n",
910
+ " )\n",
911
+ " )\n",
912
+ " (22): EfficientNetBlock(\n",
913
+ " (expansion): EfficientNetExpansionLayer(\n",
914
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
915
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
916
+ " (expand_act): SiLU()\n",
917
+ " )\n",
918
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
919
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
920
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
921
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
922
+ " (depthwise_act): SiLU()\n",
923
+ " )\n",
924
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
925
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
926
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
927
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
928
+ " (act_reduce): SiLU()\n",
929
+ " (act_expand): Sigmoid()\n",
930
+ " )\n",
931
+ " (projection): EfficientNetFinalBlockLayer(\n",
932
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
933
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
934
+ " (dropout): Dropout(p=0.16923076923076924, inplace=False)\n",
935
+ " )\n",
936
+ " )\n",
937
+ " (23): EfficientNetBlock(\n",
938
+ " (expansion): EfficientNetExpansionLayer(\n",
939
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
940
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
941
+ " (expand_act): SiLU()\n",
942
+ " )\n",
943
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
944
+ " (depthwise_conv_pad): ZeroPad2d((1, 2, 1, 2))\n",
945
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
946
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
947
+ " (depthwise_act): SiLU()\n",
948
+ " )\n",
949
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
950
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
951
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
952
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
953
+ " (act_reduce): SiLU()\n",
954
+ " (act_expand): Sigmoid()\n",
955
+ " )\n",
956
+ " (projection): EfficientNetFinalBlockLayer(\n",
957
+ " (project_conv): Conv2d(1392, 232, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
958
+ " (project_bn): BatchNorm2d(232, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
959
+ " (dropout): Dropout(p=0.17692307692307693, inplace=False)\n",
960
+ " )\n",
961
+ " )\n",
962
+ " (24): EfficientNetBlock(\n",
963
+ " (expansion): EfficientNetExpansionLayer(\n",
964
+ " (expand_conv): Conv2d(232, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
965
+ " (expand_bn): BatchNorm2d(1392, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
966
+ " (expand_act): SiLU()\n",
967
+ " )\n",
968
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
969
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
970
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(1392, 1392, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=1392, bias=False)\n",
971
+ " (depthwise_norm): BatchNorm2d(1392, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
972
+ " (depthwise_act): SiLU()\n",
973
+ " )\n",
974
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
975
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
976
+ " (reduce): Conv2d(1392, 58, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
977
+ " (expand): Conv2d(58, 1392, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
978
+ " (act_reduce): SiLU()\n",
979
+ " (act_expand): Sigmoid()\n",
980
+ " )\n",
981
+ " (projection): EfficientNetFinalBlockLayer(\n",
982
+ " (project_conv): Conv2d(1392, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
983
+ " (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
984
+ " (dropout): Dropout(p=0.18461538461538465, inplace=False)\n",
985
+ " )\n",
986
+ " )\n",
987
+ " (25): EfficientNetBlock(\n",
988
+ " (expansion): EfficientNetExpansionLayer(\n",
989
+ " (expand_conv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
990
+ " (expand_bn): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)\n",
991
+ " (expand_act): SiLU()\n",
992
+ " )\n",
993
+ " (depthwise_conv): EfficientNetDepthwiseLayer(\n",
994
+ " (depthwise_conv_pad): ZeroPad2d((0, 1, 0, 1))\n",
995
+ " (depthwise_conv): EfficientNetDepthwiseConv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=2304, bias=False)\n",
996
+ " (depthwise_norm): BatchNorm2d(2304, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
997
+ " (depthwise_act): SiLU()\n",
998
+ " )\n",
999
+ " (squeeze_excite): EfficientNetSqueezeExciteLayer(\n",
1000
+ " (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
1001
+ " (reduce): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
1002
+ " (expand): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1), padding=same)\n",
1003
+ " (act_reduce): SiLU()\n",
1004
+ " (act_expand): Sigmoid()\n",
1005
+ " )\n",
1006
+ " (projection): EfficientNetFinalBlockLayer(\n",
1007
+ " (project_conv): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
1008
+ " (project_bn): BatchNorm2d(384, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
1009
+ " (dropout): Dropout(p=0.19230769230769232, inplace=False)\n",
1010
+ " )\n",
1011
+ " )\n",
1012
+ " )\n",
1013
+ " (top_conv): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)\n",
1014
+ " (top_bn): BatchNorm2d(1536, eps=0.001, momentum=0.99, affine=True, track_running_stats=True)\n",
1015
+ " (top_activation): SiLU()\n",
1016
+ " )\n",
1017
+ " (pooler): AvgPool2d(kernel_size=1536, stride=1536, padding=0)\n",
1018
+ " )\n",
1019
+ " (dropout): Dropout(p=0.3, inplace=False)\n",
1020
+ " (classifier): Linear(in_features=1536, out_features=3872, bias=True)\n",
1021
+ ")"
1022
+ ]
1023
+ },
1024
+ "execution_count": 12,
1025
+ "metadata": {},
1026
+ "output_type": "execute_result"
1027
+ }
1028
+ ],
1029
+ "source": [
1030
+ "model"
1031
+ ]
1032
+ },
1033
+ {
1034
+ "cell_type": "code",
1035
+ "execution_count": null,
1036
+ "metadata": {},
1037
+ "outputs": [],
1038
+ "source": []
1039
+ }
1040
+ ],
1041
+ "metadata": {
1042
+ "kernelspec": {
1043
+ "display_name": "ArtifactClassification",
1044
+ "language": "python",
1045
+ "name": "python3"
1046
+ },
1047
+ "language_info": {
1048
+ "codemirror_mode": {
1049
+ "name": "ipython",
1050
+ "version": 3
1051
+ },
1052
+ "file_extension": ".py",
1053
+ "mimetype": "text/x-python",
1054
+ "name": "python",
1055
+ "nbconvert_exporter": "python",
1056
+ "pygments_lexer": "ipython3",
1057
+ "version": "3.10.12"
1058
+ }
1059
+ },
1060
+ "nbformat": 4,
1061
+ "nbformat_minor": 2
1062
+ }
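The architecture dump above ends in a 3,872-way classifier over 1,536 pooled features. As a minimal sketch (mine, not a cell from the notebook), this is how such a head is typically obtained from the google/efficientnet-b3 checkpoint; `ignore_mismatched_sizes` lets the 1000-class ImageNet head be dropped and re-initialised at the new size:

```python
from transformers import EfficientNetForImageClassification

# Swap the 1000-class ImageNet head for a freshly initialised 3872-class one;
# this is what triggers the "newly initialized" warnings seen later in the logs.
model = EfficientNetForImageClassification.from_pretrained(
    "google/efficientnet-b3",
    num_labels=3872,
    ignore_mismatched_sizes=True,
)
print(model.classifier)  # Linear(in_features=1536, out_features=3872, bias=True)
```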
4.0-assessing_BM_dataset.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
5.0-assessing_date_prediction.ipynb ADDED
File without changes
README.md ADDED
@@ -0,0 +1,47 @@
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: test
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # test
13
+
14
+ This model was trained from scratch on an unspecified dataset.
15
+
16
+ ## Model description
17
+
18
+ More information needed
19
+
20
+ ## Intended uses & limitations
21
+
22
+ More information needed
23
+
24
+ ## Training and evaluation data
25
+
26
+ More information needed
27
+
28
+ ## Training procedure
29
+
30
+ ### Training hyperparameters
31
+
32
+ The following hyperparameters were used during training:
33
+ - learning_rate: 5e-05
34
+ - train_batch_size: 128
35
+ - eval_batch_size: 128
36
+ - seed: 42
37
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
38
+ - lr_scheduler_type: linear
39
+ - num_epochs: 100
40
+ - mixed_precision_training: Native AMP
41
+
42
+ ### Framework versions
43
+
44
+ - Transformers 4.38.2
45
+ - Pytorch 2.2.1+cu121
46
+ - Datasets 2.18.0
47
+ - Tokenizers 0.15.2
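The hyperparameters listed in the card map onto `TrainingArguments` roughly as follows. This is a sketch: `output_dir` and any setting not named in the card are assumptions.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="test",              # model name from the card; the path is hypothetical
    learning_rate=5e-5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=100,
    fp16=True,                      # "mixed_precision_training: Native AMP"
)
```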
config.json ADDED
@@ -0,0 +1,88 @@
1
+ {
2
+ "_name_or_path": "../models/james-burton/BritishMuseum-white/bm5-white_date_log/rose-blaze-27/checkpoint-5310",
3
+ "architectures": [
4
+ "EfficientNetForImageClassification"
5
+ ],
6
+ "batch_norm_eps": 0.001,
7
+ "batch_norm_momentum": 0.99,
8
+ "depth_coefficient": 1.4,
9
+ "depth_divisor": 8,
10
+ "depthwise_padding": [
11
+ 5,
12
+ 18
13
+ ],
14
+ "drop_connect_rate": 0.2,
15
+ "dropout_rate": 0.3,
16
+ "expand_ratios": [
17
+ 1,
18
+ 6,
19
+ 6,
20
+ 6,
21
+ 6,
22
+ 6,
23
+ 6
24
+ ],
25
+ "hidden_act": "swish",
26
+ "hidden_dim": 1536,
27
+ "id2label": {
28
+ "0": "LABEL_0"
29
+ },
30
+ "image_size": 300,
31
+ "in_channels": [
32
+ 32,
33
+ 16,
34
+ 24,
35
+ 40,
36
+ 80,
37
+ 112,
38
+ 192
39
+ ],
40
+ "initializer_range": 0.02,
41
+ "kernel_sizes": [
42
+ 3,
43
+ 3,
44
+ 5,
45
+ 3,
46
+ 5,
47
+ 5,
48
+ 3
49
+ ],
50
+ "label2id": {
51
+ "LABEL_0": 0
52
+ },
53
+ "model_type": "efficientnet",
54
+ "num_block_repeats": [
55
+ 1,
56
+ 2,
57
+ 2,
58
+ 3,
59
+ 3,
60
+ 4,
61
+ 1
62
+ ],
63
+ "num_channels": 3,
64
+ "num_hidden_layers": 64,
65
+ "out_channels": [
66
+ 16,
67
+ 24,
68
+ 40,
69
+ 80,
70
+ 112,
71
+ 192,
72
+ 320
73
+ ],
74
+ "pooling_type": "mean",
75
+ "squeeze_expansion_ratio": 0.25,
76
+ "strides": [
77
+ 1,
78
+ 2,
79
+ 2,
80
+ 2,
81
+ 1,
82
+ 2,
83
+ 1
84
+ ],
85
+ "torch_dtype": "float32",
86
+ "transformers_version": "4.38.2",
87
+ "width_coefficient": 1.2
88
+ }
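A quick sketch for sanity-checking this file, assuming it is saved locally as config.json: the width/depth coefficients (1.2/1.4) and 300-px input correspond to EfficientNet-B3 scaling.

```python
from transformers import EfficientNetConfig

config = EfficientNetConfig.from_json_file("config.json")
print(config.width_coefficient, config.depth_coefficient)  # 1.2 1.4 (B3 scaling)
print(config.image_size, config.hidden_dim)                # 300 1536
```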
material_min3.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
3
+ ,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
7
+ ,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
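In this and the following CSVs, a blank "BM pretrain" or "Train data" cell means "same as the row above". A sketch (assuming the file is on disk as material_min3.csv) of forward-filling those cells before aggregating:

```python
import pandas as pd

df = pd.read_csv("material_min3.csv")
# Blank leading cells inherit the value from the row above, so forward-fill.
df[["BM pretrain", "Train data"]] = df[["BM pretrain", "Train data"]].ffill()
print(df.groupby(["BM pretrain", "Train data"])["Acc."].mean())
```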
material_min3_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.582,0.782,0.85,0.914,0.553,0.578,0.582
3
+ ,,avg+3D,om3-white_material,0.576,0.784,0.85,0.92,0.543,0.572,0.576
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.573,0.759,0.838,0.91,0.555,0.558,0.573
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.567,0.762,0.84,0.915,0.548,0.556,0.567
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.575,0.777,0.843,0.912,0.557,0.561,0.575
7
+ ,,avg+3D,om3-3Dwhite_material,0.583,0.779,0.849,0.912,0.563,0.565,0.583
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.587,0.787,0.856,0.917,0.555,0.551,0.587
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.596,0.797,0.867,0.922,0.566,0.571,0.596
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.8,0.861,0.925,0.571,0.566,0.59
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.59,0.79,0.852,0.918,0.578,0.583,0.59
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.582,0.778,0.85,0.911,0.562,0.56,0.582
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.583,0.779,0.841,0.913,0.572,0.581,0.583
material_min4.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_material,0.617,0.817,0.868,0.926,0.587,0.596,0.617
3
+ ,,avg+3D,om4-white_material,0.608,0.808,0.865,0.925,0.571,0.582,0.608
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.62,0.813,0.864,0.92,0.603,0.616,0.62
5
+ ,,avg+3D,om4-3Dwhite-1frame_material,0.625,0.812,0.871,0.919,0.605,0.616,0.625
6
+ ,white+3Dx4,avg,om4-3Dwhite_material,0.621,0.808,0.869,0.918,0.607,0.611,0.621
7
+ ,,avg+3D,om4-3Dwhite_material,0.62,0.808,0.871,0.92,0.605,0.609,0.62
8
+ Yes,white,avg,om4-white_material_bm-pretrn,0.611,0.805,0.865,0.914,0.577,0.584,0.611
9
+ ,,avg+3D,om4-white_material_bm-pretrn,0.598,0.802,0.87,0.919,0.566,0.582,0.598
10
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.635,0.824,0.877,0.93,0.61,0.608,0.635
11
+ ,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.64,0.816,0.873,0.924,0.618,0.621,0.64
12
+ ,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.626,0.822,0.877,0.923,0.601,0.614,0.626
13
+ ,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.628,0.821,0.877,0.928,0.605,0.61,0.628
material_min4_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_material,0.585,0.791,0.85,0.912,0.558,0.563,0.585
3
+ ,,avg+3D,om4-white_material,0.579,0.787,0.847,0.915,0.547,0.557,0.579
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material,0.593,0.785,0.839,0.903,0.58,0.587,0.593
5
+ ,,avg+3D,om4-3Dwhite-1frame_material,0.595,0.781,0.845,0.906,0.578,0.582,0.595
6
+ ,white+3Dx4,avg,om4-3Dwhite_material,0.59,0.786,0.841,0.905,0.578,0.583,0.59
7
+ ,,avg+3D,om4-3Dwhite_material,0.587,0.786,0.849,0.906,0.573,0.576,0.587
8
+ Yes,white,avg,om4-white_material_bm-pretrn,0.575,0.775,0.846,0.899,0.546,0.558,0.575
9
+ ,,avg+3D,om4-white_material_bm-pretrn,0.576,0.771,0.84,0.908,0.549,0.568,0.576
10
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_material_bm-pretrn,0.596,0.799,0.858,0.916,0.573,0.571,0.596
11
+ ,,avg+3D,om4-3Dwhite-1frame_material_bm-pretrn,0.598,0.792,0.859,0.914,0.579,0.582,0.598
12
+ ,white+3Dx4,avg,om4-3Dwhite_material_bm-pretrn,0.59,0.793,0.857,0.91,0.567,0.574,0.59
13
+ ,,avg+3D,om4-3Dwhite_material_bm-pretrn,0.598,0.791,0.859,0.92,0.577,0.578,0.598
material_min5.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om5-white_material,0.605,0.774,0.84,0.91,0.571,0.609,0.605
3
+ ,,avg+3D,om5-white_material,0.603,0.777,0.845,0.919,0.564,0.603,0.603
4
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.628,0.806,0.868,0.924,0.6,0.602,0.628
5
+ ,,avg+3D,om5-3Dwhite-1frame_material,0.628,0.809,0.871,0.927,0.597,0.602,0.628
6
+ ,white+3Dx4,avg,om5-3Dwhite_material,0.63,0.816,0.874,0.924,0.598,0.601,0.63
7
+ ,,avg+3D,om5-3Dwhite_material,0.622,0.818,0.877,0.927,0.587,0.588,0.622
8
+ Yes,white,avg,om5-white_material_bm-pretrn,0.605,0.791,0.855,0.912,0.572,0.586,0.605
9
+ ,,avg+3D,om5-white_material_bm-pretrn,0.592,0.784,0.847,0.909,0.557,0.582,0.592
10
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.601,0.812,0.875,0.925,0.57,0.567,0.601
11
+ ,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.622,0.809,0.875,0.926,0.595,0.598,0.622
12
+ ,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.633,0.819,0.88,0.93,0.607,0.607,0.633
13
+ ,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.642,0.824,0.879,0.929,0.627,0.632,0.642
material_min5_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om5-white_material,0.565,0.742,0.813,0.887,0.536,0.569,0.565
3
+ ,,avg+3D,om5-white_material,0.568,0.734,0.822,0.899,0.532,0.576,0.568
4
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material,0.604,0.774,0.84,0.915,0.58,0.581,0.604
5
+ ,,avg+3D,om5-3Dwhite-1frame_material,0.599,0.777,0.846,0.919,0.573,0.574,0.599
6
+ ,white+3Dx4,avg,om5-3Dwhite_material,0.593,0.792,0.849,0.911,0.565,0.563,0.593
7
+ ,,avg+3D,om5-3Dwhite_material,0.591,0.79,0.85,0.917,0.562,0.567,0.591
8
+ Yes,white,avg,om5-white_material_bm-pretrn,0.569,0.767,0.831,0.902,0.54,0.547,0.569
9
+ ,,avg+3D,om5-white_material_bm-pretrn,0.562,0.757,0.827,0.897,0.526,0.542,0.562
10
+ ,white+3Dx1,avg,om5-3Dwhite-1frame_material_bm-pretrn,0.576,0.785,0.853,0.915,0.548,0.542,0.576
11
+ ,,avg+3D,om5-3Dwhite-1frame_material_bm-pretrn,0.594,0.779,0.852,0.91,0.57,0.576,0.594
12
+ ,white+3Dx4,avg,om5-3Dwhite_material_bm-pretrn,0.6,0.794,0.856,0.923,0.577,0.572,0.6
13
+ ,,avg+3D,om5-3Dwhite_material_bm-pretrn,0.606,0.792,0.852,0.915,0.593,0.597,0.606
material_min6.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om6-white_material,0.632,0.817,0.878,0.927,0.606,0.598,0.632
3
+ ,,avg+3D,om6-white_material,0.632,0.811,0.869,0.924,0.602,0.593,0.632
4
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.615,0.817,0.878,0.929,0.593,0.617,0.615
5
+ ,,avg+3D,om6-3Dwhite-1frame_material,0.62,0.818,0.88,0.928,0.597,0.623,0.62
6
+ ,white+3Dx4,avg,om6-3Dwhite_material,0.628,0.819,0.879,0.933,0.604,0.611,0.628
7
+ ,,avg+3D,om6-3Dwhite_material,0.624,0.819,0.88,0.933,0.597,0.598,0.624
8
+ Yes,white,avg,om6-white_material_bm-pretrn,0.641,0.824,0.886,0.934,0.614,0.61,0.641
9
+ ,,avg+3D,om6-white_material_bm-pretrn,0.644,0.834,0.885,0.938,0.617,0.618,0.644
10
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.622,0.818,0.88,0.935,0.593,0.59,0.622
11
+ ,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.617,0.813,0.879,0.932,0.586,0.584,0.617
12
+ ,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.64,0.824,0.879,0.936,0.615,0.615,0.64
13
+ ,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.648,0.827,0.883,0.94,0.622,0.622,0.648
material_min6_max1.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om6-white_material,0.603,0.793,0.854,0.911,0.58,0.572,0.603
3
+ ,,avg+3D,om6-white_material,0.6,0.79,0.856,0.91,0.574,0.568,0.6
4
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material,0.578,0.786,0.852,0.915,0.556,0.574,0.578
5
+ ,,avg+3D,om6-3Dwhite-1frame_material,0.581,0.793,0.858,0.917,0.558,0.58,0.581
6
+ ,white+3Dx4,avg,om6-3Dwhite_material,0.592,0.795,0.858,0.919,0.57,0.568,0.592
7
+ ,,avg+3D,om6-3Dwhite_material,0.597,0.793,0.858,0.922,0.571,0.567,0.597
8
+ Yes,white,avg,om6-white_material_bm-pretrn,0.612,0.804,0.865,0.917,0.587,0.579,0.612
9
+ ,,avg+3D,om6-white_material_bm-pretrn,0.613,0.812,0.864,0.922,0.589,0.586,0.613
10
+ ,white+3Dx1,avg,om6-3Dwhite-1frame_material_bm-pretrn,0.598,0.791,0.864,0.919,0.573,0.566,0.598
11
+ ,,avg+3D,om6-3Dwhite-1frame_material_bm-pretrn,0.599,0.787,0.863,0.924,0.571,0.568,0.599
12
+ ,white+3Dx4,avg,om6-3Dwhite_material_bm-pretrn,0.597,0.793,0.859,0.924,0.575,0.575,0.597
13
+ ,,avg+3D,om6-3Dwhite_material_bm-pretrn,0.609,0.799,0.867,0.927,0.585,0.58,0.609
material_x_plus3Ds.csv ADDED
@@ -0,0 +1,13 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om3-white_material,0.62,0.812,0.869,0.93,0.588,0.612,0.62
3
+ ,,avg+3D,om3-white_material,0.609,0.812,0.871,0.934,0.572,0.598,0.609
4
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material,0.608,0.793,0.861,0.924,0.586,0.584,0.608
5
+ ,,avg+3D,om3-3Dwhite-1frame_material,0.604,0.794,0.857,0.923,0.579,0.579,0.604
6
+ ,white+3Dx4,avg,om3-3Dwhite_material,0.618,0.809,0.871,0.929,0.596,0.598,0.618
7
+ ,,avg+3D,om3-3Dwhite_material,0.627,0.811,0.873,0.926,0.604,0.607,0.627
8
+ Yes,white,avg,om3-white_material_bm-pretrn,0.62,0.817,0.883,0.933,0.583,0.579,0.62
9
+ ,,avg+3D,om3-white_material_bm-pretrn,0.628,0.825,0.883,0.936,0.592,0.599,0.628
10
+ ,white+3Dx1,avg,om3-3Dwhite-1frame_material_bm-pretrn,0.62,0.828,0.88,0.936,0.598,0.591,0.62
11
+ ,,avg+3D,om3-3Dwhite-1frame_material_bm-pretrn,0.619,0.819,0.878,0.931,0.602,0.604,0.619
12
+ ,white+3Dx4,avg,om3-3Dwhite_material_bm-pretrn,0.621,0.812,0.873,0.928,0.6,0.601,0.621
13
+ ,,avg+3D,om3-3Dwhite_material_bm-pretrn,0.624,0.809,0.873,0.935,0.609,0.624,0.624
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c9863addf5ea52fec2daa05bb2ecbaa772b5750cfd62532070f4ff77b4269a
3
+ size 43215124
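This is a Git LFS pointer, not the weights themselves; the real file is fetched on checkout. A sketch for verifying a downloaded copy against the pointer's oid (the local filename is assumed):

```python
import hashlib

# Hash the downloaded weights and compare against the sha256 oid above.
with open("model.safetensors", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == "e3c9863addf5ea52fec2daa05bb2ecbaa772b5750cfd62532070f4ff77b4269a"
```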
object_name_min3.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min3_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min4.csv ADDED
@@ -0,0 +1,19 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_name,0.609,0.752,0.805,0.866,0.581,0.58,0.609
3
+ ,,avg+3D,om4-white_name,0.603,0.744,0.8,0.863,0.573,0.57,0.603
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.584,0.743,0.792,0.858,0.548,0.543,0.584
5
+ ,,avg+3D,om4-3Dwhite-1frame_name,0.579,0.741,0.793,0.854,0.541,0.536,0.579
6
+ ,white+3Dx4,avg,om4-3Dwhite_name,0.563,0.716,0.777,0.843,0.54,0.558,0.563
7
+ ,,avg+3D,om4-3Dwhite_name,0.571,0.715,0.779,0.849,0.547,0.563,0.571
8
+ Yes,white,avg,om4-white_name_bm-pretrn,0.58,0.739,0.798,0.867,0.547,0.546,0.58
9
+ ,,avg,om4-white_name_bm-pretrn-b,0.588,0.743,0.798,0.857,0.552,0.553,0.588
10
+ ,,avg+3D,om4-white_name_bm-pretrn,0.575,0.732,0.795,0.859,0.541,0.544,0.575
11
+ ,,avg+3D,om4-white_name_bm-pretrn-b,0.59,0.749,0.803,0.863,0.56,0.564,0.59
12
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.587,0.742,0.789,0.857,0.552,0.548,0.587
13
+ ,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.589,0.74,0.801,0.861,0.557,0.558,0.589
14
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.605,0.75,0.8,0.866,0.58,0.577,0.605
15
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.594,0.749,0.801,0.871,0.573,0.585,0.594
16
+ ,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.558,0.726,0.785,0.845,0.539,0.55,0.558
17
+ ,,avg,om4-3Dwhite_name_bm-pretrn-b,0.566,0.721,0.774,0.85,0.543,0.55,0.566
18
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.55,0.718,0.781,0.839,0.538,0.572,0.55
19
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.562,0.709,0.765,0.839,0.554,0.597,0.562
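The `*_bm-pretrn` and `*_bm-pretrn-b` rows appear to be repeat runs of the same configuration (an inference from the naming, not documented here). A sketch of collapsing the suffix and averaging before comparison:

```python
import pandas as pd

df = pd.read_csv("object_name_min4.csv")
df[["BM pretrain", "Train data"]] = df[["BM pretrain", "Train data"]].ffill()
# Treat the "-b" suffix as a second run of the same config (an assumption).
df["config"] = df["config"].str.removesuffix("-b")
print(df.groupby(["config", "Test time method"])["Acc."].mean())
```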
object_name_min4_max1.csv ADDED
@@ -0,0 +1,19 @@
1
+ BM pretrain,Train data,Test time method,config,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
2
+ No,white,avg,om4-white_name,0.575,0.722,0.779,0.845,0.551,0.55,0.575
3
+ ,,avg+3D,om4-white_name,0.568,0.721,0.779,0.841,0.542,0.543,0.568
4
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name,0.54,0.718,0.766,0.836,0.507,0.497,0.54
5
+ ,,avg+3D,om4-3Dwhite-1frame_name,0.544,0.712,0.771,0.834,0.507,0.496,0.544
6
+ ,white+3Dx4,avg,om4-3Dwhite_name,0.53,0.682,0.749,0.821,0.512,0.533,0.53
7
+ ,,avg+3D,om4-3Dwhite_name,0.535,0.686,0.75,0.828,0.515,0.536,0.535
8
+ Yes,white,avg,om4-white_name_bm-pretrn,0.549,0.709,0.776,0.84,0.518,0.514,0.549
9
+ ,,avg,om4-white_name_bm-pretrn-b,0.55,0.722,0.769,0.841,0.518,0.516,0.55
10
+ ,,avg+3D,om4-white_name_bm-pretrn,0.544,0.703,0.771,0.842,0.511,0.511,0.544
11
+ ,,avg+3D,om4-white_name_bm-pretrn-b,0.554,0.721,0.778,0.844,0.529,0.534,0.554
12
+ ,white+3Dx1,avg,om4-3Dwhite-1frame_name_bm-pretrn,0.555,0.712,0.761,0.836,0.523,0.513,0.555
13
+ ,,avg,om4-3Dwhite-1frame_name_bm-pretrn-b,0.562,0.705,0.77,0.833,0.53,0.527,0.562
14
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn,0.568,0.72,0.772,0.843,0.547,0.546,0.568
15
+ ,,avg+3D,om4-3Dwhite-1frame_name_bm-pretrn-b,0.564,0.717,0.772,0.848,0.543,0.55,0.564
16
+ ,white+3Dx4,avg,om4-3Dwhite_name_bm-pretrn,0.517,0.68,0.752,0.815,0.498,0.507,0.517
17
+ ,,avg,om4-3Dwhite_name_bm-pretrn-b,0.516,0.67,0.733,0.823,0.499,0.514,0.516
18
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn,0.506,0.677,0.743,0.807,0.494,0.528,0.506
19
+ ,,avg+3D,om4-3Dwhite_name_bm-pretrn-b,0.518,0.66,0.726,0.807,0.508,0.544,0.518
object_name_min5.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min5_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
object_name_min6_max1.csv ADDED
@@ -0,0 +1 @@
1
+ BM pretrain,Train data,Test time method,Acc.,Top 3 Acc.,Top 5 Acc.,Top 10 Acc.,F1,Precision,Recall
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "crop_size": {
3
+ "height": 289,
4
+ "width": 289
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.485,
12
+ 0.456,
13
+ 0.406
14
+ ],
15
+ "image_processor_type": "EfficientNetImageProcessor",
16
+ "image_std": [
17
+ 0.47853944,
18
+ 0.4732864,
19
+ 0.47434163
20
+ ],
21
+ "include_top": true,
22
+ "resample": 0,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "rescale_offset": false,
25
+ "size": {
26
+ "height": 300,
27
+ "width": 300
28
+ }
29
+ }
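A sketch of exercising this processor, assuming the JSON above sits in the current directory as preprocessor_config.json: with do_center_crop false and size 300, any input comes out as a 3x300x300 tensor.

```python
import numpy as np
from PIL import Image
from transformers import EfficientNetImageProcessor

processor = EfficientNetImageProcessor.from_pretrained(".")
image = Image.fromarray(np.zeros((640, 480, 3), dtype=np.uint8))  # dummy image
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 300, 300])
```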
results.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc310ec19387e9a791da6356d5d50bd06c606c82ffd0f52ba6fe709f9154ce88
3
+ size 191453
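results.pkl and results_local.pkl are pickled result objects of identical size; their exact structure is not documented in this commit. A minimal, hedged loading sketch:

```python
import pickle

with open("results.pkl", "rb") as f:
    results = pickle.load(f)
print(type(results))  # inspect the object before assuming a schema
```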
results_local.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5f163e18e85fccf040281518e2bb04ed3af6d8542d59d802297affdc23a6e4
3
+ size 191453
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf85df85ac87760d3117c6911e8713dacf4c0df3b705a14f35c5188bbf5c80d0
3
+ size 4856
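training_args.bin is the `TrainingArguments` object that the HF `Trainer` serialises with `torch.save`. A sketch for inspecting it (transformers must be importable for unpickling to succeed):

```python
import torch

training_args = torch.load("training_args.bin")
print(training_args.learning_rate, training_args.num_train_epochs)  # cf. the card above
```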
wandb/debug-cli.james.log ADDED
File without changes
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240214_112422-hfwsgqj3/files/config.yaml ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240214_112422-hfwsgqj3/files/output.log ADDED
@@ -0,0 +1,33 @@
1
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
2
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
3
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
4
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
5
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
6
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
7
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
8
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
9
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
10
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
11
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
12
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
13
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
14
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
15
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
16
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
17
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
18
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
19
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
20
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
21
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
22
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
23
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
24
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
25
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
26
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
27
+ [{"variableName": "data_config", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "ds", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "file2obj", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "file2obj_3", "type": "pandas", "supportedEngines": ["pandas"]}, {"variableName": "labels", "type": "list", "supportedEngines": ["pandas"]}, {"variableName": "obj_num_counts", "type": "series", "supportedEngines": ["pandas"]}, {"variableName": "output", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_class_indices", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "top5_probabilities", "type": "tensor", "supportedEngines": ["pandas"]}, {"variableName": "train_val", "type": "dictionary", "supportedEngines": ["pandas"]}, {"variableName": "trainval_test", "type": "dictionary", "supportedEngines": ["pandas"]}]
28
+ Some weights of EfficientNetForImageClassification were not initialized from the model checkpoint at google/efficientnet-b3 and are newly initialized because the shapes did not match:
29
+ - classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([3872, 1536]) in the model instantiated
30
+ - classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3872]) in the model instantiated
31
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
32
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
33
+ wandb: WARNING Serializing object of type dict that is 147552 bytes
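The log's variable listing names `output`, `top5_probabilities` and `top5_class_indices`; a self-contained sketch of how those are typically produced, with the checkpoint directory "." standing in for the trained files above:

```python
import torch
from transformers import EfficientNetForImageClassification

model = EfficientNetForImageClassification.from_pretrained(".").eval()
pixel_values = torch.zeros(1, 3, 300, 300)  # placeholder batch
with torch.no_grad():
    output = model(pixel_values=pixel_values).logits
top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=-1), k=5)
print(top5_class_indices)
```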
wandb/run-20240214_112422-hfwsgqj3/files/requirements.txt ADDED
@@ -0,0 +1,202 @@
1
+ accelerate==0.27.2
2
+ aiohttp==3.9.3
3
+ aiosignal==1.3.1
4
+ alabaster==0.7.16
5
+ anyio==4.2.0
6
+ appdirs==1.4.4
7
+ argon2-cffi-bindings==21.2.0
8
+ argon2-cffi==23.1.0
9
+ arrow==1.3.0
10
+ artifact-classification==0.0.1
11
+ asttokens==2.4.1
12
+ async-lru==2.0.4
13
+ async-timeout==4.0.3
14
+ attrs==23.2.0
15
+ babel==2.14.0
16
+ beautifulsoup4==4.12.3
17
+ black==24.1.1
18
+ bleach==6.1.0
19
+ certifi==2024.2.2
20
+ cffi==1.16.0
21
+ charset-normalizer==3.3.2
22
+ click==8.1.7
23
+ comm==0.2.1
24
+ contourpy==1.2.0
25
+ coverage==7.4.1
26
+ cycler==0.12.1
27
+ datasets==2.17.0
28
+ debugpy==1.8.0
29
+ decorator==5.1.1
30
+ defusedxml==0.7.1
31
+ dill==0.3.8
32
+ docker-pycreds==0.4.0
33
+ docutils==0.20.1
34
+ easydict==1.11
35
+ et-xmlfile==1.1.0
36
+ evaluate==0.4.1
37
+ exceptiongroup==1.2.0
38
+ executing==2.0.1
39
+ fastjsonschema==2.19.1
40
+ filelock==3.13.1
41
+ flake8==7.0.0
42
+ fonttools==4.48.1
43
+ fqdn==1.5.1
44
+ frozenlist==1.4.1
45
+ fsspec==2023.10.0
46
+ gdown==5.1.0
47
+ gitdb==4.0.11
48
+ gitpython==3.1.41
49
+ h11==0.14.0
50
+ httpcore==1.0.2
51
+ httpx==0.26.0
52
+ huggingface-hub==0.20.3
53
+ idna==3.6
54
+ imagesize==1.4.1
55
+ ipykernel==6.29.2
56
+ ipython==8.21.0
57
+ ipywidgets==8.1.2
58
+ isoduration==20.11.0
59
+ isort==5.13.2
60
+ jedi==0.19.1
61
+ jinja2==3.1.3
62
+ joblib==1.3.2
63
+ json5==0.9.14
64
+ jsonpointer==2.4
65
+ jsonschema-specifications==2023.12.1
66
+ jsonschema==4.21.1
67
+ jupyter-client==8.6.0
68
+ jupyter-console==6.6.3
69
+ jupyter-core==5.7.1
70
+ jupyter-events==0.9.0
71
+ jupyter-lsp==2.2.2
72
+ jupyter-server-terminals==0.5.2
73
+ jupyter-server==2.12.5
74
+ jupyter==1.0.0
75
+ jupyterlab-pygments==0.3.0
76
+ jupyterlab-server==2.25.2
77
+ jupyterlab-widgets==3.0.10
78
+ jupyterlab==4.1.1
79
+ kiwisolver==1.4.5
80
+ kornia==0.7.1
81
+ loguru==0.7.2
82
+ markupsafe==2.1.5
83
+ matplotlib-inline==0.1.6
84
+ matplotlib==3.8.2
85
+ mccabe==0.7.0
86
+ mistune==3.0.2
87
+ mpmath==1.3.0
88
+ multidict==6.0.5
89
+ multiprocess==0.70.16
90
+ mypy-extensions==1.0.0
91
+ nbclient==0.9.0
92
+ nbconvert==7.16.0
93
+ nbformat==5.9.2
94
+ nest-asyncio==1.6.0
95
+ networkx==3.2.1
96
+ notebook-shim==0.2.3
97
+ notebook==7.1.0
98
+ numpy==1.26.4
99
+ nvidia-cublas-cu12==12.1.3.1
100
+ nvidia-cuda-cupti-cu12==12.1.105
101
+ nvidia-cuda-nvrtc-cu12==12.1.105
102
+ nvidia-cuda-runtime-cu12==12.1.105
103
+ nvidia-cudnn-cu12==8.9.2.26
104
+ nvidia-cufft-cu12==11.0.2.54
105
+ nvidia-curand-cu12==10.3.2.106
106
+ nvidia-cusolver-cu12==11.4.5.107
107
+ nvidia-cusparse-cu12==12.1.0.106
108
+ nvidia-nccl-cu12==2.19.3
109
+ nvidia-nvjitlink-cu12==12.3.101
110
+ nvidia-nvtx-cu12==12.1.105
111
+ opencv-python==4.9.0.80
112
+ openpyxl==3.1.2
113
+ overrides==7.7.0
114
+ packaging==23.2
115
+ pandas==2.2.0
116
+ pandocfilters==1.5.1
117
+ parso==0.8.3
118
+ pathspec==0.12.1
119
+ pexpect==4.9.0
120
+ pillow==10.2.0
121
+ pip==24.0
122
+ platformdirs==4.2.0
123
+ prometheus-client==0.19.0
124
+ prompt-toolkit==3.0.43
125
+ protobuf==4.25.2
126
+ psutil==5.9.8
127
+ ptyprocess==0.7.0
128
+ pure-eval==0.2.2
129
+ pyarrow-hotfix==0.6
130
+ pyarrow==15.0.0
131
+ pycodestyle==2.11.1
132
+ pycparser==2.21
133
+ pyflakes==3.2.0
134
+ pygments==2.17.2
135
+ pyparsing==3.1.1
136
+ pysocks==1.7.1
137
+ python-dateutil==2.8.2
138
+ python-dotenv==1.0.1
139
+ python-json-logger==2.0.7
140
+ pytz==2024.1
141
+ pyyaml==6.0.1
142
+ pyzmq==25.1.2
143
+ qtconsole==5.5.1
144
+ qtpy==2.4.1
145
+ referencing==0.33.0
146
+ regex==2023.12.25
147
+ requests==2.31.0
148
+ responses==0.18.0
149
+ rfc3339-validator==0.1.4
150
+ rfc3986-validator==0.1.1
151
+ rpds-py==0.17.1
152
+ safetensors==0.4.2
153
+ scikit-learn==1.4.0
154
+ scipy==1.12.0
155
+ send2trash==1.8.2
156
+ sentry-sdk==1.40.4
157
+ setproctitle==1.3.3
158
+ setuptools==69.1.0
159
+ six==1.16.0
160
+ smmap==5.0.1
161
+ sniffio==1.3.0
162
+ snowballstemmer==2.2.0
163
+ soupsieve==2.5
164
+ sphinx==7.2.6
165
+ sphinxcontrib-applehelp==1.0.8
166
+ sphinxcontrib-devhelp==1.0.6
167
+ sphinxcontrib-htmlhelp==2.0.5
168
+ sphinxcontrib-jsmath==1.0.1
169
+ sphinxcontrib-qthelp==1.0.7
170
+ sphinxcontrib-serializinghtml==1.1.10
171
+ stack-data==0.6.3
172
+ sympy==1.12
173
+ terminado==0.18.0
174
+ threadpoolctl==3.2.0
175
+ timm==0.9.12
176
+ tinycss2==1.2.1
177
+ tokenizers==0.15.2
178
+ tomli==2.0.1
179
+ torch==2.2.0
180
+ torchvision==0.17.0
181
+ tornado==6.4
182
+ tqdm==4.66.1
183
+ traitlets==5.14.1
184
+ transformers==4.37.2
185
+ transparent-background==1.2.12
186
+ triton==2.2.0
187
+ typer==0.9.0
188
+ types-python-dateutil==2.8.19.20240106
189
+ typing-extensions==4.9.0
190
+ tzdata==2023.4
191
+ uri-template==1.3.0
192
+ urllib3==2.2.0
193
+ wandb==0.16.3
194
+ wcwidth==0.2.13
195
+ webcolors==1.13
196
+ webencodings==0.5.1
197
+ websocket-client==1.7.0
198
+ wget==3.2
199
+ wheel==0.42.0
200
+ widgetsnbextension==4.0.10
201
+ xxhash==3.4.1
202
+ yarl==1.9.4
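Note that this snapshot pins transformers==4.37.2 while the model card above reports Transformers 4.38.2; the snapshot reflects the wandb run's environment rather than the final one. A sketch for spot-checking a few pins against the local environment (the subset chosen here is arbitrary):

```python
from importlib.metadata import version

for pkg, want in [("torch", "2.2.0"), ("transformers", "4.37.2"), ("wandb", "0.16.3")]:
    have = version(pkg)
    print(f"{pkg}: installed {have}, pinned {want}", "OK" if have == want else "MISMATCH")
```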