obichimav committed on
Commit 8e5d8c7 · verified · 1 Parent(s): 062ecd1

Upload 42 files


PyTorch semantic segmentation pipeline: train models and run prediction on binary, multiclass, and raster imagery.

  Files changed (42)
  1. README.md +52 -0
  2. examples/.ipynb_checkpoints/predict-checkpoint.ipynb +255 -0
  3. examples/.ipynb_checkpoints/train-checkpoint.ipynb +113 -0
  4. examples/predict.ipynb +255 -0
  5. examples/train.ipynb +113 -0
  6. requirements.txt +15 -0
  7. semantic-segmentation/SemanticModel/.ipynb_checkpoints/custom_losses-checkpoint.py +97 -0
  8. semantic-segmentation/SemanticModel/.ipynb_checkpoints/data_loader-checkpoint.py +129 -0
  9. semantic-segmentation/SemanticModel/.ipynb_checkpoints/encoder_management-checkpoint.py +136 -0
  10. semantic-segmentation/SemanticModel/.ipynb_checkpoints/evaluation_utils-checkpoint.py +108 -0
  11. semantic-segmentation/SemanticModel/.ipynb_checkpoints/image_preprocessing-checkpoint.py +81 -0
  12. semantic-segmentation/SemanticModel/.ipynb_checkpoints/metrics-checkpoint.py +94 -0
  13. semantic-segmentation/SemanticModel/.ipynb_checkpoints/model_core-checkpoint.py +129 -0
  14. semantic-segmentation/SemanticModel/.ipynb_checkpoints/prediction-checkpoint.py +336 -0
  15. semantic-segmentation/SemanticModel/.ipynb_checkpoints/training-checkpoint.py +313 -0
  16. semantic-segmentation/SemanticModel/.ipynb_checkpoints/utilities-checkpoint.py +119 -0
  17. semantic-segmentation/SemanticModel/.ipynb_checkpoints/visualization-checkpoint.py +115 -0
  18. semantic-segmentation/SemanticModel/__init__.py +0 -0
  19. semantic-segmentation/SemanticModel/__pycache__/__init__.cpython-38.pyc +0 -0
  20. semantic-segmentation/SemanticModel/__pycache__/custom_losses.cpython-38.pyc +0 -0
  21. semantic-segmentation/SemanticModel/__pycache__/data_loader.cpython-38.pyc +0 -0
  22. semantic-segmentation/SemanticModel/__pycache__/encoder_management.cpython-38.pyc +0 -0
  23. semantic-segmentation/SemanticModel/__pycache__/evaluation_utils.cpython-38.pyc +0 -0
  24. semantic-segmentation/SemanticModel/__pycache__/image_preprocessing.cpython-38.pyc +0 -0
  25. semantic-segmentation/SemanticModel/__pycache__/metrics.cpython-38.pyc +0 -0
  26. semantic-segmentation/SemanticModel/__pycache__/model_core.cpython-38.pyc +0 -0
  27. semantic-segmentation/SemanticModel/__pycache__/prediction.cpython-38.pyc +0 -0
  28. semantic-segmentation/SemanticModel/__pycache__/training.cpython-38.pyc +0 -0
  29. semantic-segmentation/SemanticModel/__pycache__/utilities.cpython-38.pyc +0 -0
  30. semantic-segmentation/SemanticModel/__pycache__/visualization.cpython-38.pyc +0 -0
  31. semantic-segmentation/SemanticModel/custom_losses.py +97 -0
  32. semantic-segmentation/SemanticModel/data_loader.py +129 -0
  33. semantic-segmentation/SemanticModel/encoder_management.py +136 -0
  34. semantic-segmentation/SemanticModel/evaluation_utils.py +108 -0
  35. semantic-segmentation/SemanticModel/image_preprocessing.py +81 -0
  36. semantic-segmentation/SemanticModel/metrics.py +94 -0
  37. semantic-segmentation/SemanticModel/model_core.py +129 -0
  38. semantic-segmentation/SemanticModel/prediction.py +336 -0
  39. semantic-segmentation/SemanticModel/training.py +313 -0
  40. semantic-segmentation/SemanticModel/utilities.py +119 -0
  41. semantic-segmentation/SemanticModel/visualization.py +115 -0
  42. setup.py +34 -0
README.md ADDED
@@ -0,0 +1,52 @@
+ # SemanticModel
+
+ Deep learning framework for semantic segmentation using PyTorch.
+
+ ## Install
+ ```bash
+ pip install -r requirements.txt
+ python setup.py install
+ ```
+
+ ## Usage
+ ```python
+ from SemanticModel.model_core import SegmentationModel
+ from SemanticModel.training import ModelTrainer
+ from SemanticModel.prediction import PredictionPipeline
+
+ # Train
+ model = SegmentationModel(
+     classes=['background', 'object'],
+     architecture='unet',
+     encoder='timm-regnety_120'
+ )
+
+ trainer = ModelTrainer(
+     model_config=model,
+     root_dir='path/to/dataset',
+     epochs=40
+ )
+ model, metrics = trainer.train()
+
+ # Predict
+ predictor = PredictionPipeline(model)
+ predictor.predict_single_image('image.jpg')
+ predictor.predict_directory('image_dir/')
+ predictor.predict_raster('raster.tif')
+
+ # Load pretrained
+ model = SegmentationModel(
+     classes=['background', 'object'],
+     weights='path/to/best_model.pth'
+ )
+ ```
+
+ ## Data Structure
+ ```
+ dataset/
+ ├── train/
+ │   ├── Images/
+ │   └── Masks/
+ └── val/
+     ├── Images/
+     └── Masks/
+ ```
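A note on the mask format implied by `SegmentationDataset._load_mask` in `semantic-segmentation/SemanticModel/data_loader.py`: each mask is a single-channel PNG whose pixel values are class indices in the same order as the `classes` list (background = 0), and its filename mirrors the image filename. A minimal sketch of writing a compliant mask (hypothetical sizes and filenames, using the `numpy` and `opencv-python` dependencies from requirements.txt):

```python
import cv2
import numpy as np

# Hypothetical 4-class example: pixel value = index into the classes list
# (0 = background, 1..3 = foreground classes)
mask = np.zeros((512, 512), dtype=np.uint8)
mask[100:200, 100:200] = 1
mask[300:400, 300:400] = 2

# Save under Masks/ with the same base name as the image,
# e.g. Images/plot_01.jpg pairs with Masks/plot_01.png
cv2.imwrite('plot_01.png', mask)
```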
examples/.ipynb_checkpoints/predict-checkpoint.ipynb ADDED
@@ -0,0 +1,255 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "42e4027f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%cd \"../../MYSMP\""
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "id": "ef6ea33e",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "# %pip install -r requirements.txt"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 3,
34
+ "id": "30812aed",
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stdout",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP/semantic-segmentation\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "%cd \"../MYSMP/semantic-segmentation\""
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 5,
52
+ "id": "aaa4a036",
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "name": "stdout",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP/semantic-segmentation\n"
60
+ ]
61
+ }
62
+ ],
63
+ "source": [
64
+ "!pwd"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 6,
70
+ "id": "099239c4",
71
+ "metadata": {},
72
+ "outputs": [
73
+ {
74
+ "name": "stdout",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "Loading pretrained model...\n"
78
+ ]
79
+ },
80
+ {
81
+ "name": "stderr",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "/srv/conda/envs/notebook/lib/python3.8/site-packages/segmentation_models_pytorch/base/modules.py:116: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
85
+ " return self.activation(x)\n"
86
+ ]
87
+ }
88
+ ],
89
+ "source": [
90
+ "# Initialize the model\n",
91
+ "from SemanticModel.model_core import SegmentationModel\n",
92
+ "\n",
93
+ "model = SegmentationModel(\n",
94
+ " classes=['bg', 'cacao', 'matarraton', 'abarco'],\n",
95
+ " architecture='unet',\n",
96
+ " encoder='timm-regnety_120',\n",
97
+ " weights='../data/model_outputs-unet[timm-regnety_120]-01-23-2025_075803/best_model.pth'\n",
98
+ ")\n",
99
+ "\n",
100
+ "# Initialize prediction pipeline\n",
101
+ "from SemanticModel.prediction import PredictionPipeline\n",
102
+ "\n",
103
+ "predictor = PredictionPipeline(model)\n",
104
+ "output_dir='../predictions'\n",
105
+ "image_path= '../data/Images/2019-Mission2-odm_1_42.jpg'\n",
106
+ "\n",
107
+ "# Make prediction\n",
108
+ "prediction = predictor.predict_single_image(image_path=image_path,output_dir=output_dir)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 7,
114
+ "id": "e0ae4c21",
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "name": "stdout",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "\n",
122
+ "Predictions saved to: path/to/folderofImages/predictions\n"
123
+ ]
124
+ },
125
+ {
126
+ "data": {
127
+ "text/plain": [
128
+ "'path/to/folderofImages/predictions'"
129
+ ]
130
+ },
131
+ "execution_count": 7,
132
+ "metadata": {},
133
+ "output_type": "execute_result"
134
+ }
135
+ ],
136
+ "source": [
137
+ "# Directory of images\n",
138
+ "image_path= 'path/to/folderofImages'\n",
139
+ "predictor.predict_directory(image_path)"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 8,
145
+ "id": "fc4f2a48",
146
+ "metadata": {},
147
+ "outputs": [
148
+ {
149
+ "name": "stdout",
150
+ "output_type": "stream",
151
+ "text": [
152
+ "Loading raster...\n",
153
+ "Processed 6/6 tiles\n",
154
+ "Prediction saved to: ../predictions/prediction.tif\n"
155
+ ]
156
+ },
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "(array([[[0, 0, 0],\n",
161
+ " [0, 0, 0],\n",
162
+ " [0, 0, 0],\n",
163
+ " ...,\n",
164
+ " [0, 0, 0],\n",
165
+ " [0, 0, 0],\n",
166
+ " [0, 0, 0]],\n",
167
+ " \n",
168
+ " [[0, 0, 0],\n",
169
+ " [0, 0, 0],\n",
170
+ " [0, 0, 0],\n",
171
+ " ...,\n",
172
+ " [0, 0, 0],\n",
173
+ " [0, 0, 0],\n",
174
+ " [0, 0, 0]],\n",
175
+ " \n",
176
+ " [[0, 0, 0],\n",
177
+ " [0, 0, 0],\n",
178
+ " [0, 0, 0],\n",
179
+ " ...,\n",
180
+ " [0, 0, 0],\n",
181
+ " [0, 0, 0],\n",
182
+ " [0, 0, 0]],\n",
183
+ " \n",
184
+ " ...,\n",
185
+ " \n",
186
+ " [[0, 0, 0],\n",
187
+ " [0, 0, 0],\n",
188
+ " [0, 0, 0],\n",
189
+ " ...,\n",
190
+ " [0, 0, 0],\n",
191
+ " [0, 0, 0],\n",
192
+ " [0, 0, 0]],\n",
193
+ " \n",
194
+ " [[0, 0, 0],\n",
195
+ " [0, 0, 0],\n",
196
+ " [0, 0, 0],\n",
197
+ " ...,\n",
198
+ " [0, 0, 0],\n",
199
+ " [0, 0, 0],\n",
200
+ " [0, 0, 0]],\n",
201
+ " \n",
202
+ " [[0, 0, 0],\n",
203
+ " [0, 0, 0],\n",
204
+ " [0, 0, 0],\n",
205
+ " ...,\n",
206
+ " [0, 0, 0],\n",
207
+ " [0, 0, 0],\n",
208
+ " [0, 0, 0]]], dtype=uint8),\n",
209
+ " {'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 2365, 'height': 1797, 'count': 3, 'crs': CRS.from_epsg(32618), 'transform': Affine(0.03564594364277113, 0.0, 740295.5186183113,\n",
210
+ " 0.0, -0.03564594364276106, 485117.0212715292), 'tiled': False, 'interleave': 'pixel'})"
211
+ ]
212
+ },
213
+ "execution_count": 8,
214
+ "metadata": {},
215
+ "output_type": "execute_result"
216
+ }
217
+ ],
218
+ "source": [
219
+ "# Large raster\n",
220
+ "output_path='../predictions/prediction.tif'\n",
221
+ "raster_path = '../data/2021-Mission7_clipped_2.tif'\n",
222
+ "predictor.predict_raster(raster_path, tile_size=1024,output_path=output_path,format='color')"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "id": "8e4ab5a5",
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": []
232
+ }
233
+ ],
234
+ "metadata": {
235
+ "kernelspec": {
236
+ "display_name": "AgLab - Python 3",
237
+ "language": "python",
238
+ "name": "python3"
239
+ },
240
+ "language_info": {
241
+ "codemirror_mode": {
242
+ "name": "ipython",
243
+ "version": 3
244
+ },
245
+ "file_extension": ".py",
246
+ "mimetype": "text/x-python",
247
+ "name": "python",
248
+ "nbconvert_exporter": "python",
249
+ "pygments_lexer": "ipython3",
250
+ "version": "3.8.13"
251
+ }
252
+ },
253
+ "nbformat": 4,
254
+ "nbformat_minor": 5
255
+ }
examples/.ipynb_checkpoints/train-checkpoint.ipynb ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "333ede5f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%cd \"../../MYSMP\""
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "84d4c945",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# %pip install -r requirements.txt"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "7b088ef9",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "%cd \"../MYSMP/semantic-segmentation\""
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "id": "99937292",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "from SemanticModel.model_core import SegmentationModel\n",
41
+ "from SemanticModel.training import ModelTrainer"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "ab69a291",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# initialization loss function\n",
52
+ "model = SegmentationModel(\n",
53
+ " classes=['bg', 'cacao', 'matarraton', 'abarco'],\n",
54
+ " architecture='unet',\n",
55
+ " encoder='timm-regnety_120',\n",
56
+ " weights='imagenet',\n",
57
+ " loss='dice' # Try 'dice' or 'tversky' instead of default\n",
58
+ ")\n",
59
+ "\n",
60
+ "# training parameters\n",
61
+ "trainer = ModelTrainer(\n",
62
+ " model_config=model,\n",
63
+ " root_dir='../data',\n",
64
+ " epochs=100,\n",
65
+ " train_size=1024,\n",
66
+ " batch_size=4,\n",
67
+ " learning_rate=1e-3, # Increased learning rate\n",
68
+ " step_count=3, # More learning rate adjustments\n",
69
+ " decay_factor=0.5 # Stronger decay\n",
70
+ ")"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "38fc7c6f",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "trained_model, metrics = trainer.train()"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "a053c2ae",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": []
90
+ }
91
+ ],
92
+ "metadata": {
93
+ "kernelspec": {
94
+ "display_name": "AgLab - Python 3",
95
+ "language": "python",
96
+ "name": "python3"
97
+ },
98
+ "language_info": {
99
+ "codemirror_mode": {
100
+ "name": "ipython",
101
+ "version": 3
102
+ },
103
+ "file_extension": ".py",
104
+ "mimetype": "text/x-python",
105
+ "name": "python",
106
+ "nbconvert_exporter": "python",
107
+ "pygments_lexer": "ipython3",
108
+ "version": "3.8.13"
109
+ }
110
+ },
111
+ "nbformat": 4,
112
+ "nbformat_minor": 5
113
+ }
examples/predict.ipynb ADDED
@@ -0,0 +1,255 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "42e4027f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%cd \"../../MYSMP\""
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "id": "ef6ea33e",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "# %pip install -r requirements.txt"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 3,
34
+ "id": "30812aed",
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stdout",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP/semantic-segmentation\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "%cd \"../MYSMP/semantic-segmentation\""
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 5,
52
+ "id": "aaa4a036",
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "name": "stdout",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "/home/jovyan/shared/Chima/ml_project/repos/MYSMP/semantic-segmentation\n"
60
+ ]
61
+ }
62
+ ],
63
+ "source": [
64
+ "!pwd"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 6,
70
+ "id": "099239c4",
71
+ "metadata": {},
72
+ "outputs": [
73
+ {
74
+ "name": "stdout",
75
+ "output_type": "stream",
76
+ "text": [
77
+ "Loading pretrained model...\n"
78
+ ]
79
+ },
80
+ {
81
+ "name": "stderr",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "/srv/conda/envs/notebook/lib/python3.8/site-packages/segmentation_models_pytorch/base/modules.py:116: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
85
+ " return self.activation(x)\n"
86
+ ]
87
+ }
88
+ ],
89
+ "source": [
90
+ "# Initialize the model\n",
91
+ "from SemanticModel.model_core import SegmentationModel\n",
92
+ "\n",
93
+ "model = SegmentationModel(\n",
94
+ " classes=['bg', 'cacao', 'matarraton', 'abarco'],\n",
95
+ " architecture='unet',\n",
96
+ " encoder='timm-regnety_120',\n",
97
+ " weights='../data/model_outputs-unet[timm-regnety_120]-01-23-2025_075803/best_model.pth'\n",
98
+ ")\n",
99
+ "\n",
100
+ "# Initialize prediction pipeline\n",
101
+ "from SemanticModel.prediction import PredictionPipeline\n",
102
+ "\n",
103
+ "predictor = PredictionPipeline(model)\n",
104
+ "output_dir='../predictions'\n",
105
+ "image_path= '../data/Images/2019-Mission2-odm_1_42.jpg'\n",
106
+ "\n",
107
+ "# Make prediction\n",
108
+ "prediction = predictor.predict_single_image(image_path=image_path,output_dir=output_dir)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 7,
114
+ "id": "e0ae4c21",
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "name": "stdout",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "\n",
122
+ "Predictions saved to: path/to/folderofImages/predictions\n"
123
+ ]
124
+ },
125
+ {
126
+ "data": {
127
+ "text/plain": [
128
+ "'path/to/folderofImages/predictions'"
129
+ ]
130
+ },
131
+ "execution_count": 7,
132
+ "metadata": {},
133
+ "output_type": "execute_result"
134
+ }
135
+ ],
136
+ "source": [
137
+ "# Directory of images\n",
138
+ "image_path= 'path/to/folderofImages'\n",
139
+ "predictor.predict_directory(image_path)"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 8,
145
+ "id": "fc4f2a48",
146
+ "metadata": {},
147
+ "outputs": [
148
+ {
149
+ "name": "stdout",
150
+ "output_type": "stream",
151
+ "text": [
152
+ "Loading raster...\n",
153
+ "Processed 6/6 tiles\n",
154
+ "Prediction saved to: ../predictions/prediction.tif\n"
155
+ ]
156
+ },
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "(array([[[0, 0, 0],\n",
161
+ " [0, 0, 0],\n",
162
+ " [0, 0, 0],\n",
163
+ " ...,\n",
164
+ " [0, 0, 0],\n",
165
+ " [0, 0, 0],\n",
166
+ " [0, 0, 0]],\n",
167
+ " \n",
168
+ " [[0, 0, 0],\n",
169
+ " [0, 0, 0],\n",
170
+ " [0, 0, 0],\n",
171
+ " ...,\n",
172
+ " [0, 0, 0],\n",
173
+ " [0, 0, 0],\n",
174
+ " [0, 0, 0]],\n",
175
+ " \n",
176
+ " [[0, 0, 0],\n",
177
+ " [0, 0, 0],\n",
178
+ " [0, 0, 0],\n",
179
+ " ...,\n",
180
+ " [0, 0, 0],\n",
181
+ " [0, 0, 0],\n",
182
+ " [0, 0, 0]],\n",
183
+ " \n",
184
+ " ...,\n",
185
+ " \n",
186
+ " [[0, 0, 0],\n",
187
+ " [0, 0, 0],\n",
188
+ " [0, 0, 0],\n",
189
+ " ...,\n",
190
+ " [0, 0, 0],\n",
191
+ " [0, 0, 0],\n",
192
+ " [0, 0, 0]],\n",
193
+ " \n",
194
+ " [[0, 0, 0],\n",
195
+ " [0, 0, 0],\n",
196
+ " [0, 0, 0],\n",
197
+ " ...,\n",
198
+ " [0, 0, 0],\n",
199
+ " [0, 0, 0],\n",
200
+ " [0, 0, 0]],\n",
201
+ " \n",
202
+ " [[0, 0, 0],\n",
203
+ " [0, 0, 0],\n",
204
+ " [0, 0, 0],\n",
205
+ " ...,\n",
206
+ " [0, 0, 0],\n",
207
+ " [0, 0, 0],\n",
208
+ " [0, 0, 0]]], dtype=uint8),\n",
209
+ " {'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 2365, 'height': 1797, 'count': 3, 'crs': CRS.from_epsg(32618), 'transform': Affine(0.03564594364277113, 0.0, 740295.5186183113,\n",
210
+ " 0.0, -0.03564594364276106, 485117.0212715292), 'tiled': False, 'interleave': 'pixel'})"
211
+ ]
212
+ },
213
+ "execution_count": 8,
214
+ "metadata": {},
215
+ "output_type": "execute_result"
216
+ }
217
+ ],
218
+ "source": [
219
+ "# Large raster\n",
220
+ "output_path='../predictions/prediction.tif'\n",
221
+ "raster_path = '../data/2021-Mission7_clipped_2.tif'\n",
222
+ "predictor.predict_raster(raster_path, tile_size=1024,output_path=output_path,format='color')"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "id": "8e4ab5a5",
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": []
232
+ }
233
+ ],
234
+ "metadata": {
235
+ "kernelspec": {
236
+ "display_name": "AgLab - Python 3",
237
+ "language": "python",
238
+ "name": "python3"
239
+ },
240
+ "language_info": {
241
+ "codemirror_mode": {
242
+ "name": "ipython",
243
+ "version": 3
244
+ },
245
+ "file_extension": ".py",
246
+ "mimetype": "text/x-python",
247
+ "name": "python",
248
+ "nbconvert_exporter": "python",
249
+ "pygments_lexer": "ipython3",
250
+ "version": "3.8.13"
251
+ }
252
+ },
253
+ "nbformat": 4,
254
+ "nbformat_minor": 5
255
+ }
examples/train.ipynb ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "333ede5f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%cd \"../../MYSMP\""
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "84d4c945",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# %pip install -r requirements.txt"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "7b088ef9",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "%cd \"../MYSMP/semantic-segmentation\""
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "id": "99937292",
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "from SemanticModel.model_core import SegmentationModel\n",
41
+ "from SemanticModel.training import ModelTrainer"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "ab69a291",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "# initialization loss function\n",
52
+ "model = SegmentationModel(\n",
53
+ " classes=['bg', 'cacao', 'matarraton', 'abarco'],\n",
54
+ " architecture='unet',\n",
55
+ " encoder='timm-regnety_120',\n",
56
+ " weights='imagenet',\n",
57
+ " loss='dice' # Try 'dice' or 'tversky' instead of default\n",
58
+ ")\n",
59
+ "\n",
60
+ "# training parameters\n",
61
+ "trainer = ModelTrainer(\n",
62
+ " model_config=model,\n",
63
+ " root_dir='../data',\n",
64
+ " epochs=100,\n",
65
+ " train_size=1024,\n",
66
+ " batch_size=4,\n",
67
+ " learning_rate=1e-3, # Increased learning rate\n",
68
+ " step_count=3, # More learning rate adjustments\n",
69
+ " decay_factor=0.5 # Stronger decay\n",
70
+ ")"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "38fc7c6f",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "trained_model, metrics = trainer.train()"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "a053c2ae",
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": []
90
+ }
91
+ ],
92
+ "metadata": {
93
+ "kernelspec": {
94
+ "display_name": "AgLab - Python 3",
95
+ "language": "python",
96
+ "name": "python3"
97
+ },
98
+ "language_info": {
99
+ "codemirror_mode": {
100
+ "name": "ipython",
101
+ "version": 3
102
+ },
103
+ "file_extension": ".py",
104
+ "mimetype": "text/x-python",
105
+ "name": "python",
106
+ "nbconvert_exporter": "python",
107
+ "pygments_lexer": "ipython3",
108
+ "version": "3.8.13"
109
+ }
110
+ },
111
+ "nbformat": 4,
112
+ "nbformat_minor": 5
113
+ }
requirements.txt ADDED
@@ -0,0 +1,15 @@
1
+ torch
2
+ torchvision
3
+ tensorboard
4
+ pyproj
5
+ fiona==1.8.20
6
+ rtree
7
+ geopandas
8
+ rasterio
9
+ slidingwindow
10
+ opencv-python
11
+ wandb
12
+ tifffile
13
+ imagecodecs
14
+ albumentations
15
+ segmentation-models-pytorch>=0.3.3
semantic-segmentation/SemanticModel/.ipynb_checkpoints/custom_losses-checkpoint.py ADDED
@@ -0,0 +1,97 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from segmentation_models_pytorch.utils import base
4
+ from segmentation_models_pytorch.base.modules import Activation
5
+
6
+ class FocalLossFunction(base.Loss):
7
+ def __init__(self, activation=None, alpha=0.25, gamma=1.5, reduction='mean', **kwargs):
8
+ super().__init__(**kwargs)
9
+ self.activation = Activation(activation)
10
+ self.alpha = alpha
11
+ self.gamma = gamma
12
+ self.reduction = reduction
13
+
14
+ def forward(self, inputs, targets):
15
+ if inputs.shape[1] == 1: # Binary case
16
+ inputs = torch.cat((inputs, 1 - inputs), dim=1)
17
+ targets = torch.cat((targets, 1 - targets), dim=1)
18
+
19
+ targets = torch.argmax(targets, dim=1)
20
+ cross_entropy = F.cross_entropy(inputs, targets, reduction='none')
21
+ probability = torch.exp(-cross_entropy)
22
+ alpha_factor = self.alpha if inputs.shape[1] > 1 else torch.where(
23
+ targets == 1, 1-self.alpha, self.alpha)
24
+
25
+ focal_weight = alpha_factor * (1 - probability) ** self.gamma * cross_entropy
26
+
27
+ if self.reduction == 'mean':
28
+ return focal_weight.mean()
29
+ elif self.reduction == 'sum':
30
+ return focal_weight.sum()
31
+ return focal_weight
32
+
33
+ class TverskyLossFunction(base.Loss):
34
+ def __init__(self, activation=None, alpha=0.5, beta=0.5, ignore_channels=None,
35
+ reduction='mean', **kwargs):
36
+ super().__init__(**kwargs)
37
+ self.activation = Activation(activation)
38
+ self.alpha = alpha
39
+ self.beta = beta
40
+ self.ignore_channels = ignore_channels
41
+ self.reduction = reduction
42
+
43
+ def forward(self, inputs, targets):
44
+ if self.ignore_channels is not None:
45
+ mask = torch.ones(inputs.shape[1], dtype=torch.bool, device=inputs.device)
46
+ mask[self.ignore_channels] = False
47
+ inputs = inputs[:, mask, ...]
48
+
49
+ num_classes = inputs.shape[1]
50
+ inputs_softmax = (torch.sigmoid(inputs) if num_classes == 1
51
+ else F.softmax(inputs, dim=1))
52
+
53
+ if num_classes == 1:
54
+ inputs_softmax = inputs_softmax.squeeze(1)
55
+ targets = targets.squeeze(1)
56
+
57
+ tversky_loss = 0
58
+ for class_idx in range(num_classes):
59
+ if num_classes == 1:
60
+ flat_inputs = inputs_softmax.reshape(-1)
61
+ flat_targets = targets.reshape(-1)
62
+ else:
63
+ flat_inputs = inputs_softmax[:, class_idx].reshape(-1)
64
+ flat_targets = targets[:, class_idx].reshape(-1)
65
+
66
+ intersection = (flat_inputs * flat_targets).sum()
67
+ fps = ((1 - flat_targets) * flat_inputs).sum()
68
+ fns = (flat_targets * (1 - flat_inputs)).sum()
69
+
70
+ tversky_index = intersection + self.alpha * fps + self.beta * fns + 1e-10
71
+ tversky_loss += 1 - intersection / tversky_index
72
+
73
+ if self.reduction == 'mean':
74
+ return tversky_loss / (1 if num_classes == 1 else num_classes)
75
+ elif self.reduction == 'sum':
76
+ return tversky_loss
77
+ return tversky_loss / inputs.shape[0]
78
+
79
+ class EnhancedCrossEntropy(base.Loss):
80
+ def __init__(self, activation=None, ignore_channels=None, reduction='mean', **kwargs):
81
+ super().__init__(**kwargs)
82
+ self.activation = Activation(activation)
83
+ self.ignore_channels = ignore_channels
84
+ self.reduction = reduction
85
+
86
+ def forward(self, inputs, targets):
87
+ inputs = self.activation(inputs)
88
+
89
+ if self.ignore_channels is not None:
90
+ mask = torch.ones(inputs.shape[1], dtype=torch.bool, device=inputs.device)
91
+ mask[self.ignore_channels] = False
92
+ inputs = inputs[:, mask, ...]
93
+
94
+ if targets.dim() == 4: # Convert one-hot to class indices
95
+ targets = torch.argmax(targets, dim=1)
96
+
97
+ return F.cross_entropy(inputs, targets, reduction=self.reduction)
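A minimal usage sketch of the losses above, assuming the `SemanticModel` package from this commit is installed. In the multiclass branches both losses take raw logits of shape (B, C, H, W) and one-hot targets of the same shape:

```python
import torch
import torch.nn.functional as F
from SemanticModel.custom_losses import FocalLossFunction, TverskyLossFunction

# Dummy 4-class batch: logits and one-hot targets, both (B, C, H, W)
logits = torch.randn(2, 4, 64, 64)
labels = torch.randint(0, 4, (2, 64, 64))
targets = F.one_hot(labels, num_classes=4).permute(0, 3, 1, 2).float()

focal = FocalLossFunction(gamma=2.0)                # defaults: alpha=0.25, gamma=1.5
tversky = TverskyLossFunction(alpha=0.7, beta=0.3)  # alpha weights FPs, beta weights FNs
print(focal(logits, targets).item(), tversky(logits, targets).item())
```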
semantic-segmentation/SemanticModel/.ipynb_checkpoints/data_loader-checkpoint.py ADDED
@@ -0,0 +1,129 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from torch.utils.data import Dataset as BaseDataset
5
+
6
+ class SegmentationDataset(BaseDataset):
7
+ """Dataset class for semantic segmentation task."""
8
+
9
+ def __init__(self, data_dir, classes=['background', 'object'],
10
+ augmentation=None, preprocessing=None):
11
+
12
+ self.image_dir = os.path.join(data_dir, 'Images')
13
+ self.mask_dir = os.path.join(data_dir, 'Masks')
14
+
15
+ for dir_path in [self.image_dir, self.mask_dir]:
16
+ if not os.path.exists(dir_path):
17
+ raise FileNotFoundError(f"Directory not found: {dir_path}")
18
+
19
+ self.filenames = self._get_filenames()
20
+ self.image_paths = [os.path.join(self.image_dir, fname) for fname in self.filenames]
21
+ self.mask_paths = self._get_mask_paths()
22
+
23
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
24
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
25
+
26
+ self.augmentation = augmentation
27
+ self.preprocessing = preprocessing
28
+
29
+ def __getitem__(self, index):
30
+ image = self._load_image(self.image_paths[index])
31
+ mask = self._load_mask(self.mask_paths[index])
32
+
33
+ if self.augmentation:
34
+ processed = self.augmentation(image=image, mask=mask)
35
+ image, mask = processed['image'], processed['mask']
36
+
37
+ if self.preprocessing:
38
+ processed = self.preprocessing(image=image, mask=mask)
39
+ image, mask = processed['image'], processed['mask']
40
+
41
+ return image, mask
42
+
43
+ def __len__(self):
44
+ return len(self.filenames)
45
+
46
+ def _get_filenames(self):
47
+ """Returns sorted list of filenames, excluding directories."""
48
+ files = sorted(os.listdir(self.image_dir))
49
+ return [f for f in files if not os.path.isdir(os.path.join(self.image_dir, f))]
50
+
51
+ def _get_mask_paths(self):
52
+ """Generates corresponding mask paths for each image."""
53
+ mask_paths = []
54
+ for image_file in self.filenames:
55
+ name, _ = os.path.splitext(image_file)
56
+ mask_paths.append(os.path.join(self.mask_dir, f"{name}.png"))
57
+ return mask_paths
58
+
59
+ def _load_image(self, path):
60
+ """Loads and converts image to RGB."""
61
+ image = cv2.imread(path)
62
+ return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
63
+
64
+ def _load_mask(self, path):
65
+ """Loads and processes segmentation mask."""
66
+ mask = cv2.imread(path, 0)
67
+ masks = [(mask == value) for value in self.class_values]
68
+ mask = np.stack(masks, axis=-1).astype('float')
69
+ return mask
70
+
71
+ class InferenceDataset(BaseDataset):
72
+ """Dataset class for inference without ground truth masks."""
73
+
74
+ def __init__(self, data_dir, classes=['background', 'object'],
75
+ augmentation=None, preprocessing=None):
76
+ self.filenames = sorted([
77
+ f for f in os.listdir(data_dir)
78
+ if not os.path.isdir(os.path.join(data_dir, f))
79
+ ])
80
+ self.image_paths = [os.path.join(data_dir, fname) for fname in self.filenames]
81
+
82
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
83
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
84
+
85
+ self.augmentation = augmentation
86
+ self.preprocessing = preprocessing
87
+
88
+ def __getitem__(self, index):
89
+ image = cv2.imread(self.image_paths[index])
90
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
91
+ original_height, original_width = image.shape[:2]
92
+
93
+ if self.augmentation:
94
+ image = self.augmentation(image=image)['image']
95
+
96
+ if self.preprocessing:
97
+ image = self.preprocessing(image=image)['image']
98
+
99
+ return image, original_height, original_width
100
+
101
+ def __len__(self):
102
+ return len(self.filenames)
103
+
104
+ class StreamingDataset(BaseDataset):
105
+ """Dataset class optimized for video frame processing."""
106
+
107
+ def __init__(self, data_dir, classes=['background', 'object'],
108
+ augmentation=None, preprocessing=None):
109
+ self.filenames = self._get_frame_filenames(data_dir)
110
+ self.image_paths = [os.path.join(data_dir, fname) for fname in self.filenames]
111
+
112
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
113
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
114
+
115
+ self.augmentation = augmentation
116
+ self.preprocessing = preprocessing
117
+
118
+ def _get_frame_filenames(self, directory):
119
+ """Returns sorted list of frame filenames."""
120
+ files = sorted(os.listdir(directory))
121
+ return [f for f in files if (('frame' in f or 'Image' in f) and
122
+ f.lower().endswith('jpg') and
123
+ not os.path.isdir(os.path.join(directory, f)))]
124
+
125
+ def __getitem__(self, index):
126
+ return InferenceDataset.__getitem__(self, index)
127
+
128
+ def __len__(self):
129
+ return len(self.filenames)
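A sketch of wiring `SegmentationDataset` into a PyTorch `DataLoader` together with the augmentation and preprocessing helpers from `image_preprocessing.py` (hypothetical dataset path and class names; the encoder preprocessing function comes from segmentation-models-pytorch):

```python
import segmentation_models_pytorch as smp
from torch.utils.data import DataLoader

from SemanticModel.data_loader import SegmentationDataset
from SemanticModel.image_preprocessing import (
    get_preprocessing_pipeline, get_training_augmentations)

preprocessing_fn = smp.encoders.get_preprocessing_fn('timm-regnety_120', 'imagenet')
train_dataset = SegmentationDataset(
    'dataset/train',                      # must contain Images/ and Masks/
    classes=['background', 'object'],
    augmentation=get_training_augmentations(width=1024, height=1024),
    preprocessing=get_preprocessing_pipeline(preprocessing_fn),
)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
images, masks = next(iter(train_loader))  # images: (4, 3, H, W); masks: (4, 1, H, W)
```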
semantic-segmentation/SemanticModel/.ipynb_checkpoints/encoder_management-checkpoint.py ADDED
@@ -0,0 +1,136 @@
1
+ import os
2
+ import ssl
3
+ import shutil
4
+ import tempfile
5
+ import hashlib
6
+ from tqdm import tqdm
7
+ from torch.hub import get_dir
8
+ from urllib.request import urlopen, Request
9
+
10
+ from segmentation_models_pytorch.encoders import (
11
+ resnet_encoders, dpn_encoders, vgg_encoders, senet_encoders,
12
+ densenet_encoders, inceptionresnetv2_encoders, inceptionv4_encoders,
13
+ efficient_net_encoders, mobilenet_encoders, xception_encoders,
14
+ timm_efficientnet_encoders, timm_resnest_encoders, timm_res2net_encoders,
15
+ timm_regnet_encoders, timm_sknet_encoders, timm_mobilenetv3_encoders,
16
+ timm_gernet_encoders
17
+ )
18
+
19
+ from segmentation_models_pytorch.encoders.timm_universal import TimmUniversalEncoder
20
+
21
+ def initialize_encoders():
22
+ """Initialize dictionary of available encoders."""
23
+ available_encoders = {}
24
+ encoder_modules = [
25
+ resnet_encoders, dpn_encoders, vgg_encoders, senet_encoders,
26
+ densenet_encoders, inceptionresnetv2_encoders, inceptionv4_encoders,
27
+ efficient_net_encoders, mobilenet_encoders, xception_encoders,
28
+ timm_efficientnet_encoders, timm_resnest_encoders, timm_res2net_encoders,
29
+ timm_regnet_encoders, timm_sknet_encoders, timm_mobilenetv3_encoders,
30
+ timm_gernet_encoders
31
+ ]
32
+
33
+ for module in encoder_modules:
34
+ available_encoders.update(module)
35
+
36
+ try:
37
+ import segmentation_models_pytorch
38
+ from packaging import version
39
+ if version.parse(segmentation_models_pytorch.__version__) >= version.parse("0.3.3"):
40
+ from segmentation_models_pytorch.encoders.mix_transformer import mix_transformer_encoders
41
+ from segmentation_models_pytorch.encoders.mobileone import mobileone_encoders
42
+ available_encoders.update(mix_transformer_encoders)
43
+ available_encoders.update(mobileone_encoders)
44
+ except ImportError:
45
+ pass
46
+
47
+ return available_encoders
48
+
49
+ def download_weights(url, destination, hash_prefix=None, show_progress=True):
50
+ """Downloads model weights with progress tracking and verification."""
51
+ ssl._create_default_https_context = ssl._create_unverified_context
52
+
53
+ req = Request(url, headers={"User-Agent": "torch.hub"})
54
+ response = urlopen(req)
55
+ content_length = response.headers.get("Content-Length")
56
+ file_size = int(content_length) if content_length else None
57
+
58
+ destination = os.path.expanduser(destination)
59
+ temp_file = tempfile.NamedTemporaryFile(delete=False, dir=os.path.dirname(destination))
60
+
61
+ try:
62
+ hasher = hashlib.sha256() if hash_prefix else None
63
+
64
+ with tqdm(total=file_size, disable=not show_progress,
65
+ unit='B', unit_scale=True, unit_divisor=1024) as pbar:
66
+ while True:
67
+ buffer = response.read(8192)
68
+ if not buffer:
69
+ break
70
+
71
+ temp_file.write(buffer)
72
+ if hasher:
73
+ hasher.update(buffer)
74
+ pbar.update(len(buffer))
75
+
76
+ temp_file.close()
77
+
78
+ if hasher and hash_prefix:
79
+ digest = hasher.hexdigest()
80
+ if digest[:len(hash_prefix)] != hash_prefix:
81
+ raise RuntimeError(f'Invalid hash value (expected "{hash_prefix}", got "{digest}")')
82
+
83
+ shutil.move(temp_file.name, destination)
84
+
85
+ finally:
86
+ temp_file.close()
87
+ if os.path.exists(temp_file.name):
88
+ os.remove(temp_file.name)
89
+
90
+ def initialize_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, **kwargs):
91
+ """Initializes and returns configured encoder."""
92
+ encoders = initialize_encoders()
93
+
94
+ if name.startswith("tu-"):
95
+ name = name[3:]
96
+ return TimmUniversalEncoder(
97
+ name=name,
98
+ in_channels=in_channels,
99
+ depth=depth,
100
+ output_stride=output_stride,
101
+ pretrained=weights is not None,
102
+ **kwargs
103
+ )
104
+
105
+ try:
106
+ encoder_config = encoders[name]
107
+ except KeyError:
108
+ raise KeyError(f"Invalid encoder name '{name}'. Available encoders: {list(encoders.keys())}")
109
+
110
+ encoder_class = encoder_config["encoder"]
111
+ encoder_params = encoder_config["params"]
112
+ encoder_params.update(depth=depth)
113
+
114
+ if weights:
115
+ try:
116
+ weights_config = encoder_config["pretrained_settings"][weights]
117
+ except KeyError:
118
+ raise KeyError(
119
+ f"Invalid weights '{weights}' for encoder '{name}'. "
120
+ f"Available options: {list(encoder_config['pretrained_settings'].keys())}"
121
+ )
122
+
123
+ cache_dir = os.path.join(get_dir(), 'checkpoints')
124
+ os.makedirs(cache_dir, exist_ok=True)
125
+
126
+ weights_file = os.path.basename(weights_config["url"])
127
+ weights_path = os.path.join(cache_dir, weights_file)
128
+
129
+ if not os.path.exists(weights_path):
130
+ print(f'Downloading {weights_file}...')
131
+ download_weights(
132
+ weights_config["url"].replace("https", "http"),
133
+ weights_path
134
+ )
135
+
136
+ return encoder_class(**encoder_params)
semantic-segmentation/SemanticModel/.ipynb_checkpoints/evaluation_utils-checkpoint.py ADDED
@@ -0,0 +1,108 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ from tqdm import tqdm
5
+ from torch.utils.data import DataLoader
6
+ from segmentation_models_pytorch.base.modules import Activation
7
+
8
+ from SemanticModel.data_loader import SegmentationDataset
9
+ from SemanticModel.metrics import compute_mean_iou
10
+ from SemanticModel.image_preprocessing import get_validation_augmentations
11
+
12
+ def evaluate_model(model_config, data_path, image_size=None):
13
+ """Evaluates model performance on a dataset."""
14
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
+
16
+ classes = ['background'] + model_config.classes if model_config.background_flag else model_config.classes
17
+
18
+ data_path = os.path.realpath(data_path)
19
+ image_subdir = os.path.join(data_path, 'Images')
20
+ mask_subdir = os.path.join(data_path, 'Masks')
21
+
22
+ if not all(os.path.exists(d) for d in [image_subdir, mask_subdir]):
23
+ raise Exception("Missing required subdirectories: 'Images' and 'Masks'")
24
+
25
+ if not image_size:
26
+ sample_image = cv2.imread(os.path.join(image_subdir, os.listdir(image_subdir)[0]))
27
+ height, width = sample_image.shape[:2]
28
+ image_size = max(height, width)
29
+
30
+ evaluation_dataset = SegmentationDataset(
31
+ data_path,
32
+ classes=classes,
33
+ augmentation=get_validation_augmentations(
34
+ im_width=image_size,
35
+ im_height=image_size,
36
+ fixed_size=False
37
+ ),
38
+ preprocessing=model_config.preprocessing
39
+ )
40
+
41
+ evaluation_loader = DataLoader(
42
+ evaluation_dataset,
43
+ batch_size=1,
44
+ shuffle=False,
45
+ num_workers=2
46
+ )
47
+
48
+ model = model_config.model.to(device)
49
+ model.eval()
50
+
51
+ requires_sigmoid = False
52
+ if model_config.n_classes == 1:
53
+ current_activation = _check_activation_function(model)
54
+ if current_activation != 'Sigmoid':
55
+ requires_sigmoid = True
56
+
57
+ predictions = []
58
+ ground_truth = []
59
+
60
+ print("Evaluating model performance...")
61
+ with torch.no_grad():
62
+ for images, masks in tqdm(evaluation_loader):
63
+ images = images.to(device)
64
+ masks = masks.to(device)
65
+
66
+ outputs = model.forward(images)
67
+
68
+ if model_config.n_classes > 1:
69
+ predictions.extend([p.cpu().argmax(dim=0) for p in outputs])
70
+ ground_truth.extend([gt.cpu().argmax(dim=0) for gt in masks])
71
+ else:
72
+ if requires_sigmoid:
73
+ predictions.extend([
74
+ (torch.sigmoid(p) > 0.5).float().squeeze().cpu()
75
+ for p in outputs
76
+ ])
77
+ else:
78
+ predictions.extend([
79
+ (p > 0.5).float().squeeze().cpu()
80
+ for p in outputs
81
+ ])
82
+ ground_truth.extend([gt.cpu().squeeze() for gt in masks])
83
+
84
+ metrics = compute_mean_iou(
85
+ predictions,
86
+ ground_truth,
87
+ num_classes=len(classes),
88
+ ignore_index=255
89
+ )
90
+
91
+ print("\nEvaluation Results:")
92
+ print(f"Mean IoU: {metrics['mean_iou']:.3f}")
93
+ print("\nPer-class IoU:")
94
+ for idx, iou in enumerate(metrics['per_category_iou']):
95
+ print(f"{classes[idx]}: {iou:.3f}")
96
+
97
+ return metrics
98
+
99
+ def _check_activation_function(model):
100
+ """Checks the activation function used in model's segmentation head."""
101
+ from segmentation_models_pytorch.base.modules import Activation
102
+
103
+ activation_functions = []
104
+ for _, module in model.segmentation_head.named_children():
105
+ if isinstance(module, Activation):
106
+ activation_functions.append(type(module.activation).__name__)
107
+
108
+ return activation_functions[-1] if activation_functions else None
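A short sketch of running `evaluate_model` on a validation split laid out as in the README (the checkpoint path is hypothetical):

```python
from SemanticModel.model_core import SegmentationModel
from SemanticModel.evaluation_utils import evaluate_model

model = SegmentationModel(
    classes=['background', 'object'],
    weights='path/to/best_model.pth',   # hypothetical trained checkpoint
)
# 'dataset/val' must contain Images/ and Masks/ subdirectories
metrics = evaluate_model(model, 'dataset/val')
print(metrics['mean_iou'], metrics['per_category_iou'])
```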
semantic-segmentation/SemanticModel/.ipynb_checkpoints/image_preprocessing-checkpoint.py ADDED
@@ -0,0 +1,81 @@
1
+ import cv2
2
+ import numpy as np
3
+ import albumentations as albu
4
+ from albumentations.augmentations.geometric.resize import LongestMaxSize
5
+
6
+ def round_pixel_dim(dimension: float) -> int:
7
+ """Rounds pixel dimensions consistently."""
8
+ if abs(round(dimension) - dimension) == 0.5:
9
+ return int(2.0 * round(dimension / 2.0))
10
+ return int(round(dimension))
11
+
12
+ def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
13
+ """Resizes image maintaining aspect ratio and ensures dimensions are stride-compatible."""
14
+ height, width = image.shape[:2]
15
+ max_dimension = max(height, width)
16
+
17
+ if ((height % stride == 0) and (width % stride == 0) and
18
+ (max_dimension <= target_size)):
19
+ return image
20
+
21
+ scale = target_size / float(max(width, height))
22
+ new_dims = tuple(round_pixel_dim(dim * scale) for dim in (height, width))
23
+ new_height, new_width = new_dims
24
+
25
+ new_height = ((new_height // stride + 1) * stride
26
+ if new_height % stride != 0 else new_height)
27
+ new_width = ((new_width // stride + 1) * stride
28
+ if new_width % stride != 0 else new_width)
29
+
30
+ return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
31
+
32
+ class PaddedResize(LongestMaxSize):
33
+ def apply(self, img: np.ndarray, max_size: int = 1024,
34
+ interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
35
+ return resize_with_padding(img, target_size=max_size, interpolation=interpolation)
36
+
37
+ def get_training_augmentations(width=768, height=576):
38
+ """Configures training-time augmentations."""
39
+ target_size = max([width, height])
40
+ transforms = [
41
+ albu.HorizontalFlip(p=0.5),
42
+ albu.ShiftScaleRotate(
43
+ scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
44
+ albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
45
+ albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
46
+ albu.GaussNoise(p=0.2),
47
+ albu.Perspective(p=0.2),
48
+ albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
49
+ albu.OneOf([
50
+ albu.Sharpen(p=1),
51
+ albu.Blur(blur_limit=3, p=1),
52
+ albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
53
+ albu.OneOf([
54
+ albu.RandomBrightnessContrast(p=1),
55
+ albu.HueSaturationValue(p=1)], p=0.33),
56
+ ]
57
+ return albu.Compose(transforms)
58
+
59
+ def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
60
+ """Configures validation/inference-time augmentations."""
61
+ if fixed_size:
62
+ transforms = [albu.Resize(height=height, width=width, always_apply=True)]
63
+ return albu.Compose(transforms)
64
+
65
+ target_size = max(width, height)
66
+ transforms = [PaddedResize(max_size=target_size, always_apply=True)]
67
+ return albu.Compose(transforms)
68
+
69
+ def convert_to_tensor(x, **kwargs):
70
+ """Converts image array to PyTorch tensor format."""
71
+ if x.ndim == 2:
72
+ x = np.expand_dims(x, axis=-1)
73
+ return x.transpose(2, 0, 1).astype('float32')
74
+
75
+ def get_preprocessing_pipeline(preprocessing_fn):
76
+ """Builds preprocessing pipeline including normalization and tensor conversion."""
77
+ transforms = [
78
+ albu.Lambda(image=preprocessing_fn),
79
+ albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
80
+ ]
81
+ return albu.Compose(transforms)
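A quick sketch of what `resize_with_padding` does with the default stride of 32: the longest side is scaled to the target size and each side is then rounded up to a multiple of 32 so the encoder's downsampling stages divide evenly:

```python
import numpy as np
from SemanticModel.image_preprocessing import resize_with_padding

# Dummy 750x1000 RGB image, longest side capped at 768
image = np.random.randint(0, 255, (750, 1000, 3), dtype=np.uint8)
resized = resize_with_padding(image, target_size=768)
print(resized.shape)  # (576, 768, 3): both dimensions are multiples of 32
```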
semantic-segmentation/SemanticModel/.ipynb_checkpoints/metrics-checkpoint.py ADDED
@@ -0,0 +1,94 @@
1
+ from typing import Dict, Optional
2
+ import numpy as np
3
+
4
+ def compute_intersection_union(prediction, ground_truth, num_classes, ignore_index: int,
5
+ label_mapping: Optional[Dict[int, int]] = None,
6
+ reduce_labels: bool = False):
7
+ """Computes intersection and union for IoU calculation."""
8
+
9
+ if label_mapping:
10
+ for old_id, new_id in label_mapping.items():
11
+ ground_truth[ground_truth == old_id] = new_id
12
+
13
+ prediction = np.array(prediction)
14
+ ground_truth = np.array(ground_truth)
15
+
16
+ if reduce_labels:
17
+ ground_truth[ground_truth == 0] = 255
18
+ ground_truth = ground_truth - 1
19
+ ground_truth[ground_truth == 254] = 255
20
+
21
+ valid_mask = np.not_equal(ground_truth, ignore_index)
22
+ prediction = prediction[valid_mask]
23
+ ground_truth = ground_truth[valid_mask]
24
+
25
+ intersection_mask = prediction == ground_truth
26
+ intersection = prediction[intersection_mask]
27
+
28
+ area_intersection = np.histogram(intersection, bins=num_classes,
29
+ range=(0, num_classes - 1))[0]
30
+ area_prediction = np.histogram(prediction, bins=num_classes,
31
+ range=(0, num_classes - 1))[0]
32
+ area_ground_truth = np.histogram(ground_truth, bins=num_classes,
33
+ range=(0, num_classes - 1))[0]
34
+ area_union = area_prediction + area_ground_truth - area_intersection
35
+
36
+ return area_intersection, area_union, area_prediction, area_ground_truth
37
+
38
+ def compute_total_intersection_union(predictions, ground_truths, num_classes, ignore_index: int,
39
+ label_mapping: Optional[Dict[int, int]] = None,
40
+ reduce_labels: bool = False):
41
+ """Computes total intersection and union across all samples."""
42
+
43
+ totals = {
44
+ 'intersection': np.zeros((num_classes,), dtype=np.float64),
45
+ 'union': np.zeros((num_classes,), dtype=np.float64),
46
+ 'prediction': np.zeros((num_classes,), dtype=np.float64),
47
+ 'ground_truth': np.zeros((num_classes,), dtype=np.float64)
48
+ }
49
+
50
+ for pred, gt in zip(predictions, ground_truths):
51
+ intersection, union, pred_area, gt_area = compute_intersection_union(
52
+ pred, gt, num_classes, ignore_index, label_mapping, reduce_labels
53
+ )
54
+ totals['intersection'] += intersection
55
+ totals['union'] += union
56
+ totals['prediction'] += pred_area
57
+ totals['ground_truth'] += gt_area
58
+
59
+ return tuple(totals.values())
60
+
61
+ def compute_mean_iou(predictions, ground_truths, num_classes, ignore_index: int,
62
+ nan_to_num: Optional[int] = None,
63
+ label_mapping: Optional[Dict[int, int]] = None,
64
+ reduce_labels: bool = False):
65
+ """Computes mean IoU and related metrics."""
66
+
67
+ intersection, union, prediction_area, ground_truth_area = compute_total_intersection_union(
68
+ predictions, ground_truths, num_classes, ignore_index, label_mapping, reduce_labels
69
+ )
70
+
71
+ metrics = {}
72
+
73
+ # Compute overall accuracy
74
+ total_accuracy = intersection.sum() / ground_truth_area.sum()
75
+
76
+ # Compute IoU per class
77
+ iou_per_class = intersection / union
78
+ accuracy_per_class = intersection / ground_truth_area
79
+
80
+ metrics.update({
81
+ "mean_iou": np.nanmean(iou_per_class),
82
+ "mean_accuracy": np.nanmean(accuracy_per_class),
83
+ "overall_accuracy": total_accuracy,
84
+ "per_category_iou": iou_per_class,
85
+ "per_category_accuracy": accuracy_per_class
86
+ })
87
+
88
+ if nan_to_num is not None:
89
+ metrics = {
90
+ metric: np.nan_to_num(value, nan=nan_to_num)
91
+ for metric, value in metrics.items()
92
+ }
93
+
94
+ return metrics
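A tiny worked example of `compute_mean_iou`, using integer class maps with 255 as the ignore value (matching how `evaluation_utils.py` calls it):

```python
import numpy as np
from SemanticModel.metrics import compute_mean_iou

# Two 2x2 predictions and labels with 2 classes; 255 marks one ignored pixel
predictions   = [np.array([[0, 1], [1, 1]]), np.array([[0, 0], [1, 0]])]
ground_truths = [np.array([[0, 1], [0, 1]]), np.array([[0, 0], [1, 255]])]

metrics = compute_mean_iou(predictions, ground_truths,
                           num_classes=2, ignore_index=255)
print(metrics['mean_iou'])          # 0.75 for this toy example
print(metrics['per_category_iou'])  # [0.75 0.75]
```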
semantic-segmentation/SemanticModel/.ipynb_checkpoints/model_core-checkpoint.py ADDED
@@ -0,0 +1,129 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import segmentation_models_pytorch as smp
4
+ from segmentation_models_pytorch import utils
5
+
6
+ from SemanticModel.encoder_management import initialize_encoder
7
+ from SemanticModel.custom_losses import FocalLossFunction, TverskyLossFunction, EnhancedCrossEntropy
8
+ from SemanticModel.image_preprocessing import get_preprocessing_pipeline
9
+
10
+ class SegmentationModel:
11
+ def __init__(self, classes=['background', 'foreground'], architecture='unet',
12
+ encoder='timm-regnety_120', weights='imagenet', loss=None):
13
+ self._initialize_classes(classes)
14
+ self.architecture = architecture
15
+ self.encoder = encoder
16
+ self.weights = weights
17
+ self._setup_loss_function(loss)
18
+ self._initialize_model()
19
+
20
+ def _initialize_classes(self, classes):
21
+ """Sets up class configuration."""
22
+ if len(classes) <= 2:
23
+ self.classes = [c for c in classes if c.lower() != 'background']
24
+ self.class_values = [i for i, c in enumerate(classes) if c.lower() != 'background']
25
+ self.background_flag = 'background' in classes
26
+ else:
27
+ self.classes = classes
28
+ self.class_values = list(range(len(classes)))
29
+ self.background_flag = False
30
+ self.n_classes = len(self.classes)
31
+
32
+ def _setup_loss_function(self, loss):
33
+ """Configures model's loss function."""
34
+ if not loss:
35
+ loss = 'bce_with_logits' if self.n_classes > 1 else 'dice'
36
+
37
+ if loss.lower() not in ['dice', 'bce_with_logits', 'focal', 'tversky']:
38
+ print(f'Invalid loss: {loss}, defaulting to dice')
39
+ loss = 'dice'
40
+
41
+ loss_configs = {
42
+ 'bce_with_logits': {
43
+ 'activation': None,
44
+ 'loss': EnhancedCrossEntropy() if self.n_classes > 1 else utils.losses.BCEWithLogitsLoss()
45
+ },
46
+ 'dice': {
47
+ 'activation': 'softmax' if self.n_classes > 1 else 'sigmoid',
48
+ 'loss': utils.losses.DiceLoss()
49
+ },
50
+ 'focal': {
51
+ 'activation': None,
52
+ 'loss': FocalLossFunction()
53
+ },
54
+ 'tversky': {
55
+ 'activation': None,
56
+ 'loss': TverskyLossFunction()
57
+ }
58
+ }
59
+
60
+ config = loss_configs[loss.lower()]
61
+ self.activation = config['activation']
62
+ self.loss = config['loss']
63
+ self.loss_name = loss
64
+
65
+ def _initialize_model(self):
66
+ """Initializes the segmentation model architecture."""
67
+ if self.weights.endswith('pth'):
68
+ self._load_pretrained_model()
69
+ else:
70
+ self._create_new_model()
71
+
72
+ def _load_pretrained_model(self):
73
+ """Loads model from pretrained weights."""
74
+ print('Loading pretrained model...')
75
+ self.model = torch.load(self.weights)
76
+ if isinstance(self.model, torch.nn.DataParallel):
77
+ self.model = self.model.module
78
+
79
+ try:
80
+ preprocessing_fn = smp.encoders.get_preprocessing_fn(self.encoder, 'imagenet')
81
+ self.preprocessing = get_preprocessing_pipeline(preprocessing_fn)
82
+ except:
83
+ print('Failed to configure preprocessing. Setting to None.')
84
+ self.preprocessing = None
85
+
86
+ def _create_new_model(self):
87
+ """Creates new model with specified architecture."""
88
+ preprocessing_fn = smp.encoders.get_preprocessing_fn(self.encoder, 'imagenet')
89
+ self.preprocessing = get_preprocessing_pipeline(preprocessing_fn)
90
+ initialize_encoder(name=self.encoder, weights=self.weights)
91
+
92
+ architectures = {
93
+ 'unet': smp.Unet,
94
+ 'unet++': smp.UnetPlusPlus,
95
+ 'deeplabv3': smp.DeepLabV3,
96
+ 'deeplabv3+': smp.DeepLabV3Plus,
97
+ 'fpn': smp.FPN,
98
+ 'linknet': smp.Linknet,
99
+ 'manet': smp.MAnet,
100
+ 'pan': smp.PAN,
101
+ 'pspnet': smp.PSPNet
102
+ }
103
+
104
+ if self.architecture not in architectures:
105
+ raise ValueError(f'Unsupported architecture: {self.architecture}')
106
+
107
+ self.model = architectures[self.architecture](
108
+ encoder_name=self.encoder,
109
+ encoder_weights=self.weights,
110
+ classes=self.n_classes,
111
+ activation=self.activation
112
+ )
113
+
114
+ @property
115
+ def config_data(self):
116
+ """Returns model configuration data."""
117
+ return {
118
+ 'architecture': self.architecture,
119
+ 'encoder': self.encoder,
120
+ 'weights': self.weights,
121
+ 'activation': self.activation,
122
+ 'loss': self.loss_name,
123
+ 'classes': ['background'] + self.classes if self.background_flag else self.classes
124
+ }
125
+
126
+ def list_architectures():
127
+ """Returns available architecture options."""
128
+ return ['unet', 'unet++', 'deeplabv3', 'deeplabv3+', 'fpn',
129
+ 'linknet', 'manet', 'pan', 'pspnet']
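For reference, `_setup_loss_function` pairs each loss with a head activation: `dice` attaches softmax (multiclass) or sigmoid (binary), while the logit-based losses (`bce_with_logits`, `focal`, `tversky`) leave the head linear. A hedged sketch of inspecting that pairing:

```python
from SemanticModel.model_core import SegmentationModel

# Multiclass model with Dice loss -> softmax activation on the segmentation head
model = SegmentationModel(
    classes=['background', 'cacao', 'matarraton', 'abarco'],
    architecture='unet',
    encoder='timm-regnety_120',
    weights='imagenet',
    loss='dice',
)
print(model.config_data)  # architecture, encoder, weights, activation, loss, classes
```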
semantic-segmentation/SemanticModel/.ipynb_checkpoints/prediction-checkpoint.py ADDED
@@ -0,0 +1,336 @@
1
+ import os
2
+ import cv2
3
+ import time
4
+ import torch
5
+ import imageio
6
+ import tifffile
7
+ import numpy as np
8
+ import slidingwindow
9
+ import rasterio as rio
10
+ import geopandas as gpd
11
+ from shapely.geometry import Polygon
12
+ from rasterio import mask as riomask
13
+ from torch.utils.data import DataLoader
14
+ from SemanticModel.visualization import generate_color_mapping
15
+ from SemanticModel.image_preprocessing import get_validation_augmentations
16
+ from SemanticModel.data_loader import InferenceDataset, StreamingDataset
17
+ from SemanticModel.utilities import calc_image_size, convert_coordinates
18
+
19
+ class PredictionPipeline:
20
+ def __init__(self, model_config, device=None):
21
+ self.config = model_config
22
+ self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
+ self.classes = ['background'] + model_config.classes if model_config.background_flag else model_config.classes
24
+ self.colors = generate_color_mapping(len(self.classes))
25
+ self.model = model_config.model.to(self.device)
26
+ self.model.eval()
27
+
28
+ def _preprocess_image(self, image_path, target_size=None):
29
+ """Preprocesses single image for prediction."""
30
+ image = cv2.imread(image_path)
31
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
32
+ height, width = image.shape[:2]
33
+
34
+ target_size = target_size or max(height, width)
35
+ test_height, test_width = calc_image_size(image, target_size)
36
+
37
+ augmentation = get_validation_augmentations(test_width, test_height)
38
+ image = augmentation(image=image)['image']
39
+ image = self.config.preprocessing(image=image)['image']
40
+
41
+ return image, (height, width)
42
+
43
+ def predict_single_image(self, image_path, target_size=None, output_dir=None,
44
+ format='integer', save_output=True):
45
+ """Generates prediction for a single image."""
46
+ image, original_dims = self._preprocess_image(image_path, target_size)
47
+ x_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
48
+
49
+ with torch.no_grad():
50
+ prediction = self.model.predict(x_tensor)
51
+
52
+ if self.config.n_classes > 1:
53
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
54
+ else:
55
+ prediction = prediction.squeeze().cpu().numpy().round()
56
+
57
+ # Resize to original dimensions if needed
58
+ if prediction.shape[:2] != original_dims:
59
+ prediction = cv2.resize(prediction, original_dims[::-1],
60
+ interpolation=cv2.INTER_NEAREST)
61
+
62
+ prediction = self._format_prediction(prediction, format)
63
+
64
+ if save_output:
65
+ self._save_prediction(prediction, image_path, output_dir, format)
66
+
67
+ return prediction
68
+
69
+ def predict_directory(self, input_dir, target_size=None, output_dir=None,
70
+ fixed_size=True, format='integer'):
71
+ """Generates predictions for all images in directory."""
72
+ output_dir = output_dir or os.path.join(input_dir, 'predictions')
73
+ os.makedirs(output_dir, exist_ok=True)
74
+
75
+ dataset = InferenceDataset(
76
+ input_dir,
77
+ classes=self.classes,
78
+ augmentation=get_validation_augmentations(
79
+ target_size, target_size, fixed_size=fixed_size
80
+ ) if target_size else None,
81
+ preprocessing=self.config.preprocessing
82
+ )
83
+
84
+ total_images = len(dataset)
85
+ start_time = time.time()
86
+
87
+ for idx in range(total_images):
88
+ if (idx + 1) % 10 == 0 or idx == total_images - 1:
89
+ elapsed = time.time() - start_time
90
+ print(f'\rProcessed {idx+1}/{total_images} images in {elapsed:.1f}s',
91
+ end='')
92
+
93
+ image, height, width = dataset[idx]
94
+ filename = dataset.filenames[idx]
95
+
96
+ x_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
97
+ with torch.no_grad():
98
+ prediction = self.model.predict(x_tensor)
99
+
100
+ if self.config.n_classes > 1:
101
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
102
+ else:
103
+ prediction = prediction.squeeze().cpu().numpy().round()
104
+
105
+ if prediction.shape != (height, width):
106
+ prediction = cv2.resize(prediction, (width, height),
107
+ interpolation=cv2.INTER_NEAREST)
108
+
109
+ prediction = self._format_prediction(prediction, format)
110
+ self._save_prediction(prediction, filename, output_dir, format)
111
+
112
+ print(f'\nPredictions saved to: {output_dir}')
113
+ return output_dir
114
+
115
+ def predict_raster(self, raster_path, tile_size=1024, overlap=0.175,
116
+ boundary_path=None, output_path=None, format='integer'):
117
+ """Processes large raster images using tiling approach."""
118
+ print('Loading raster...')
119
+ with rio.open(raster_path) as src:
120
+ raster = src.read()
121
+ raster = np.moveaxis(raster, 0, 2)[:,:,:3]
122
+ profile = src.profile
123
+ transform = src.transform
124
+
125
+ if boundary_path:
126
+ boundary = gpd.read_file(boundary_path)
127
+ boundary = boundary.to_crs(profile['crs'])
128
+ boundary_geom = boundary.iloc[0].geometry
129
+
130
+ tiles = slidingwindow.generate(
131
+ raster,
132
+ slidingwindow.DimOrder.HeightWidthChannel,
133
+ tile_size,
134
+ overlap
135
+ )
136
+
137
+ pred_raster = np.zeros_like(raster[:,:,0], dtype='uint8')
138
+ confidence = np.zeros_like(pred_raster, dtype=np.float32)
139
+
140
+ aug = get_validation_augmentations(tile_size, tile_size, fixed_size=False)
141
+
142
+ for idx, tile in enumerate(tiles):
143
+ if (idx + 1) % 10 == 0 or idx == len(tiles) - 1:
144
+ print(f'\rProcessed {idx+1}/{len(tiles)} tiles', end='')
145
+
146
+ bounds = tile.indices()
147
+
148
+ tile_image = raster[bounds[0], bounds[1]]
149
+
150
+ if boundary_path:
151
+ corners = [
152
+ convert_coordinates(transform, bounds[1].start, bounds[0].start),
153
+ convert_coordinates(transform, bounds[1].stop, bounds[0].start),
154
+ convert_coordinates(transform, bounds[1].stop, bounds[0].stop),
155
+ convert_coordinates(transform, bounds[1].start, bounds[0].stop)
156
+ ]
157
+ if not Polygon(corners).intersects(boundary_geom):
158
+ continue
159
+
160
+ processed = aug(image=tile_image)['image']
161
+ processed = self.config.preprocessing(image=processed)['image']
162
+
163
+ x_tensor = torch.from_numpy(processed).to(self.device).unsqueeze(0)
164
+ with torch.no_grad():
165
+ prediction = self.model.predict(x_tensor)
166
+ prediction = prediction.squeeze().cpu().numpy()
167
+
168
+ if self.config.n_classes > 1:
169
+ tile_pred = np.argmax(prediction, axis=0)
170
+ tile_conf = np.max(prediction, axis=0)
171
+ else:
172
+ tile_conf = np.abs(prediction - 0.5)
173
+ tile_pred = prediction.round()
174
+
175
+ if tile_pred.shape != tile_image.shape[:2]:
176
+ tile_pred = cv2.resize(tile_pred, tile_image.shape[:2][::-1],
177
+ interpolation=cv2.INTER_NEAREST)
178
+ tile_conf = cv2.resize(tile_conf, tile_image.shape[:2][::-1],
179
+ interpolation=cv2.INTER_LINEAR)
180
+
181
+ # Update prediction and confidence maps
182
+ existing_conf = confidence[bounds[0], bounds[1]]
183
+ existing_pred = pred_raster[bounds[0], bounds[1]]
184
+
185
+ mask = existing_conf < tile_conf
186
+ existing_pred[mask] = tile_pred[mask]
187
+ existing_conf[mask] = tile_conf[mask]
188
+
189
+ pred_raster[bounds[0], bounds[1]] = existing_pred
190
+ confidence[bounds[0], bounds[1]] = existing_conf
191
+
192
+ pred_raster = self._format_prediction(pred_raster, format)
193
+
194
+ if output_path or boundary_path:
195
+ self._save_raster_prediction(
196
+ pred_raster, raster_path, output_path,
197
+ profile, boundary_geom if boundary_path else None
198
+ )
199
+
200
+ return pred_raster, profile
201
+
202
+ def _format_prediction(self, prediction, format):
203
+ """Formats prediction according to specified output type."""
204
+ if format == 'integer':
205
+ return prediction.astype('uint8')
206
+ elif format == 'color':
207
+ return self._apply_color_mapping(prediction)
208
+ else:
209
+ raise ValueError(f"Unsupported format: {format}")
210
+
211
+ def _save_prediction(self, prediction, source_path, output_dir, format):
212
+ """Saves prediction to disk."""
213
+ filename = os.path.splitext(os.path.basename(source_path))[0]
214
+ output_path = os.path.join(output_dir, f"{filename}_pred.png")
215
+ cv2.imwrite(output_path, prediction)
216
+
217
+
218
+ def _save_raster_prediction(self, prediction, source_path, output_path,
219
+ profile, boundary=None):
220
+ """Saves raster prediction with geospatial information."""
221
+ output_path = output_path or source_path.replace(
222
+ os.path.splitext(source_path)[1], '_predicted.tif'
223
+ )
224
+
225
+ profile.update(
226
+ dtype='uint8',
227
+ count=3 if prediction.ndim == 3 else 1
228
+ )
229
+
230
+ with rio.open(output_path, 'w', **profile) as dst:
231
+ if prediction.ndim == 3:
232
+ for i in range(3):
233
+ dst.write(prediction[:,:,i], i+1)
234
+ else:
235
+ dst.write(prediction, 1)
236
+
237
+ if boundary:
238
+ with rio.open(output_path) as src:
239
+ cropped, transform = riomask.mask(src, [boundary], crop=True)
240
+ profile.update(
241
+ height=cropped.shape[1],
242
+ width=cropped.shape[2],
243
+ transform=transform
244
+ )
245
+
246
+ os.remove(output_path)
247
+ with rio.open(output_path, 'w', **profile) as dst:
248
+ dst.write(cropped)
249
+
250
+ print(f'\nPrediction saved to: {output_path}')
251
+
252
+ def predict_video_frames(self, input_dir, target_size=None, output_dir=None):
253
+ """Processes video frames with specialized visualization."""
254
+ output_dir = output_dir or os.path.join(input_dir, 'predictions')
255
+ os.makedirs(output_dir, exist_ok=True)
256
+
257
+ dataset = StreamingDataset(
258
+ input_dir,
259
+ classes=self.classes,
260
+ augmentation=get_validation_augmentations(
261
+ target_size, target_size
262
+ ) if target_size else None,
263
+ preprocessing=self.config.preprocessing
264
+ )
265
+
266
+ image = cv2.imread(dataset.image_paths[0])
267
+ height, width = image.shape[:2]
268
+
269
+ white = 255 * np.ones((height, width))
270
+ black = np.zeros_like(white)
271
+ red = np.dstack((white, black, black))
272
+ blue = np.dstack((black, black, white))
273
+
274
+ # Pre-compute rotated versions
275
+ rotated_red = np.rot90(red)
276
+ rotated_blue = np.rot90(blue)
277
+
278
+ total_frames = len(dataset)
279
+ start_time = time.time()
280
+
281
+ for idx in range(total_frames):
282
+ if (idx + 1) % 10 == 0 or idx == total_frames - 1:
283
+ elapsed = time.time() - start_time
284
+ print(f'\rProcessed {idx+1}/{total_frames} frames in {elapsed:.1f}s', end='')
285
+
286
+ frame, height, width = dataset[idx]
287
+ filename = dataset.filenames[idx]
288
+
289
+ x_tensor = torch.from_numpy(frame).to(self.device).unsqueeze(0)
290
+ with torch.no_grad():
291
+ prediction = self.model.predict(x_tensor)
292
+
293
+ if self.config.n_classes > 1:
294
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
295
+ masks = [prediction == i for i in range(1, self.config.n_classes)]
296
+ else:
297
+ prediction = prediction.squeeze().cpu().numpy().round()
298
+ masks = [prediction == 1]
299
+
300
+ if prediction.shape != (height, width):
301
+ prediction = cv2.resize(prediction, (width, height),
302
+ interpolation=cv2.INTER_NEAREST)
303
+
304
+ original = cv2.imread(os.path.join(input_dir, filename))
305
+ original = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)
306
+
307
+ try:
308
+ for i, mask in enumerate(masks):
309
+ color = red if i == 0 else blue
310
+ rotated_color = rotated_red if i == 0 else rotated_blue
311
+ try:
312
+ original[mask,:] = 0.45*original[mask,:] + 0.55*color[mask,:]
313
+ except:
314
+ original[mask,:] = 0.45*original[mask,:] + 0.55*rotated_color[mask,:]
315
+ except:
316
+ print(f"\nWarning: Error processing frame {filename}")
317
+ continue
318
+
319
+ output_path = os.path.join(output_dir, filename)
320
+ imageio.imwrite(output_path, original, quality=100)
321
+
322
+ print(f'\nProcessed frames saved to: {output_dir}')
323
+ return output_dir
324
+
325
+ def _apply_color_mapping(self, prediction):
326
+ """Applies color mapping to prediction."""
327
+ height, width = prediction.shape
328
+ colored = np.zeros((height, width, 3), dtype='uint8')
329
+
330
+ for i, class_name in enumerate(self.classes):
331
+ if class_name.lower() == 'background':
332
+ continue
333
+ color = self.colors[i]
334
+ colored[prediction == i] = color
335
+
336
+ return colored
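A hedged usage sketch of the pipeline above; file paths are placeholders and a fresh, untrained model stands in for a real checkpoint:

```python
# Sketch only: in practice model_config would be the SegmentationModel instance
# whose .model was trained; here an untrained one just shows the call pattern.
from SemanticModel.model_core import SegmentationModel
from SemanticModel.prediction import PredictionPipeline

model_config = SegmentationModel(classes=['background', 'crop'],
                                 architecture='unet', encoder='timm-regnety_120')
pipeline = PredictionPipeline(model_config)

# Single image -> uint8 class map saved as <name>_pred.png (output_dir must exist).
mask = pipeline.predict_single_image('field_photo.jpg', target_size=1024,
                                     output_dir='predictions', format='integer')

# Folder of images; creates <input_dir>/predictions automatically.
pipeline.predict_directory('test_images', target_size=1024)

# Large georeferenced raster, tiled with confidence-based blending of overlaps.
pred, profile = pipeline.predict_raster('survey.tif', tile_size=1024, overlap=0.175,
                                        output_path='survey_predicted.tif')
```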
semantic-segmentation/SemanticModel/.ipynb_checkpoints/training-checkpoint.py ADDED
@@ -0,0 +1,313 @@
1
+ import os
2
+ import json
3
+ import torch
4
+ import wandb
5
+ import datetime
6
+ import numpy as np
7
+ from tqdm import tqdm
8
+ from torch.utils.data import DataLoader
9
+ from torch.utils.tensorboard import SummaryWriter
10
+ from segmentation_models_pytorch.base.modules import Activation
11
+
12
+ from SemanticModel.data_loader import SegmentationDataset
13
+ from SemanticModel.metrics import compute_mean_iou
14
+ from SemanticModel.image_preprocessing import get_training_augmentations, get_validation_augmentations
15
+ from SemanticModel.utilities import list_images, validate_dimensions
16
+
17
+ class ModelTrainer:
18
+ def __init__(self, model_config, root_dir, epochs=40, train_size=1024,
19
+ val_size=None, workers=2, batch_size=2, learning_rate=1e-4,
20
+ step_count=2, decay_factor=0.8, wandb_config=None,
21
+ optimizer='rmsprop', target_class=None, resume_path=None):
22
+
23
+ self.config = model_config
24
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
25
+ self.root_dir = root_dir
26
+ self._initialize_training_params(epochs, train_size, val_size, workers,
27
+ batch_size, learning_rate, step_count,
28
+ decay_factor, optimizer, target_class)
29
+ self._setup_directories()
30
+ self._initialize_datasets()
31
+ self._setup_optimizer()
32
+ self._initialize_tracking()
33
+
34
+ if resume_path:
35
+ self._resume_training(resume_path)
36
+
37
+ def _initialize_training_params(self, epochs, train_size, val_size, workers,
38
+ batch_size, learning_rate, step_count,
39
+ decay_factor, optimizer, target_class):
40
+ self.epochs = epochs
41
+ self.train_size = train_size
42
+ self.val_size = val_size
43
+ self.workers = workers
44
+ self.batch_size = batch_size
45
+ self.learning_rate = learning_rate
46
+ self.step_schedule = self._calculate_step_schedule(epochs, step_count)
47
+ self.decay_factor = decay_factor
48
+ self.optimizer_type = optimizer
49
+ self.target_class = target_class
50
+ self.current_epoch = 1
51
+ self.best_iou = 0.0
52
+ self.best_epoch = 0
53
+ self.classes = ['background'] + self.config.classes if self.config.background_flag else self.config.classes
54
+
55
+ def _setup_directories(self):
56
+ """Verifies and creates necessary directories."""
57
+ self.train_dir = os.path.join(self.root_dir, 'train')
58
+ self.val_dir = os.path.join(self.root_dir, 'val')
59
+
60
+ required_subdirs = ['Images', 'Masks']
61
+ for path in [self.train_dir] + ([self.val_dir] if os.path.exists(self.val_dir) else []):
62
+ for subdir in required_subdirs:
63
+ full_path = os.path.join(path, subdir)
64
+ if not os.path.exists(full_path):
65
+ raise FileNotFoundError(f"Missing directory: {full_path}")
66
+
67
+ def _initialize_datasets(self):
68
+ """Sets up training and validation datasets."""
69
+ self.train_dataset = SegmentationDataset(
70
+ self.train_dir,
71
+ classes=self.classes,
72
+ augmentation=get_training_augmentations(self.train_size, self.train_size),
73
+ preprocessing=self.config.preprocessing
74
+ )
75
+
76
+ if os.path.exists(self.val_dir):
77
+ self.val_dataset = SegmentationDataset(
78
+ self.val_dir,
79
+ classes=self.classes,
80
+ augmentation=get_validation_augmentations(
81
+ self.val_size or self.train_size,
82
+ self.val_size or self.train_size,
83
+ fixed_size=False
84
+ ),
85
+ preprocessing=self.config.preprocessing
86
+ )
87
+ self.val_loader = DataLoader(
88
+ self.val_dataset,
89
+ batch_size=1,
90
+ shuffle=False,
91
+ num_workers=self.workers
92
+ )
93
+ else:
94
+ self.val_dataset = self.train_dataset
95
+ self.val_loader = DataLoader(
96
+ self.val_dataset,
97
+ batch_size=1,
98
+ shuffle=False,
99
+ num_workers=self.workers
100
+ )
101
+
102
+ self.train_loader = DataLoader(
103
+ self.train_dataset,
104
+ batch_size=self.batch_size,
105
+ shuffle=True,
106
+ num_workers=self.workers
107
+ )
108
+
109
+ def _setup_optimizer(self):
110
+ """Configures model optimizer."""
111
+ optimizer_map = {
112
+ 'adam': torch.optim.Adam,
113
+ 'sgd': lambda params: torch.optim.SGD(params, momentum=0.9),
114
+ 'rmsprop': torch.optim.RMSprop
115
+ }
116
+ optimizer_class = optimizer_map.get(self.optimizer_type.lower())
117
+ if not optimizer_class:
118
+ raise ValueError(f"Unsupported optimizer: {self.optimizer_type}")
119
+
120
+ self.optimizer = optimizer_class([{'params': self.config.model.parameters(),
121
+ 'lr': self.learning_rate}])
122
+
123
+ def _initialize_tracking(self):
124
+ """Sets up training progress tracking."""
125
+ timestamp = datetime.datetime.now().strftime("%m-%d-%Y_%H%M%S")
126
+ self.output_dir = os.path.join(
127
+ self.root_dir,
128
+ f'model_outputs-{self.config.architecture}[{self.config.encoder}]-{timestamp}'
129
+ )
130
+ os.makedirs(self.output_dir, exist_ok=True)
131
+
132
+ self.writer = SummaryWriter(log_dir=self.output_dir)
133
+ self.metrics = {
134
+ 'best_epoch': self.best_epoch,
135
+ 'best_epoch_iou': self.best_iou,
136
+ 'last_epoch': 0,
137
+ 'last_epoch_iou': 0.0,
138
+ 'last_epoch_lr': self.learning_rate,
139
+ 'step_schedule': self.step_schedule,
140
+ 'decay_factor': self.decay_factor,
141
+ 'target_class': self.target_class or 'overall'
142
+ }
143
+
144
+ def _calculate_step_schedule(self, epochs, steps):
145
+ """Calculates learning rate step schedule."""
146
+ return list(map(int, np.linspace(0, epochs, steps + 2)[1:-1]))
147
+
148
+ def train(self):
149
+ """Executes training loop."""
150
+ model = self.config.model.to(self.device)
151
+ if torch.cuda.device_count() > 1:
152
+ model = torch.nn.DataParallel(model)
153
+ print(f'Using {torch.cuda.device_count()} GPUs')
154
+
155
+ self._save_config()
156
+
157
+ for epoch in range(self.current_epoch, self.epochs + 1):
158
+ print(f'\nEpoch {epoch}/{self.epochs}')
159
+ print(f'Learning rate: {self.optimizer.param_groups[0]["lr"]:.3e}')
160
+
161
+ train_loss = self._train_epoch(model)
162
+ val_loss, val_metrics = self._validate_epoch(model)
163
+
164
+ self._update_tracking(epoch, train_loss, val_loss, val_metrics)
165
+ self._adjust_learning_rate(epoch)
166
+ self._save_checkpoints(model, epoch, val_metrics)
167
+
168
+ print(f'\nTraining completed. Best {self.metrics["target_class"]} IoU: {self.best_iou:.3f}')
169
+ return model, self.metrics
170
+
171
+ def _train_epoch(self, model):
172
+ """Executes single training epoch."""
173
+ model.train()
174
+ total_loss = 0
175
+ sample_count = 0
176
+
177
+ for batch in tqdm(self.train_loader, desc='Training'):
178
+ images, masks = [x.to(self.device) for x in batch]
179
+ self.optimizer.zero_grad()
180
+
181
+ outputs = model(images)
182
+ loss = self.config.loss(outputs, masks)
183
+ loss.backward()
184
+ self.optimizer.step()
185
+
186
+ total_loss += loss.item() * len(images)
187
+ sample_count += len(images)
188
+
189
+ return total_loss / sample_count
190
+
191
+ def _validate_epoch(self, model):
192
+ """Executes validation pass."""
193
+ model.eval()
194
+ total_loss = 0
195
+ predictions = []
196
+ ground_truth = []
197
+
198
+ with torch.no_grad():
199
+ for batch in tqdm(self.val_loader, desc='Validation'):
200
+ images, masks = [x.to(self.device) for x in batch]
201
+ outputs = model(images)
202
+ loss = self.config.loss(outputs, masks)
203
+
204
+ total_loss += loss.item()
205
+
206
+ if self.config.n_classes > 1:
207
+ predictions.extend([p.cpu().argmax(dim=0) for p in outputs])
208
+ ground_truth.extend([m.cpu().argmax(dim=0) for m in masks])
209
+ else:
210
+ predictions.extend([(torch.sigmoid(p) > 0.5).float().squeeze().cpu()
211
+ for p in outputs])
212
+ ground_truth.extend([m.cpu().squeeze() for m in masks])
213
+
214
+ metrics = compute_mean_iou(
215
+ predictions,
216
+ ground_truth,
217
+ num_classes=len(self.classes),
218
+ ignore_index=255
219
+ )
220
+
221
+ return total_loss / len(self.val_loader), metrics
222
+
223
+ def _update_tracking(self, epoch, train_loss, val_loss, val_metrics):
224
+ """Updates training metrics and logging."""
225
+ mean_iou = val_metrics['mean_iou']
226
+ print(f"\nLosses - Train: {train_loss:.3f}, Val: {val_loss:.3f}")
227
+ print(f"Mean IoU: {mean_iou:.3f}")
228
+
229
+ self.writer.add_scalar('Loss/train', train_loss, epoch)
230
+ self.writer.add_scalar('Loss/val', val_loss, epoch)
231
+ self.writer.add_scalar('IoU/mean', mean_iou, epoch)
232
+
233
+ for idx, iou in enumerate(val_metrics['per_category_iou']):
234
+ print(f"{self.classes[idx]} IoU: {iou:.3f}")
235
+ self.writer.add_scalar(f'IoU/{self.classes[idx]}', iou, epoch)
236
+
237
+ def _adjust_learning_rate(self, epoch):
238
+ """Adjusts learning rate according to schedule."""
239
+ if epoch in self.step_schedule:
240
+ current_lr = self.optimizer.param_groups[0]['lr']
241
+ new_lr = current_lr * self.decay_factor
242
+ for param_group in self.optimizer.param_groups:
243
+ param_group['lr'] = new_lr
244
+ print(f'\nDecreased learning rate: {current_lr:.3e} -> {new_lr:.3e}')
245
+
246
+ def _save_checkpoints(self, model, epoch, metrics):
247
+ """Saves model checkpoints and metrics."""
248
+ epoch_iou = (metrics['mean_iou'] if self.target_class is None
249
+ else metrics['per_category_iou'][self.classes.index(self.target_class)])
250
+
251
+ self.metrics.update({
252
+ 'last_epoch': epoch,
253
+ 'last_epoch_iou': round(float(epoch_iou), 3),
254
+ 'last_epoch_lr': self.optimizer.param_groups[0]['lr']
255
+ })
256
+
257
+ if epoch_iou > self.best_iou:
258
+ self.best_iou = epoch_iou
259
+ self.best_epoch = epoch
260
+ self.metrics.update({
261
+ 'best_epoch': epoch,
262
+ 'best_epoch_iou': round(float(epoch_iou), 3),
263
+ 'overall_iou': round(float(metrics['mean_iou']), 3)
264
+ })
265
+ torch.save(model, os.path.join(self.output_dir, 'best_model.pth'))
266
+ print(f'New best model saved (IoU: {epoch_iou:.3f})')
267
+
268
+ torch.save(model, os.path.join(self.output_dir, 'last_model.pth'))
269
+ with open(os.path.join(self.output_dir, 'metrics.json'), 'w') as f:
270
+ json.dump(self.metrics, f, indent=4)
271
+
272
+ def _save_config(self):
273
+ """Saves training configuration."""
274
+ config = {
275
+ **self.config.config_data,
276
+ 'train_size': self.train_size,
277
+ 'val_size': self.val_size,
278
+ 'epochs': self.epochs,
279
+ 'batch_size': self.batch_size,
280
+ 'optimizer': self.optimizer_type,
281
+ 'workers': self.workers,
282
+ 'target_class': self.target_class or 'overall'
283
+ }
284
+
285
+ with open(os.path.join(self.output_dir, 'config.json'), 'w') as f:
286
+ json.dump(config, f, indent=4)
287
+
288
+ def _resume_training(self, resume_path):
289
+ """Resumes training from checkpoint."""
290
+ if not os.path.exists(resume_path):
291
+ raise FileNotFoundError(f"Resume path not found: {resume_path}")
292
+
293
+ required_files = {
294
+ 'model': 'last_model.pth',
295
+ 'metrics': 'metrics.json',
296
+ 'config': 'config.json'
297
+ }
298
+
299
+ paths = {k: os.path.join(resume_path, v) for k, v in required_files.items()}
300
+ if not all(os.path.exists(p) for p in paths.values()):
301
+ raise FileNotFoundError("Missing required checkpoint files")
302
+
303
+ with open(paths['config']) as f:
304
+ config = json.load(f)
305
+ with open(paths['metrics']) as f:
306
+ metrics = json.load(f)
307
+
308
+ self.current_epoch = metrics['last_epoch'] + 1
309
+ self.best_iou = metrics['best_epoch_iou']
310
+ self.best_epoch = metrics['best_epoch']
311
+ self.learning_rate = metrics['last_epoch_lr']
312
+
313
+ print(f'Resuming training from epoch {self.current_epoch}')
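A hedged training sketch; 'dataset_root' must contain train/Images and train/Masks (optionally val/Images and val/Masks), and the hyperparameters are illustrative:

```python
from SemanticModel.model_core import SegmentationModel
from SemanticModel.training import ModelTrainer

model_config = SegmentationModel(classes=['background', 'crop'],
                                 architecture='unet', encoder='timm-regnety_120')

trainer = ModelTrainer(model_config, root_dir='dataset_root',
                       epochs=40, train_size=1024, batch_size=2,
                       learning_rate=1e-4, optimizer='rmsprop')

# Writes best_model.pth, last_model.pth, metrics.json and TensorBoard logs
# into dataset_root/model_outputs-<architecture>[<encoder>]-<timestamp>/.
model, metrics = trainer.train()
```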
semantic-segmentation/SemanticModel/.ipynb_checkpoints/utilities-checkpoint.py ADDED
@@ -0,0 +1,119 @@
1
+ import os
2
+ import cv2
3
+ import shutil
4
+ import imageio
5
+ import numpy as np
6
+ from glob import glob
7
+ from pathlib import Path
8
+ from typing import List, Tuple, Optional
9
+
10
+ def validate_dimensions(width: int, height: int, stride: int = 32) -> Tuple[int, int]:
11
+ new_height = ((height // stride + 1) * stride
12
+ if height % stride != 0 else height)
13
+ new_width = ((width // stride + 1) * stride
14
+ if width % stride != 0 else width)
15
+ if (new_width, new_height) != (width, height):
16
+ print(f'Adjusted dimensions to: {new_height}H x {new_width}W')
17
+ return new_width, new_height
18
+
19
+ def calc_image_size(image: np.ndarray, target_size: int) -> Tuple[int, int]:
20
+ height, width = image.shape[:2]
21
+ aspect_ratio = width / height
22
+
23
+ if aspect_ratio >= 1:
24
+ new_width = target_size
25
+ new_height = int(target_size / aspect_ratio)
26
+ else:
27
+ new_height = target_size
28
+ new_width = int(target_size * aspect_ratio)
29
+
30
+ return validate_dimensions(new_width, new_height)
31
+
32
+ def convert_coordinates(transform: np.ndarray, x: float, y: float) -> Tuple[float, float]:
33
+ transformed = transform @ np.array([x, y, 1])
34
+ return transformed[0], transformed[1]
35
+
36
+ def list_images(directory: str, mask_format: bool = False) -> List[str]:
37
+ extensions = ['*.png', '*.PNG'] if mask_format else [
38
+ '*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff',
39
+ '*.JPG', '*.JPEG', '*.PNG', '*.TIF', '*.TIFF'
40
+ ]
41
+
42
+ image_paths = []
43
+ for ext in extensions:
44
+ image_paths.extend(glob(os.path.join(directory, ext)))
45
+
46
+ return sorted(list(set(image_paths)))
47
+
48
+ def prepare_dataset_split(root_dir: str, train_ratio: float = 0.7,
49
+ generate_empty_masks: bool = False) -> None:
50
+ image_dir = os.path.join(root_dir, 'Images')
51
+ mask_dir = os.path.join(root_dir, 'Masks')
52
+
53
+ if not all(os.path.exists(d) for d in [image_dir, mask_dir]):
54
+ raise Exception("Required 'Images' and 'Masks' directories not found")
55
+
56
+ image_paths = np.array(list_images(image_dir))
57
+ mask_paths = np.array(list_images(mask_dir, mask_format=True))
58
+
59
+ if generate_empty_masks:
60
+ temp_dir = os.path.join(mask_dir, 'temp')
61
+ create_empty_masks(image_dir, outdir=temp_dir)
62
+
63
+ for mask_path in list_images(temp_dir, mask_format=True):
64
+ target_path = os.path.join(mask_dir, os.path.basename(mask_path))
65
+ if not os.path.exists(target_path):
66
+ shutil.move(mask_path, target_path)
67
+
68
+ shutil.rmtree(temp_dir)
69
+ mask_paths = np.array(list_images(mask_dir, mask_format=True))
70
+
71
+ if len(image_paths) != len(mask_paths):
72
+ raise Exception(f"Unmatched images ({len(image_paths)}) and masks ({len(mask_paths)})")
73
+
74
+ train_ratio = float(train_ratio)
75
+ if not (0 < train_ratio <= 1):
76
+ raise ValueError(f"Invalid train ratio: {train_ratio}")
77
+
78
+ train_size = int(np.floor(train_ratio * len(image_paths)))
79
+ indices = np.random.permutation(len(image_paths))
80
+
81
+ splits = {
82
+ 'train': {'indices': indices[:train_size]},
83
+ 'val': {'indices': indices[train_size:]} if train_ratio < 1 else None
84
+ }
85
+
86
+ for split_name, split_data in splits.items():
87
+ if split_data is None:
88
+ continue
89
+
90
+ split_dir = os.path.join(root_dir, split_name)
91
+ for subdir in ['Images', 'Masks']:
92
+ subdir_path = os.path.join(split_dir, subdir)
93
+ os.makedirs(subdir_path, exist_ok=True)
94
+
95
+ sources = image_paths if subdir == 'Images' else mask_paths
96
+ for idx in split_data['indices']:
97
+ source = sources[idx]
98
+ destination = os.path.join(subdir_path, os.path.basename(source))
99
+ shutil.copyfile(source, destination)
100
+
101
+ print(f"Created {split_name} split with {len(split_data['indices'])} samples")
102
+
103
+ def create_empty_masks(image_dir: str, pixel_value: int = 0,
104
+ outdir: Optional[str] = None) -> str:
105
+ outdir = outdir or os.path.join(image_dir, 'Masks')
106
+ os.makedirs(outdir, exist_ok=True)
107
+
108
+ image_paths = list_images(image_dir)
109
+ print(f"Generating {len(image_paths)} empty masks...")
110
+
111
+ for image_path in image_paths:
112
+ image = imageio.imread(image_path)
113
+ mask = np.full((image.shape[0], image.shape[1]), pixel_value, dtype='uint8')
114
+
115
+ output_path = os.path.join(outdir,
116
+ f"{Path(image_path).stem}.png")
117
+ imageio.imwrite(output_path, mask)
118
+
119
+ return outdir
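A hedged example of the dataset-splitting helper above; it expects 'dataset_root/Images' and 'dataset_root/Masks' and copies files into train/ and val/ subfolders:

```python
from SemanticModel.utilities import prepare_dataset_split

# 70/30 split; generate_empty_masks fills in all-background masks for any
# image that has no annotation yet (useful for purely negative examples).
prepare_dataset_split('dataset_root', train_ratio=0.7, generate_empty_masks=True)
```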
semantic-segmentation/SemanticModel/.ipynb_checkpoints/visualization-checkpoint.py ADDED
@@ -0,0 +1,115 @@
1
+ import cv2
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import torch
5
+
6
+ def plot_predictions(model, images, masks, device, num_samples=4):
7
+ """Visualize model predictions against ground truth."""
8
+ with torch.no_grad():
9
+ model.eval()
10
+ predictions = model.predict(images.to(device))
11
+
12
+ fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4*num_samples))
13
+
14
+ for idx in range(num_samples):
15
+ # Original image
16
+ img = images[idx].permute(1, 2, 0).cpu().numpy()
17
+ axes[idx, 0].imshow(img)
18
+ axes[idx, 0].set_title('Original Image')
19
+
20
+ # Ground truth
21
+ truth = masks[idx].argmax(dim=0).cpu().numpy()
22
+ axes[idx, 1].imshow(truth, cmap='tab20')
23
+ axes[idx, 1].set_title('Ground Truth')
24
+
25
+ # Prediction
26
+ pred = predictions[idx].argmax(dim=0).cpu().numpy()
27
+ axes[idx, 2].imshow(pred, cmap='tab20')
28
+ axes[idx, 2].set_title('Prediction')
29
+
30
+ for ax in axes[idx]:
31
+ ax.axis('off')
32
+
33
+ plt.tight_layout()
34
+ return fig
35
+
36
+ def create_overlay_mask(image, mask, alpha=0.5, color_map=None):
37
+ """Create transparent overlay of segmentation mask on image."""
38
+ if color_map is None:
39
+ color_map = {
40
+ 0: [0, 0, 0], # background
41
+ 1: [255, 0, 0], # class 1 (red)
42
+ 2: [0, 255, 0], # class 2 (green)
43
+ 3: [0, 0, 255], # class 3 (blue)
44
+ }
45
+
46
+ overlay = image.copy()
47
+ mask_colored = np.zeros_like(image)
48
+
49
+ for label, color in color_map.items():
50
+ mask_colored[mask == label] = color
51
+
52
+ cv2.addWeighted(mask_colored, alpha, overlay, 1 - alpha, 0, overlay)
53
+ return overlay
54
+
55
+ def plot_training_history(history):
56
+ """Plot training and validation metrics."""
57
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
58
+
59
+ # Loss plot
60
+ ax1.plot(history['train_loss'], label='Training Loss')
61
+ ax1.plot(history['val_loss'], label='Validation Loss')
62
+ ax1.set_xlabel('Epoch')
63
+ ax1.set_ylabel('Loss')
64
+ ax1.set_title('Training and Validation Loss')
65
+ ax1.legend()
66
+
67
+ # IoU plot
68
+ ax2.plot(history['mean_iou'], label='Mean IoU')
69
+ for class_name, ious in history['class_ious'].items():
70
+ ax2.plot(ious, label=f'{class_name} IoU')
71
+ ax2.set_xlabel('Epoch')
72
+ ax2.set_ylabel('IoU')
73
+ ax2.set_title('IoU Metrics')
74
+ ax2.legend()
75
+
76
+ plt.tight_layout()
77
+ return fig
78
+
79
+ def visualize_predictions_on_batch(model, batch_images, batch_size=8):
80
+ """Create grid visualization for a batch of predictions."""
81
+ with torch.no_grad():
82
+ predictions = model.predict(batch_images)
83
+
84
+ fig = plt.figure(figsize=(15, 5))
85
+ for idx in range(min(batch_size, len(batch_images))):
86
+ plt.subplot(2, 4, idx + 1)
87
+ img = batch_images[idx].permute(1, 2, 0).cpu().numpy()
88
+ mask = predictions[idx].argmax(dim=0).cpu().numpy()
89
+ overlay = create_overlay_mask(img, mask)
90
+ plt.imshow(overlay)
91
+ plt.axis('off')
92
+
93
+ plt.tight_layout()
94
+ return fig
95
+
96
+ def save_visualization(fig, save_path):
97
+ """Save visualization figure."""
98
+ fig.savefig(save_path, bbox_inches='tight', dpi=300)
99
+ plt.close(fig)
100
+
101
+ def generate_color_mapping(num_classes):
102
+ """Generate distinct colors for segmentation classes."""
103
+ colors = [
104
+ [0, 0, 0], # Background (black)
105
+ [255, 0, 0], # Red
106
+ [0, 255, 0], # Green
107
+ [0, 0, 255], # Blue
108
+ [255, 255, 0], # Yellow
109
+ [255, 0, 255], # Magenta
110
+ [0, 255, 255], # Cyan
111
+ [128, 0, 0], # Dark Red
112
+ [0, 128, 0], # Dark Green
113
+ [0, 0, 128] # Dark Blue
114
+ ]
115
+ return colors[:num_classes]
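A hedged sketch of overlaying a saved prediction on its source image ('photo.jpg' and 'photo_pred.png' are placeholder paths):

```python
import cv2
from SemanticModel.visualization import create_overlay_mask

image = cv2.cvtColor(cv2.imread('photo.jpg'), cv2.COLOR_BGR2RGB)
mask = cv2.imread('photo_pred.png', 0)                 # uint8 class indices
overlay = create_overlay_mask(image, mask, alpha=0.5)  # default 4-colour map
cv2.imwrite('photo_overlay.png', cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))
```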
semantic-segmentation/SemanticModel/__init__.py ADDED
File without changes
semantic-segmentation/SemanticModel/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (187 Bytes). View file
 
semantic-segmentation/SemanticModel/__pycache__/custom_losses.cpython-38.pyc ADDED
Binary file (3.45 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/data_loader.cpython-38.pyc ADDED
Binary file (6.72 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/encoder_management.cpython-38.pyc ADDED
Binary file (4.17 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/evaluation_utils.cpython-38.pyc ADDED
Binary file (3.62 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/image_preprocessing.cpython-38.pyc ADDED
Binary file (3.62 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/metrics.cpython-38.pyc ADDED
Binary file (2.56 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/model_core.cpython-38.pyc ADDED
Binary file (4.52 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/prediction.cpython-38.pyc ADDED
Binary file (9.62 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/training.cpython-38.pyc ADDED
Binary file (10.8 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/utilities.cpython-38.pyc ADDED
Binary file (4.03 kB). View file
 
semantic-segmentation/SemanticModel/__pycache__/visualization.cpython-38.pyc ADDED
Binary file (3.41 kB). View file
 
semantic-segmentation/SemanticModel/custom_losses.py ADDED
@@ -0,0 +1,97 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from segmentation_models_pytorch.utils import base
4
+ from segmentation_models_pytorch.base.modules import Activation
5
+
6
+ class FocalLossFunction(base.Loss):
7
+ def __init__(self, activation=None, alpha=0.25, gamma=1.5, reduction='mean', **kwargs):
8
+ super().__init__(**kwargs)
9
+ self.activation = Activation(activation)
10
+ self.alpha = alpha
11
+ self.gamma = gamma
12
+ self.reduction = reduction
13
+
14
+ def forward(self, inputs, targets):
15
+ if inputs.shape[1] == 1: # Binary case
16
+ inputs = torch.cat((inputs, 1 - inputs), dim=1)
17
+ targets = torch.cat((targets, 1 - targets), dim=1)
18
+
19
+ targets = torch.argmax(targets, dim=1)
20
+ cross_entropy = F.cross_entropy(inputs, targets, reduction='none')
21
+ probability = torch.exp(-cross_entropy)
22
+ alpha_factor = self.alpha if inputs.shape[1] > 1 else torch.where(
23
+ targets == 1, 1-self.alpha, self.alpha)
24
+
25
+ focal_weight = alpha_factor * (1 - probability) ** self.gamma * cross_entropy
26
+
27
+ if self.reduction == 'mean':
28
+ return focal_weight.mean()
29
+ elif self.reduction == 'sum':
30
+ return focal_weight.sum()
31
+ return focal_weight
32
+
33
+ class TverskyLossFunction(base.Loss):
34
+ def __init__(self, activation=None, alpha=0.5, beta=0.5, ignore_channels=None,
35
+ reduction='mean', **kwargs):
36
+ super().__init__(**kwargs)
37
+ self.activation = Activation(activation)
38
+ self.alpha = alpha
39
+ self.beta = beta
40
+ self.ignore_channels = ignore_channels
41
+ self.reduction = reduction
42
+
43
+ def forward(self, inputs, targets):
44
+ if self.ignore_channels is not None:
45
+ mask = torch.ones(inputs.shape[1], dtype=torch.bool, device=inputs.device)
46
+ mask[self.ignore_channels] = False
47
+ inputs = inputs[:, mask, ...]
48
+
49
+ num_classes = inputs.shape[1]
50
+ inputs_softmax = (torch.sigmoid(inputs) if num_classes == 1
51
+ else F.softmax(inputs, dim=1))
52
+
53
+ if num_classes == 1:
54
+ inputs_softmax = inputs_softmax.squeeze(1)
55
+ targets = targets.squeeze(1)
56
+
57
+ tversky_loss = 0
58
+ for class_idx in range(num_classes):
59
+ if num_classes == 1:
60
+ flat_inputs = inputs_softmax.reshape(-1)
61
+ flat_targets = targets.reshape(-1)
62
+ else:
63
+ flat_inputs = inputs_softmax[:, class_idx].reshape(-1)
64
+ flat_targets = targets[:, class_idx].reshape(-1)
65
+
66
+ intersection = (flat_inputs * flat_targets).sum()
67
+ fps = ((1 - flat_targets) * flat_inputs).sum()
68
+ fns = (flat_targets * (1 - flat_inputs)).sum()
69
+
70
+ tversky_index = intersection + self.alpha * fps + self.beta * fns + 1e-10
71
+ tversky_loss += 1 - intersection / tversky_index
72
+
73
+ if self.reduction == 'mean':
74
+ return tversky_loss / (1 if num_classes == 1 else num_classes)
75
+ elif self.reduction == 'sum':
76
+ return tversky_loss
77
+ return tversky_loss / inputs.shape[0]
78
+
79
+ class EnhancedCrossEntropy(base.Loss):
80
+ def __init__(self, activation=None, ignore_channels=None, reduction='mean', **kwargs):
81
+ super().__init__(**kwargs)
82
+ self.activation = Activation(activation)
83
+ self.ignore_channels = ignore_channels
84
+ self.reduction = reduction
85
+
86
+ def forward(self, inputs, targets):
87
+ inputs = self.activation(inputs)
88
+
89
+ if self.ignore_channels is not None:
90
+ mask = torch.ones(inputs.shape[1], dtype=torch.bool, device=inputs.device)
91
+ mask[self.ignore_channels] = False
92
+ inputs = inputs[:, mask, ...]
93
+
94
+ if targets.dim() == 4: # Convert one-hot to class indices
95
+ targets = torch.argmax(targets, dim=1)
96
+
97
+ return F.cross_entropy(inputs, targets, reduction=self.reduction)
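A hedged sanity check of the custom losses on random tensors: both expect raw model outputs of shape (N, C, H, W) and one-hot targets of the same shape (C = 1 in the binary case):

```python
import torch
from SemanticModel.custom_losses import FocalLossFunction, TverskyLossFunction

logits = torch.randn(2, 3, 64, 64)                     # 3-class example
labels = torch.randint(0, 3, (2, 64, 64))
targets = torch.nn.functional.one_hot(labels, num_classes=3).permute(0, 3, 1, 2).float()

focal = FocalLossFunction(alpha=0.25, gamma=1.5)
tversky = TverskyLossFunction(alpha=0.5, beta=0.5)
print(focal(logits, targets).item(), tversky(logits, targets).item())
```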
semantic-segmentation/SemanticModel/data_loader.py ADDED
@@ -0,0 +1,129 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from torch.utils.data import Dataset as BaseDataset
5
+
6
+ class SegmentationDataset(BaseDataset):
7
+ """Dataset class for semantic segmentation task."""
8
+
9
+ def __init__(self, data_dir, classes=['background', 'object'],
10
+ augmentation=None, preprocessing=None):
11
+
12
+ self.image_dir = os.path.join(data_dir, 'Images')
13
+ self.mask_dir = os.path.join(data_dir, 'Masks')
14
+
15
+ for dir_path in [self.image_dir, self.mask_dir]:
16
+ if not os.path.exists(dir_path):
17
+ raise FileNotFoundError(f"Directory not found: {dir_path}")
18
+
19
+ self.filenames = self._get_filenames()
20
+ self.image_paths = [os.path.join(self.image_dir, fname) for fname in self.filenames]
21
+ self.mask_paths = self._get_mask_paths()
22
+
23
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
24
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
25
+
26
+ self.augmentation = augmentation
27
+ self.preprocessing = preprocessing
28
+
29
+ def __getitem__(self, index):
30
+ image = self._load_image(self.image_paths[index])
31
+ mask = self._load_mask(self.mask_paths[index])
32
+
33
+ if self.augmentation:
34
+ processed = self.augmentation(image=image, mask=mask)
35
+ image, mask = processed['image'], processed['mask']
36
+
37
+ if self.preprocessing:
38
+ processed = self.preprocessing(image=image, mask=mask)
39
+ image, mask = processed['image'], processed['mask']
40
+
41
+ return image, mask
42
+
43
+ def __len__(self):
44
+ return len(self.filenames)
45
+
46
+ def _get_filenames(self):
47
+ """Returns sorted list of filenames, excluding directories."""
48
+ files = sorted(os.listdir(self.image_dir))
49
+ return [f for f in files if not os.path.isdir(os.path.join(self.image_dir, f))]
50
+
51
+ def _get_mask_paths(self):
52
+ """Generates corresponding mask paths for each image."""
53
+ mask_paths = []
54
+ for image_file in self.filenames:
55
+ name, _ = os.path.splitext(image_file)
56
+ mask_paths.append(os.path.join(self.mask_dir, f"{name}.png"))
57
+ return mask_paths
58
+
59
+ def _load_image(self, path):
60
+ """Loads and converts image to RGB."""
61
+ image = cv2.imread(path)
62
+ return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
63
+
64
+ def _load_mask(self, path):
65
+ """Loads and processes segmentation mask."""
66
+ mask = cv2.imread(path, 0)
67
+ masks = [(mask == value) for value in self.class_values]
68
+ mask = np.stack(masks, axis=-1).astype('float')
69
+ return mask
70
+
71
+ class InferenceDataset(BaseDataset):
72
+ """Dataset class for inference without ground truth masks."""
73
+
74
+ def __init__(self, data_dir, classes=['background', 'object'],
75
+ augmentation=None, preprocessing=None):
76
+ self.filenames = sorted([
77
+ f for f in os.listdir(data_dir)
78
+ if not os.path.isdir(os.path.join(data_dir, f))
79
+ ])
80
+ self.image_paths = [os.path.join(data_dir, fname) for fname in self.filenames]
81
+
82
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
83
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
84
+
85
+ self.augmentation = augmentation
86
+ self.preprocessing = preprocessing
87
+
88
+ def __getitem__(self, index):
89
+ image = cv2.imread(self.image_paths[index])
90
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
91
+ original_height, original_width = image.shape[:2]
92
+
93
+ if self.augmentation:
94
+ image = self.augmentation(image=image)['image']
95
+
96
+ if self.preprocessing:
97
+ image = self.preprocessing(image=image)['image']
98
+
99
+ return image, original_height, original_width
100
+
101
+ def __len__(self):
102
+ return len(self.filenames)
103
+
104
+ class StreamingDataset(BaseDataset):
105
+ """Dataset class optimized for video frame processing."""
106
+
107
+ def __init__(self, data_dir, classes=['background', 'object'],
108
+ augmentation=None, preprocessing=None):
109
+ self.filenames = self._get_frame_filenames(data_dir)
110
+ self.image_paths = [os.path.join(data_dir, fname) for fname in self.filenames]
111
+
112
+ self.target_classes = [cls for cls in classes if cls.lower() != 'background']
113
+ self.class_values = [i for i, cls in enumerate(classes) if cls.lower() != 'background']
114
+
115
+ self.augmentation = augmentation
116
+ self.preprocessing = preprocessing
117
+
118
+ def _get_frame_filenames(self, directory):
119
+ """Returns sorted list of frame filenames."""
120
+ files = sorted(os.listdir(directory))
121
+ return [f for f in files if (('frame' in f or 'Image' in f) and
122
+ f.lower().endswith('jpg') and
123
+ not os.path.isdir(os.path.join(directory, f)))]
124
+
125
+ def __getitem__(self, index):
126
+ return InferenceDataset.__getitem__(self, index)
127
+
128
+ def __len__(self):
129
+ return len(self.filenames)
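A hedged sketch wiring SegmentationDataset into a PyTorch DataLoader; 'dataset_root/train' must hold Images/ and Masks/ subfolders, and preprocessing is omitted here (normally model_config.preprocessing converts arrays to CHW float tensors):

```python
from torch.utils.data import DataLoader
from SemanticModel.data_loader import SegmentationDataset
from SemanticModel.image_preprocessing import get_training_augmentations

train_set = SegmentationDataset('dataset_root/train',
                                classes=['background', 'crop'],
                                augmentation=get_training_augmentations(1024, 1024),
                                preprocessing=None)
train_loader = DataLoader(train_set, batch_size=2, shuffle=True, num_workers=2)
images, masks = next(iter(train_loader))   # without preprocessing: HWC uint8 / one-hot HWC
```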
semantic-segmentation/SemanticModel/encoder_management.py ADDED
@@ -0,0 +1,136 @@
1
+ import os
2
+ import ssl
3
+ import shutil
4
+ import tempfile
5
+ import hashlib
6
+ from tqdm import tqdm
7
+ from torch.hub import get_dir
8
+ from urllib.request import urlopen, Request
9
+
10
+ from segmentation_models_pytorch.encoders import (
11
+ resnet_encoders, dpn_encoders, vgg_encoders, senet_encoders,
12
+ densenet_encoders, inceptionresnetv2_encoders, inceptionv4_encoders,
13
+ efficient_net_encoders, mobilenet_encoders, xception_encoders,
14
+ timm_efficientnet_encoders, timm_resnest_encoders, timm_res2net_encoders,
15
+ timm_regnet_encoders, timm_sknet_encoders, timm_mobilenetv3_encoders,
16
+ timm_gernet_encoders
17
+ )
18
+
19
+ from segmentation_models_pytorch.encoders.timm_universal import TimmUniversalEncoder
20
+
21
+ def initialize_encoders():
22
+ """Initialize dictionary of available encoders."""
23
+ available_encoders = {}
24
+ encoder_modules = [
25
+ resnet_encoders, dpn_encoders, vgg_encoders, senet_encoders,
26
+ densenet_encoders, inceptionresnetv2_encoders, inceptionv4_encoders,
27
+ efficient_net_encoders, mobilenet_encoders, xception_encoders,
28
+ timm_efficientnet_encoders, timm_resnest_encoders, timm_res2net_encoders,
29
+ timm_regnet_encoders, timm_sknet_encoders, timm_mobilenetv3_encoders,
30
+ timm_gernet_encoders
31
+ ]
32
+
33
+ for module in encoder_modules:
34
+ available_encoders.update(module)
35
+
36
+ try:
37
+ import segmentation_models_pytorch
38
+ from packaging import version
39
+ if version.parse(segmentation_models_pytorch.__version__) >= version.parse("0.3.3"):
40
+ from segmentation_models_pytorch.encoders.mix_transformer import mix_transformer_encoders
41
+ from segmentation_models_pytorch.encoders.mobileone import mobileone_encoders
42
+ available_encoders.update(mix_transformer_encoders)
43
+ available_encoders.update(mobileone_encoders)
44
+ except ImportError:
45
+ pass
46
+
47
+ return available_encoders
48
+
49
+ def download_weights(url, destination, hash_prefix=None, show_progress=True):
50
+ """Downloads model weights with progress tracking and verification."""
51
+ ssl._create_default_https_context = ssl._create_unverified_context
52
+
53
+ req = Request(url, headers={"User-Agent": "torch.hub"})
54
+ response = urlopen(req)
55
+ content_length = response.headers.get("Content-Length")
56
+ file_size = int(content_length[0]) if content_length else None
57
+
58
+ destination = os.path.expanduser(destination)
59
+ temp_file = tempfile.NamedTemporaryFile(delete=False, dir=os.path.dirname(destination))
60
+
61
+ try:
62
+ hasher = hashlib.sha256() if hash_prefix else None
63
+
64
+ with tqdm(total=file_size, disable=not show_progress,
65
+ unit='B', unit_scale=True, unit_divisor=1024) as pbar:
66
+ while True:
67
+ buffer = response.read(8192)
68
+ if not buffer:
69
+ break
70
+
71
+ temp_file.write(buffer)
72
+ if hasher:
73
+ hasher.update(buffer)
74
+ pbar.update(len(buffer))
75
+
76
+ temp_file.close()
77
+
78
+ if hasher and hash_prefix:
79
+ digest = hasher.hexdigest()
80
+ if digest[:len(hash_prefix)] != hash_prefix:
81
+ raise RuntimeError(f'Invalid hash value (expected "{hash_prefix}", got "{digest}")')
82
+
83
+ shutil.move(temp_file.name, destination)
84
+
85
+ finally:
86
+ temp_file.close()
87
+ if os.path.exists(temp_file.name):
88
+ os.remove(temp_file.name)
89
+
90
+ def initialize_encoder(name, in_channels=3, depth=5, weights=None, output_stride=32, **kwargs):
91
+ """Initializes and returns configured encoder."""
92
+ encoders = initialize_encoders()
93
+
94
+ if name.startswith("tu-"):
95
+ name = name[3:]
96
+ return TimmUniversalEncoder(
97
+ name=name,
98
+ in_channels=in_channels,
99
+ depth=depth,
100
+ output_stride=output_stride,
101
+ pretrained=weights is not None,
102
+ **kwargs
103
+ )
104
+
105
+ try:
106
+ encoder_config = encoders[name]
107
+ except KeyError:
108
+ raise KeyError(f"Invalid encoder name '{name}'. Available encoders: {list(encoders.keys())}")
109
+
110
+ encoder_class = encoder_config["encoder"]
111
+ encoder_params = encoder_config["params"]
112
+ encoder_params.update(depth=depth)
113
+
114
+ if weights:
115
+ try:
116
+ weights_config = encoder_config["pretrained_settings"][weights]
117
+ except KeyError:
118
+ raise KeyError(
119
+ f"Invalid weights '{weights}' for encoder '{name}'. "
120
+ f"Available options: {list(encoder_config['pretrained_settings'].keys())}"
121
+ )
122
+
123
+ cache_dir = os.path.join(get_dir(), 'checkpoints')
124
+ os.makedirs(cache_dir, exist_ok=True)
125
+
126
+ weights_file = os.path.basename(weights_config["url"])
127
+ weights_path = os.path.join(cache_dir, weights_file)
128
+
129
+ if not os.path.exists(weights_path):
130
+ print(f'Downloading {weights_file}...')
131
+ download_weights(
132
+ weights_config["url"].replace("https", "http"),
133
+ weights_path
134
+ )
135
+
136
+ return encoder_class(**encoder_params)
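A hedged example of the encoder helper above: it instantiates the backbone and pre-downloads the matching checkpoint into torch.hub's cache; the pretrained weights themselves are attached later, when segmentation_models_pytorch builds the full model:

```python
from SemanticModel.encoder_management import initialize_encoder

# Downloads the resnet34 ImageNet checkpoint into the torch.hub cache on first use.
encoder = initialize_encoder(name='resnet34', depth=5, weights='imagenet')
```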
semantic-segmentation/SemanticModel/evaluation_utils.py ADDED
@@ -0,0 +1,108 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ from tqdm import tqdm
5
+ from torch.utils.data import DataLoader
6
+ from segmentation_models_pytorch.base.modules import Activation
7
+
8
+ from SemanticModel.data_loader import SegmentationDataset
9
+ from SemanticModel.metrics import compute_mean_iou
10
+ from SemanticModel.image_preprocessing import get_validation_augmentations
11
+
12
+ def evaluate_model(model_config, data_path, image_size=None):
13
+ """Evaluates model performance on a dataset."""
14
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
+
16
+ classes = ['background'] + model_config.classes if model_config.background_flag else model_config.classes
17
+
18
+ data_path = os.path.realpath(data_path)
19
+ image_subdir = os.path.join(data_path, 'Images')
20
+ mask_subdir = os.path.join(data_path, 'Masks')
21
+
22
+ if not all(os.path.exists(d) for d in [image_subdir, mask_subdir]):
23
+ raise Exception("Missing required subdirectories: 'Images' and 'Masks'")
24
+
25
+ if not image_size:
26
+ sample_image = cv2.imread(os.path.join(image_subdir, os.listdir(image_subdir)[0]))
27
+ height, width = sample_image.shape[:2]
28
+ image_size = max(height, width)
29
+
30
+ evaluation_dataset = SegmentationDataset(
31
+ data_path,
32
+ classes=classes,
33
+ augmentation=get_validation_augmentations(
34
+ im_width=image_size,
35
+ im_height=image_size,
36
+ fixed_size=False
37
+ ),
38
+ preprocessing=model_config.preprocessing
39
+ )
40
+
41
+ evaluation_loader = DataLoader(
42
+ evaluation_dataset,
43
+ batch_size=1,
44
+ shuffle=False,
45
+ num_workers=2
46
+ )
47
+
48
+ model = model_config.model.to(device)
49
+ model.eval()
50
+
51
+ requires_sigmoid = False
52
+ if model_config.n_classes == 1:
53
+ current_activation = _check_activation_function(model)
54
+ if current_activation != 'Sigmoid':
55
+ requires_sigmoid = True
56
+
57
+ predictions = []
58
+ ground_truth = []
59
+
60
+ print("Evaluating model performance...")
61
+ with torch.no_grad():
62
+ for images, masks in tqdm(evaluation_loader):
63
+ images = images.to(device)
64
+ masks = masks.to(device)
65
+
66
+ outputs = model.forward(images)
67
+
68
+ if model_config.n_classes > 1:
69
+ predictions.extend([p.cpu().argmax(dim=0) for p in outputs])
70
+ ground_truth.extend([gt.cpu().argmax(dim=0) for gt in masks])
71
+ else:
72
+ if requires_sigmoid:
73
+ predictions.extend([
74
+ (torch.sigmoid(p) > 0.5).float().squeeze().cpu()
75
+ for p in outputs
76
+ ])
77
+ else:
78
+ predictions.extend([
79
+ (p > 0.5).float().squeeze().cpu()
80
+ for p in outputs
81
+ ])
82
+ ground_truth.extend([gt.cpu().squeeze() for gt in masks])
83
+
84
+ metrics = compute_mean_iou(
85
+ predictions,
86
+ ground_truth,
87
+ num_classes=len(classes),
88
+ ignore_index=255
89
+ )
90
+
91
+ print("\nEvaluation Results:")
92
+ print(f"Mean IoU: {metrics['mean_iou']:.3f}")
93
+ print("\nPer-class IoU:")
94
+ for idx, iou in enumerate(metrics['per_category_iou']):
95
+ print(f"{classes[idx]}: {iou:.3f}")
96
+
97
+ return metrics
98
+
99
+ def _check_activation_function(model):
100
+ """Checks the activation function used in model's segmentation head."""
101
+ from segmentation_models_pytorch.base.modules import Activation
102
+
103
+ activation_functions = []
104
+ for _, module in model.segmentation_head.named_children():
105
+ if isinstance(module, Activation):
106
+ activation_functions.append(type(module.activation).__name__)
107
+
108
+ return activation_functions[-1] if activation_functions else None
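A hedged evaluation sketch; 'dataset_root/val' must contain Images/ and Masks/, and an untrained model is constructed only to show the call pattern:

```python
from SemanticModel.model_core import SegmentationModel
from SemanticModel.evaluation_utils import evaluate_model

# In practice, pass the trained SegmentationModel instance instead.
model_config = SegmentationModel(classes=['background', 'crop'],
                                 architecture='unet', encoder='timm-regnety_120')
metrics = evaluate_model(model_config, 'dataset_root/val', image_size=1024)
print(metrics['mean_iou'], metrics['per_category_iou'])
```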
semantic-segmentation/SemanticModel/image_preprocessing.py ADDED
@@ -0,0 +1,81 @@
1
+ import cv2
2
+ import numpy as np
3
+ import albumentations as albu
4
+ from albumentations.augmentations.geometric.resize import LongestMaxSize
5
+
6
+ def round_pixel_dim(dimension: float) -> int:
7
+ """Rounds pixel dimensions consistently."""
8
+ if abs(round(dimension) - dimension) == 0.5:
9
+ return int(2.0 * round(dimension / 2.0))
10
+ return int(round(dimension))
11
+
12
+ def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
13
+ """Resizes image maintaining aspect ratio and ensures dimensions are stride-compatible."""
14
+ height, width = image.shape[:2]
15
+ max_dimension = max(height, width)
16
+
17
+ if ((height % stride == 0) and (width % stride == 0) and
18
+ (max_dimension <= target_size)):
19
+ return image
20
+
21
+ scale = target_size / float(max(width, height))
22
+ new_dims = tuple(round_pixel_dim(dim * scale) for dim in (height, width))
23
+ new_height, new_width = new_dims
24
+
25
+ new_height = ((new_height // stride + 1) * stride
26
+ if new_height % stride != 0 else new_height)
27
+ new_width = ((new_width // stride + 1) * stride
28
+ if new_width % stride != 0 else new_width)
29
+
30
+ return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
31
+
32
+ class PaddedResize(LongestMaxSize):
33
+ def apply(self, img: np.ndarray, target_size: int = 1024,
34
+ interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
35
+ return resize_with_padding(img, target_size=target_size, interpolation=interpolation)
36
+
37
+ def get_training_augmentations(width=768, height=576):
38
+ """Configures training-time augmentations."""
39
+ target_size = max([width, height])
40
+ transforms = [
41
+ albu.HorizontalFlip(p=0.5),
42
+ albu.ShiftScaleRotate(
43
+ scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
44
+ albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
45
+ albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
46
+ albu.GaussNoise(p=0.2),
47
+ albu.Perspective(p=0.2),
48
+ albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
49
+ albu.OneOf([
50
+ albu.Sharpen(p=1),
51
+ albu.Blur(blur_limit=3, p=1),
52
+ albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
53
+ albu.OneOf([
54
+ albu.RandomBrightnessContrast(p=1),
55
+ albu.HueSaturationValue(p=1)], p=0.33),
56
+ ]
57
+ return albu.Compose(transforms)
58
+
59
+ def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
60
+ """Configures validation/inference-time augmentations."""
61
+ if fixed_size:
62
+ transforms = [albu.Resize(height=height, width=width, always_apply=True)]
63
+ return albu.Compose(transforms)
64
+
65
+ target_size = max(width, height)
66
+ transforms = [PaddedResize(max_size=target_size, always_apply=True)]
67
+ return albu.Compose(transforms)
68
+
69
+ def convert_to_tensor(x, **kwargs):
70
+ """Converts image array to PyTorch tensor format."""
71
+ if x.ndim == 2:
72
+ x = np.expand_dims(x, axis=-1)
73
+ return x.transpose(2, 0, 1).astype('float32')
74
+
75
+ def get_preprocessing_pipeline(preprocessing_fn):
76
+ """Builds preprocessing pipeline including normalization and tensor conversion."""
77
+ transforms = [
78
+ albu.Lambda(image=preprocessing_fn),
79
+ albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
80
+ ]
81
+ return albu.Compose(transforms)
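A hedged sketch of the full preprocessing path for a single image (encoder name and image shape are illustrative):

```python
import numpy as np
import segmentation_models_pytorch as smp
from SemanticModel.image_preprocessing import (get_preprocessing_pipeline,
                                               get_validation_augmentations)

preprocess_fn = smp.encoders.get_preprocessing_fn('resnet34', 'imagenet')
pipeline = get_preprocessing_pipeline(preprocess_fn)

image = np.zeros((1440, 1920, 3), dtype=np.uint8)            # placeholder RGB image
resized = get_validation_augmentations(1920, 1440)(image=image)['image']
ready = pipeline(image=resized)['image']                     # float32, shape (3, H, W)
```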
semantic-segmentation/SemanticModel/metrics.py ADDED
@@ -0,0 +1,94 @@
1
+ from typing import Dict, Optional
2
+ import numpy as np
3
+
4
+ def compute_intersection_union(prediction, ground_truth, num_classes, ignore_index: int,
5
+ label_mapping: Optional[Dict[int, int]] = None,
6
+ reduce_labels: bool = False):
7
+ """Computes intersection and union for IoU calculation."""
8
+
9
+ if label_mapping:
10
+ for old_id, new_id in label_mapping.items():
11
+ ground_truth[ground_truth == old_id] = new_id
12
+
13
+ prediction = np.array(prediction)
14
+ ground_truth = np.array(ground_truth)
15
+
16
+ if reduce_labels:
17
+ ground_truth[ground_truth == 0] = 255
18
+ ground_truth = ground_truth - 1
19
+ ground_truth[ground_truth == 254] = 255
20
+
21
+ valid_mask = np.not_equal(ground_truth, ignore_index)
22
+ prediction = prediction[valid_mask]
23
+ ground_truth = ground_truth[valid_mask]
24
+
25
+ intersection_mask = prediction == ground_truth
26
+ intersection = prediction[intersection_mask]
27
+
28
+ area_intersection = np.histogram(intersection, bins=num_classes,
29
+ range=(0, num_classes - 1))[0]
30
+ area_prediction = np.histogram(prediction, bins=num_classes,
31
+ range=(0, num_classes - 1))[0]
32
+ area_ground_truth = np.histogram(ground_truth, bins=num_classes,
33
+ range=(0, num_classes - 1))[0]
34
+ area_union = area_prediction + area_ground_truth - area_intersection
35
+
36
+ return area_intersection, area_union, area_prediction, area_ground_truth
37
+
38
+ def compute_total_intersection_union(predictions, ground_truths, num_classes, ignore_index: int,
39
+ label_mapping: Optional[Dict[int, int]] = None,
40
+ reduce_labels: bool = False):
41
+ """Computes total intersection and union across all samples."""
42
+
43
+ totals = {
44
+ 'intersection': np.zeros((num_classes,), dtype=np.float64),
45
+ 'union': np.zeros((num_classes,), dtype=np.float64),
46
+ 'prediction': np.zeros((num_classes,), dtype=np.float64),
47
+ 'ground_truth': np.zeros((num_classes,), dtype=np.float64)
48
+ }
49
+
50
+ for pred, gt in zip(predictions, ground_truths):
51
+ intersection, union, pred_area, gt_area = compute_intersection_union(
52
+ pred, gt, num_classes, ignore_index, label_mapping, reduce_labels
53
+ )
54
+ totals['intersection'] += intersection
55
+ totals['union'] += union
56
+ totals['prediction'] += pred_area
57
+ totals['ground_truth'] += gt_area
58
+
59
+ return tuple(totals.values())
60
+
61
+ def compute_mean_iou(predictions, ground_truths, num_classes, ignore_index: int,
62
+ nan_to_num: Optional[int] = None,
63
+ label_mapping: Optional[Dict[int, int]] = None,
64
+ reduce_labels: bool = False):
65
+ """Computes mean IoU and related metrics."""
66
+
67
+ intersection, union, prediction_area, ground_truth_area = compute_total_intersection_union(
68
+ predictions, ground_truths, num_classes, ignore_index, label_mapping, reduce_labels
69
+ )
70
+
71
+ metrics = {}
72
+
73
+ # Compute overall accuracy
74
+ total_accuracy = intersection.sum() / ground_truth_area.sum()
75
+
76
+ # Compute IoU per class
77
+ iou_per_class = intersection / union
78
+ accuracy_per_class = intersection / ground_truth_area
79
+
80
+ metrics.update({
81
+ "mean_iou": np.nanmean(iou_per_class),
82
+ "mean_accuracy": np.nanmean(accuracy_per_class),
83
+ "overall_accuracy": total_accuracy,
84
+ "per_category_iou": iou_per_class,
85
+ "per_category_accuracy": accuracy_per_class
86
+ })
87
+
88
+ if nan_to_num is not None:
89
+ metrics = {
90
+ metric: np.nan_to_num(value, nan=nan_to_num)
91
+ for metric, value in metrics.items()
92
+ }
93
+
94
+ return metrics
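A small, self-contained sketch of how these helpers can be called (illustration only; the toy masks and the 255 ignore value are assumptions, chosen to match how the trainer calls this module).

```python
import numpy as np
from SemanticModel.metrics import compute_mean_iou

predictions = [np.array([[0, 0, 1, 1],
                         [0, 2, 1, 1],
                         [2, 2, 2, 1],
                         [0, 0, 2, 2]])]
ground_truths = [np.array([[0, 0, 1, 1],
                           [0, 2, 2, 1],
                           [2, 2, 2, 255],   # 255 marks ignored pixels
                           [0, 0, 2, 2]])]

results = compute_mean_iou(predictions, ground_truths,
                           num_classes=3, ignore_index=255)
print(results['mean_iou'], results['overall_accuracy'])
print(results['per_category_iou'])  # one IoU per class index
```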
semantic-segmentation/SemanticModel/model_core.py ADDED
@@ -0,0 +1,129 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import segmentation_models_pytorch as smp
4
+ from segmentation_models_pytorch import utils
5
+
6
+ from SemanticModel.encoder_management import initialize_encoder
7
+ from SemanticModel.custom_losses import FocalLossFunction, TverskyLossFunction, EnhancedCrossEntropy
8
+ from SemanticModel.image_preprocessing import get_preprocessing_pipeline
9
+
10
+ class SegmentationModel:
11
+ def __init__(self, classes=['background', 'foreground'], architecture='unet',
12
+ encoder='timm-regnety_120', weights='imagenet', loss=None):
13
+ self._initialize_classes(classes)
14
+ self.architecture = architecture
15
+ self.encoder = encoder
16
+ self.weights = weights
17
+ self._setup_loss_function(loss)
18
+ self._initialize_model()
19
+
20
+ def _initialize_classes(self, classes):
21
+ """Sets up class configuration."""
22
+ if len(classes) <= 2:
23
+ self.classes = [c for c in classes if c.lower() != 'background']
24
+ self.class_values = [i for i, c in enumerate(classes) if c.lower() != 'background']
25
+ self.background_flag = 'background' in classes
26
+ else:
27
+ self.classes = classes
28
+ self.class_values = list(range(len(classes)))
29
+ self.background_flag = False
30
+ self.n_classes = len(self.classes)
31
+
32
+ def _setup_loss_function(self, loss):
33
+ """Configures model's loss function."""
34
+ if not loss:
35
+ loss = 'bce_with_logits' if self.n_classes > 1 else 'dice'
36
+
37
+ if loss.lower() not in ['dice', 'bce_with_logits', 'focal', 'tversky']:
38
+ print(f'Invalid loss: {loss}, defaulting to dice')
39
+ loss = 'dice'
40
+
41
+ loss_configs = {
42
+ 'bce_with_logits': {
43
+ 'activation': None,
44
+ 'loss': EnhancedCrossEntropy() if self.n_classes > 1 else utils.losses.BCEWithLogitsLoss()
45
+ },
46
+ 'dice': {
47
+ 'activation': 'softmax' if self.n_classes > 1 else 'sigmoid',
48
+ 'loss': utils.losses.DiceLoss()
49
+ },
50
+ 'focal': {
51
+ 'activation': None,
52
+ 'loss': FocalLossFunction()
53
+ },
54
+ 'tversky': {
55
+ 'activation': None,
56
+ 'loss': TverskyLossFunction()
57
+ }
58
+ }
59
+
60
+ config = loss_configs[loss.lower()]
61
+ self.activation = config['activation']
62
+ self.loss = config['loss']
63
+ self.loss_name = loss
64
+
65
+ def _initialize_model(self):
66
+ """Initializes the segmentation model architecture."""
67
+ if self.weights.endswith('pth'):
68
+ self._load_pretrained_model()
69
+ else:
70
+ self._create_new_model()
71
+
72
+ def _load_pretrained_model(self):
73
+ """Loads model from pretrained weights."""
74
+ print('Loading pretrained model...')
75
+ self.model = torch.load(self.weights)
76
+ if isinstance(self.model, torch.nn.DataParallel):
77
+ self.model = self.model.module
78
+
79
+ try:
80
+ preprocessing_fn = smp.encoders.get_preprocessing_fn(self.encoder, 'imagenet')
81
+ self.preprocessing = get_preprocessing_pipeline(preprocessing_fn)
82
+ except:
83
+ print('Failed to configure preprocessing. Setting to None.')
84
+ self.preprocessing = None
85
+
86
+ def _create_new_model(self):
87
+ """Creates new model with specified architecture."""
88
+ preprocessing_fn = smp.encoders.get_preprocessing_fn(self.encoder, 'imagenet')
89
+ self.preprocessing = get_preprocessing_pipeline(preprocessing_fn)
90
+ initialize_encoder(name=self.encoder, weights=self.weights)
91
+
92
+ architectures = {
93
+ 'unet': smp.Unet,
94
+ 'unet++': smp.UnetPlusPlus,
95
+ 'deeplabv3': smp.DeepLabV3,
96
+ 'deeplabv3+': smp.DeepLabV3Plus,
97
+ 'fpn': smp.FPN,
98
+ 'linknet': smp.Linknet,
99
+ 'manet': smp.MAnet,
100
+ 'pan': smp.PAN,
101
+ 'pspnet': smp.PSPNet
102
+ }
103
+
104
+ if self.architecture not in architectures:
105
+ raise ValueError(f'Unsupported architecture: {self.architecture}')
106
+
107
+ self.model = architectures[self.architecture](
108
+ encoder_name=self.encoder,
109
+ encoder_weights=self.weights,
110
+ classes=self.n_classes,
111
+ activation=self.activation
112
+ )
113
+
114
+ @property
115
+ def config_data(self):
116
+ """Returns model configuration data."""
117
+ return {
118
+ 'architecture': self.architecture,
119
+ 'encoder': self.encoder,
120
+ 'weights': self.weights,
121
+ 'activation': self.activation,
122
+ 'loss': self.loss_name,
123
+ 'classes': ['background'] + self.classes if self.background_flag else self.classes
124
+ }
125
+
126
+ def list_architectures():
127
+ """Returns available architecture options."""
128
+ return ['unet', 'unet++', 'deeplabv3', 'deeplabv3+', 'fpn',
129
+ 'linknet', 'manet', 'pan', 'pspnet']
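A minimal construction sketch (not part of the upload); the class list is illustrative, and passing weights='imagenet' downloads encoder weights on first use.

```python
from SemanticModel.model_core import SegmentationModel

model_config = SegmentationModel(
    classes=['background', 'building'],  # two-class setup: 'background' is dropped internally
    architecture='unet',
    encoder='timm-regnety_120',
    weights='imagenet',
    loss='dice',
)
print(model_config.n_classes)    # 1 for the binary case
print(model_config.config_data)  # dict later written to the trainer's config.json
```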
semantic-segmentation/SemanticModel/prediction.py ADDED
@@ -0,0 +1,336 @@
1
+ import os
2
+ import cv2
3
+ import time
4
+ import torch
5
+ import imageio
6
+ import tifffile
7
+ import numpy as np
8
+ import slidingwindow
9
+ import rasterio as rio
10
+ import geopandas as gpd
11
+ from shapely.geometry import Polygon
12
+ from rasterio import mask as riomask
13
+ from torch.utils.data import DataLoader
14
+ from SemanticModel.visualization import generate_color_mapping
15
+ from SemanticModel.image_preprocessing import get_validation_augmentations
16
+ from SemanticModel.data_loader import InferenceDataset, StreamingDataset
17
+ from SemanticModel.utilities import calc_image_size, convert_coordinates
18
+
19
+ class PredictionPipeline:
20
+ def __init__(self, model_config, device=None):
21
+ self.config = model_config
22
+ self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
+ self.classes = ['background'] + model_config.classes if model_config.background_flag else model_config.classes
24
+ self.colors = generate_color_mapping(len(self.classes))
25
+ self.model = model_config.model.to(self.device)
26
+ self.model.eval()
27
+
28
+ def _preprocess_image(self, image_path, target_size=None):
29
+ """Preprocesses single image for prediction."""
30
+ image = cv2.imread(image_path)
31
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
32
+ height, width = image.shape[:2]
33
+
34
+ target_size = target_size or max(height, width)
35
+ test_height, test_width = calc_image_size(image, target_size)
36
+
37
+ augmentation = get_validation_augmentations(test_width, test_height)
38
+ image = augmentation(image=image)['image']
39
+ image = self.config.preprocessing(image=image)['image']
40
+
41
+ return image, (height, width)
42
+
43
+ def predict_single_image(self, image_path, target_size=None, output_dir=None,
44
+ format='integer', save_output=True):
45
+ """Generates prediction for a single image."""
46
+ image, original_dims = self._preprocess_image(image_path, target_size)
47
+ x_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
48
+
49
+ with torch.no_grad():
50
+ prediction = self.model.predict(x_tensor)
51
+
52
+ if self.config.n_classes > 1:
53
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
54
+ else:
55
+ prediction = prediction.squeeze().cpu().numpy().round()
56
+
57
+ # Resize to original dimensions if needed
58
+ if prediction.shape[:2] != original_dims:
59
+ prediction = cv2.resize(prediction, original_dims[::-1],
60
+ interpolation=cv2.INTER_NEAREST)
61
+
62
+ prediction = self._format_prediction(prediction, format)
63
+
64
+ if save_output:
65
+ self._save_prediction(prediction, image_path, output_dir, format)
66
+
67
+ return prediction
68
+
69
+ def predict_directory(self, input_dir, target_size=None, output_dir=None,
70
+ fixed_size=True, format='integer'):
71
+ """Generates predictions for all images in directory."""
72
+ output_dir = output_dir or os.path.join(input_dir, 'predictions')
73
+ os.makedirs(output_dir, exist_ok=True)
74
+
75
+ dataset = InferenceDataset(
76
+ input_dir,
77
+ classes=self.classes,
78
+ augmentation=get_validation_augmentations(
79
+ target_size, target_size, fixed_size=fixed_size
80
+ ) if target_size else None,
81
+ preprocessing=self.config.preprocessing
82
+ )
83
+
84
+ total_images = len(dataset)
85
+ start_time = time.time()
86
+
87
+ for idx in range(total_images):
88
+ if (idx + 1) % 10 == 0 or idx == total_images - 1:
89
+ elapsed = time.time() - start_time
90
+ print(f'\rProcessed {idx+1}/{total_images} images in {elapsed:.1f}s',
91
+ end='')
92
+
93
+ image, height, width = dataset[idx]
94
+ filename = dataset.filenames[idx]
95
+
96
+ x_tensor = torch.from_numpy(image).to(self.device).unsqueeze(0)
97
+ with torch.no_grad():
98
+ prediction = self.model.predict(x_tensor)
99
+
100
+ if self.config.n_classes > 1:
101
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
102
+ else:
103
+ prediction = prediction.squeeze().cpu().numpy().round()
104
+
105
+ if prediction.shape != (height, width):
106
+ prediction = cv2.resize(prediction, (width, height),
107
+ interpolation=cv2.INTER_NEAREST)
108
+
109
+ prediction = self._format_prediction(prediction, format)
110
+ self._save_prediction(prediction, filename, output_dir, format)
111
+
112
+ print(f'\nPredictions saved to: {output_dir}')
113
+ return output_dir
114
+
115
+ def predict_raster(self, raster_path, tile_size=1024, overlap=0.175,
116
+ boundary_path=None, output_path=None, format='integer'):
117
+ """Processes large raster images using tiling approach."""
118
+ print('Loading raster...')
119
+ with rio.open(raster_path) as src:
120
+ raster = src.read()
121
+ raster = np.moveaxis(raster, 0, 2)[:,:,:3]
122
+ profile = src.profile
123
+ transform = src.transform
124
+
125
+ if boundary_path:
126
+ boundary = gpd.read_file(boundary_path)
127
+ boundary = boundary.to_crs(profile['crs'])
128
+ boundary_geom = boundary.iloc[0].geometry
129
+
130
+ tiles = slidingwindow.generate(
131
+ raster,
132
+ slidingwindow.DimOrder.HeightWidthChannel,
133
+ tile_size,
134
+ overlap
135
+ )
136
+
137
+ pred_raster = np.zeros_like(raster[:,:,0], dtype='uint8')
138
+ confidence = np.zeros_like(pred_raster, dtype=np.float32)
139
+
140
+ aug = get_validation_augmentations(tile_size, tile_size, fixed_size=False)
141
+
142
+ for idx, tile in enumerate(tiles):
143
+ if (idx + 1) % 10 == 0 or idx == len(tiles) - 1:
144
+ print(f'\rProcessed {idx+1}/{len(tiles)} tiles', end='')
145
+
146
+ bounds = tile.indices()
147
+
148
+ tile_image = raster[bounds[0], bounds[1]]
149
+
150
+ if boundary_path:
151
+ corners = [
152
+ convert_coordinates(transform, bounds[1].start, bounds[0].start),
153
+ convert_coordinates(transform, bounds[1].stop, bounds[0].start),
154
+ convert_coordinates(transform, bounds[1].stop, bounds[0].stop),
155
+ convert_coordinates(transform, bounds[1].start, bounds[0].stop)
156
+ ]
157
+ if not Polygon(corners).intersects(boundary_geom):
158
+ continue
159
+
160
+ processed = aug(image=tile_image)['image']
161
+ processed = self.config.preprocessing(image=processed)['image']
162
+
163
+ x_tensor = torch.from_numpy(processed).to(self.device).unsqueeze(0)
164
+ with torch.no_grad():
165
+ prediction = self.model.predict(x_tensor)
166
+ prediction = prediction.squeeze().cpu().numpy()
167
+
168
+ if self.config.n_classes > 1:
169
+ tile_pred = np.argmax(prediction, axis=0)
170
+ tile_conf = np.max(prediction, axis=0)
171
+ else:
172
+ tile_conf = np.abs(prediction - 0.5)
173
+ tile_pred = prediction.round()
174
+
175
+ if tile_pred.shape != tile_image.shape[:2]:
176
+ tile_pred = cv2.resize(tile_pred, tile_image.shape[:2][::-1],
177
+ interpolation=cv2.INTER_NEAREST)
178
+ tile_conf = cv2.resize(tile_conf, tile_image.shape[:2][::-1],
179
+ interpolation=cv2.INTER_LINEAR)
180
+
181
+ # Update prediction and confidence maps
182
+ existing_conf = confidence[bounds[0], bounds[1]]
183
+ existing_pred = pred_raster[bounds[0], bounds[1]]
184
+
185
+ mask = existing_conf < tile_conf
186
+ existing_pred[mask] = tile_pred[mask]
187
+ existing_conf[mask] = tile_conf[mask]
188
+
189
+ pred_raster[bounds[0], bounds[1]] = existing_pred
190
+ confidence[bounds[0], bounds[1]] = existing_conf
191
+
192
+ pred_raster = self._format_prediction(pred_raster, format)
193
+
194
+ if output_path or boundary_path:
195
+ self._save_raster_prediction(
196
+ pred_raster, raster_path, output_path,
197
+ profile, boundary_geom if boundary_path else None
198
+ )
199
+
200
+ return pred_raster, profile
201
+
202
+ def _format_prediction(self, prediction, format):
203
+ """Formats prediction according to specified output type."""
204
+ if format == 'integer':
205
+ return prediction.astype('uint8')
206
+ elif format == 'color':
207
+ return self._apply_color_mapping(prediction)
208
+ else:
209
+ raise ValueError(f"Unsupported format: {format}")
210
+
211
+ def _save_prediction(self, prediction, source_path, output_dir, format):
212
+ """Saves prediction to disk."""
213
+ filename = os.path.splitext(os.path.basename(source_path))[0]
214
+ output_path = os.path.join(output_dir, f"{filename}_pred.png")
215
+ cv2.imwrite(output_path, cv2.cvtColor(prediction, cv2.COLOR_RGB2BGR) if prediction.ndim == 3 else prediction)  # color masks are RGB; OpenCV writes BGR
216
+
217
+
218
+ def _save_raster_prediction(self, prediction, source_path, output_path,
219
+ profile, boundary=None):
220
+ """Saves raster prediction with geospatial information."""
221
+ output_path = output_path or source_path.replace(
222
+ os.path.splitext(source_path)[1], '_predicted.tif'
223
+ )
224
+
225
+ profile.update(
226
+ dtype='uint8',
227
+ count=3 if prediction.ndim == 3 else 1
228
+ )
229
+
230
+ with rio.open(output_path, 'w', **profile) as dst:
231
+ if prediction.ndim == 3:
232
+ for i in range(3):
233
+ dst.write(prediction[:,:,i], i+1)
234
+ else:
235
+ dst.write(prediction, 1)
236
+
237
+ if boundary:
238
+ with rio.open(output_path) as src:
239
+ cropped, transform = riomask.mask(src, [boundary], crop=True)
240
+ profile.update(
241
+ height=cropped.shape[1],
242
+ width=cropped.shape[2],
243
+ transform=transform
244
+ )
245
+
246
+ os.remove(output_path)
247
+ with rio.open(output_path, 'w', **profile) as dst:
248
+ dst.write(cropped)
249
+
250
+ print(f'\nPrediction saved to: {output_path}')
251
+
252
+ def predict_video_frames(self, input_dir, target_size=None, output_dir=None):
253
+ """Processes video frames with specialized visualization."""
254
+ output_dir = output_dir or os.path.join(input_dir, 'predictions')
255
+ os.makedirs(output_dir, exist_ok=True)
256
+
257
+ dataset = StreamingDataset(
258
+ input_dir,
259
+ classes=self.classes,
260
+ augmentation=get_validation_augmentations(
261
+ target_size, target_size
262
+ ) if target_size else None,
263
+ preprocessing=self.config.preprocessing
264
+ )
265
+
266
+ image = cv2.imread(dataset.image_paths[0])
267
+ height, width = image.shape[:2]
268
+
269
+ white = 255 * np.ones((height, width))
270
+ black = np.zeros_like(white)
271
+ red = np.dstack((white, black, black))
272
+ blue = np.dstack((black, black, white))
273
+
274
+ # Pre-compute rotated versions
275
+ rotated_red = np.rot90(red)
276
+ rotated_blue = np.rot90(blue)
277
+
278
+ total_frames = len(dataset)
279
+ start_time = time.time()
280
+
281
+ for idx in range(total_frames):
282
+ if (idx + 1) % 10 == 0 or idx == total_frames - 1:
283
+ elapsed = time.time() - start_time
284
+ print(f'\rProcessed {idx+1}/{total_frames} frames in {elapsed:.1f}s', end='')
285
+
286
+ frame, height, width = dataset[idx]
287
+ filename = dataset.filenames[idx]
288
+
289
+ x_tensor = torch.from_numpy(frame).to(self.device).unsqueeze(0)
290
+ with torch.no_grad():
291
+ prediction = self.model.predict(x_tensor)
292
+
293
+ if self.config.n_classes > 1:
294
+ prediction = np.argmax(prediction.squeeze().cpu().numpy(), axis=0)
295
+ masks = [prediction == i for i in range(1, self.config.n_classes)]
296
+ else:
297
+ prediction = prediction.squeeze().cpu().numpy().round()
298
+ masks = [prediction == 1]
299
+
300
+ if prediction.shape != (height, width):
301
+ prediction = cv2.resize(prediction, (width, height),
302
+ interpolation=cv2.INTER_NEAREST)
303
+
304
+ original = cv2.imread(os.path.join(input_dir, filename))
305
+ original = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)
306
+
307
+ try:
308
+ for i, mask in enumerate(masks):
309
+ color = red if i == 0 else blue
310
+ rotated_color = rotated_red if i == 0 else rotated_blue
311
+ try:
312
+ original[mask,:] = 0.45*original[mask,:] + 0.55*color[mask,:]
313
+ except:
314
+ original[mask,:] = 0.45*original[mask,:] + 0.55*rotated_color[mask,:]
315
+ except:
316
+ print(f"\nWarning: Error processing frame {filename}")
317
+ continue
318
+
319
+ output_path = os.path.join(output_dir, filename)
320
+ imageio.imwrite(output_path, original, quality=100)
321
+
322
+ print(f'\nProcessed frames saved to: {output_dir}')
323
+ return output_dir
324
+
325
+ def _apply_color_mapping(self, prediction):
326
+ """Applies color mapping to prediction."""
327
+ height, width = prediction.shape
328
+ colored = np.zeros((height, width, 3), dtype='uint8')
329
+
330
+ for i, class_name in enumerate(self.classes):
331
+ if class_name.lower() == 'background':
332
+ continue
333
+ color = self.colors[i]
334
+ colored[prediction == i] = color
335
+
336
+ return colored
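A usage sketch under assumed paths (the checkpoint and image locations below are hypothetical; any model saved by the trainer as best_model.pth should load the same way).

```python
from SemanticModel.model_core import SegmentationModel
from SemanticModel.prediction import PredictionPipeline

config = SegmentationModel(classes=['background', 'building'],
                           weights='model_outputs/best_model.pth')  # hypothetical checkpoint
pipeline = PredictionPipeline(config)  # uses CUDA automatically when available

# Single image: returns the mask and writes <name>_pred.png into output_dir.
mask = pipeline.predict_single_image('samples/site_01.jpg',
                                     target_size=1024,
                                     output_dir='samples/predictions')

# Every image in a folder.
pipeline.predict_directory('samples', target_size=1024,
                           output_dir='samples/predictions')
```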
semantic-segmentation/SemanticModel/training.py ADDED
@@ -0,0 +1,313 @@
1
+ import os
2
+ import json
3
+ import torch
4
+ import wandb
5
+ import datetime
6
+ import numpy as np
7
+ from tqdm import tqdm
8
+ from torch.utils.data import DataLoader
9
+ from torch.utils.tensorboard import SummaryWriter
10
+ from segmentation_models_pytorch.base.modules import Activation
11
+
12
+ from SemanticModel.data_loader import SegmentationDataset
13
+ from SemanticModel.metrics import compute_mean_iou
14
+ from SemanticModel.image_preprocessing import get_training_augmentations, get_validation_augmentations
15
+ from SemanticModel.utilities import list_images, validate_dimensions
16
+
17
+ class ModelTrainer:
18
+ def __init__(self, model_config, root_dir, epochs=40, train_size=1024,
19
+ val_size=None, workers=2, batch_size=2, learning_rate=1e-4,
20
+ step_count=2, decay_factor=0.8, wandb_config=None,
21
+ optimizer='rmsprop', target_class=None, resume_path=None):
22
+
23
+ self.config = model_config
24
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
25
+ self.root_dir = root_dir
26
+ self._initialize_training_params(epochs, train_size, val_size, workers,
27
+ batch_size, learning_rate, step_count,
28
+ decay_factor, optimizer, target_class)
29
+ self._setup_directories()
30
+ self._initialize_datasets()
31
+ self._setup_optimizer()
32
+ self._initialize_tracking()
33
+
34
+ if resume_path:
35
+ self._resume_training(resume_path)
36
+
37
+ def _initialize_training_params(self, epochs, train_size, val_size, workers,
38
+ batch_size, learning_rate, step_count,
39
+ decay_factor, optimizer, target_class):
40
+ self.epochs = epochs
41
+ self.train_size = train_size
42
+ self.val_size = val_size
43
+ self.workers = workers
44
+ self.batch_size = batch_size
45
+ self.learning_rate = learning_rate
46
+ self.step_schedule = self._calculate_step_schedule(epochs, step_count)
47
+ self.decay_factor = decay_factor
48
+ self.optimizer_type = optimizer
49
+ self.target_class = target_class
50
+ self.current_epoch = 1
51
+ self.best_iou = 0.0
52
+ self.best_epoch = 0
53
+ self.classes = ['background'] + self.config.classes if self.config.background_flag else self.config.classes
54
+
55
+ def _setup_directories(self):
56
+ """Verifies and creates necessary directories."""
57
+ self.train_dir = os.path.join(self.root_dir, 'train')
58
+ self.val_dir = os.path.join(self.root_dir, 'val')
59
+
60
+ required_subdirs = ['Images', 'Masks']
61
+ for path in [self.train_dir] + ([self.val_dir] if os.path.exists(self.val_dir) else []):
62
+ for subdir in required_subdirs:
63
+ full_path = os.path.join(path, subdir)
64
+ if not os.path.exists(full_path):
65
+ raise FileNotFoundError(f"Missing directory: {full_path}")
66
+
67
+ def _initialize_datasets(self):
68
+ """Sets up training and validation datasets."""
69
+ self.train_dataset = SegmentationDataset(
70
+ self.train_dir,
71
+ classes=self.classes,
72
+ augmentation=get_training_augmentations(self.train_size, self.train_size),
73
+ preprocessing=self.config.preprocessing
74
+ )
75
+
76
+ if os.path.exists(self.val_dir):
77
+ self.val_dataset = SegmentationDataset(
78
+ self.val_dir,
79
+ classes=self.classes,
80
+ augmentation=get_validation_augmentations(
81
+ self.val_size or self.train_size,
82
+ self.val_size or self.train_size,
83
+ fixed_size=False
84
+ ),
85
+ preprocessing=self.config.preprocessing
86
+ )
87
+ self.val_loader = DataLoader(
88
+ self.val_dataset,
89
+ batch_size=1,
90
+ shuffle=False,
91
+ num_workers=self.workers
92
+ )
93
+ else:
94
+ self.val_dataset = self.train_dataset
95
+ self.val_loader = DataLoader(
96
+ self.val_dataset,
97
+ batch_size=1,
98
+ shuffle=False,
99
+ num_workers=self.workers
100
+ )
101
+
102
+ self.train_loader = DataLoader(
103
+ self.train_dataset,
104
+ batch_size=self.batch_size,
105
+ shuffle=True,
106
+ num_workers=self.workers
107
+ )
108
+
109
+ def _setup_optimizer(self):
110
+ """Configures model optimizer."""
111
+ optimizer_map = {
112
+ 'adam': torch.optim.Adam,
113
+ 'sgd': lambda params: torch.optim.SGD(params, momentum=0.9),
114
+ 'rmsprop': torch.optim.RMSprop
115
+ }
116
+ optimizer_class = optimizer_map.get(self.optimizer_type.lower())
117
+ if not optimizer_class:
118
+ raise ValueError(f"Unsupported optimizer: {self.optimizer_type}")
119
+
120
+ self.optimizer = optimizer_class([{'params': self.config.model.parameters(),
121
+ 'lr': self.learning_rate}])
122
+
123
+ def _initialize_tracking(self):
124
+ """Sets up training progress tracking."""
125
+ timestamp = datetime.datetime.now().strftime("%m-%d-%Y_%H%M%S")
126
+ self.output_dir = os.path.join(
127
+ self.root_dir,
128
+ f'model_outputs-{self.config.architecture}[{self.config.encoder}]-{timestamp}'
129
+ )
130
+ os.makedirs(self.output_dir, exist_ok=True)
131
+
132
+ self.writer = SummaryWriter(log_dir=self.output_dir)
133
+ self.metrics = {
134
+ 'best_epoch': self.best_epoch,
135
+ 'best_epoch_iou': self.best_iou,
136
+ 'last_epoch': 0,
137
+ 'last_epoch_iou': 0.0,
138
+ 'last_epoch_lr': self.learning_rate,
139
+ 'step_schedule': self.step_schedule,
140
+ 'decay_factor': self.decay_factor,
141
+ 'target_class': self.target_class or 'overall'
142
+ }
143
+
144
+ def _calculate_step_schedule(self, epochs, steps):
145
+ """Calculates learning rate step schedule."""
146
+ return list(map(int, np.linspace(0, epochs, steps + 2)[1:-1]))
147
+
148
+ def train(self):
149
+ """Executes training loop."""
150
+ model = self.config.model.to(self.device)
151
+ if torch.cuda.device_count() > 1:
152
+ model = torch.nn.DataParallel(model)
153
+ print(f'Using {torch.cuda.device_count()} GPUs')
154
+
155
+ self._save_config()
156
+
157
+ for epoch in range(self.current_epoch, self.epochs + 1):
158
+ print(f'\nEpoch {epoch}/{self.epochs}')
159
+ print(f'Learning rate: {self.optimizer.param_groups[0]["lr"]:.3e}')
160
+
161
+ train_loss = self._train_epoch(model)
162
+ val_loss, val_metrics = self._validate_epoch(model)
163
+
164
+ self._update_tracking(epoch, train_loss, val_loss, val_metrics)
165
+ self._adjust_learning_rate(epoch)
166
+ self._save_checkpoints(model, epoch, val_metrics)
167
+
168
+ print(f'\nTraining completed. Best {self.metrics["target_class"]} IoU: {self.best_iou:.3f}')
169
+ return model, self.metrics
170
+
171
+ def _train_epoch(self, model):
172
+ """Executes single training epoch."""
173
+ model.train()
174
+ total_loss = 0
175
+ sample_count = 0
176
+
177
+ for batch in tqdm(self.train_loader, desc='Training'):
178
+ images, masks = [x.to(self.device) for x in batch]
179
+ self.optimizer.zero_grad()
180
+
181
+ outputs = model(images)
182
+ loss = self.config.loss(outputs, masks)
183
+ loss.backward()
184
+ self.optimizer.step()
185
+
186
+ total_loss += loss.item() * len(images)
187
+ sample_count += len(images)
188
+
189
+ return total_loss / sample_count
190
+
191
+ def _validate_epoch(self, model):
192
+ """Executes validation pass."""
193
+ model.eval()
194
+ total_loss = 0
195
+ predictions = []
196
+ ground_truth = []
197
+
198
+ with torch.no_grad():
199
+ for batch in tqdm(self.val_loader, desc='Validation'):
200
+ images, masks = [x.to(self.device) for x in batch]
201
+ outputs = model(images)
202
+ loss = self.config.loss(outputs, masks)
203
+
204
+ total_loss += loss.item()
205
+
206
+ if self.config.n_classes > 1:
207
+ predictions.extend([p.cpu().argmax(dim=0) for p in outputs])
208
+ ground_truth.extend([m.cpu().argmax(dim=0) for m in masks])
209
+ else:
210
+ predictions.extend([(torch.sigmoid(p) > 0.5).float().squeeze().cpu()
211
+ for p in outputs])
212
+ ground_truth.extend([m.cpu().squeeze() for m in masks])
213
+
214
+ metrics = compute_mean_iou(
215
+ predictions,
216
+ ground_truth,
217
+ num_classes=len(self.classes),
218
+ ignore_index=255
219
+ )
220
+
221
+ return total_loss / len(self.val_loader), metrics
222
+
223
+ def _update_tracking(self, epoch, train_loss, val_loss, val_metrics):
224
+ """Updates training metrics and logging."""
225
+ mean_iou = val_metrics['mean_iou']
226
+ print(f"\nLosses - Train: {train_loss:.3f}, Val: {val_loss:.3f}")
227
+ print(f"Mean IoU: {mean_iou:.3f}")
228
+
229
+ self.writer.add_scalar('Loss/train', train_loss, epoch)
230
+ self.writer.add_scalar('Loss/val', val_loss, epoch)
231
+ self.writer.add_scalar('IoU/mean', mean_iou, epoch)
232
+
233
+ for idx, iou in enumerate(val_metrics['per_category_iou']):
234
+ print(f"{self.classes[idx]} IoU: {iou:.3f}")
235
+ self.writer.add_scalar(f'IoU/{self.classes[idx]}', iou, epoch)
236
+
237
+ def _adjust_learning_rate(self, epoch):
238
+ """Adjusts learning rate according to schedule."""
239
+ if epoch in self.step_schedule:
240
+ current_lr = self.optimizer.param_groups[0]['lr']
241
+ new_lr = current_lr * self.decay_factor
242
+ for param_group in self.optimizer.param_groups:
243
+ param_group['lr'] = new_lr
244
+ print(f'\nDecreased learning rate: {current_lr:.3e} -> {new_lr:.3e}')
245
+
246
+ def _save_checkpoints(self, model, epoch, metrics):
247
+ """Saves model checkpoints and metrics."""
248
+ epoch_iou = (metrics['mean_iou'] if self.target_class is None
249
+ else metrics['per_category_iou'][self.classes.index(self.target_class)])
250
+
251
+ self.metrics.update({
252
+ 'last_epoch': epoch,
253
+ 'last_epoch_iou': round(float(epoch_iou), 3),
254
+ 'last_epoch_lr': self.optimizer.param_groups[0]['lr']
255
+ })
256
+
257
+ if epoch_iou > self.best_iou:
258
+ self.best_iou = epoch_iou
259
+ self.best_epoch = epoch
260
+ self.metrics.update({
261
+ 'best_epoch': epoch,
262
+ 'best_epoch_iou': round(float(epoch_iou), 3),
263
+ 'overall_iou': round(float(metrics['mean_iou']), 3)
264
+ })
265
+ torch.save(model, os.path.join(self.output_dir, 'best_model.pth'))
266
+ print(f'New best model saved (IoU: {epoch_iou:.3f})')
267
+
268
+ torch.save(model, os.path.join(self.output_dir, 'last_model.pth'))
269
+ with open(os.path.join(self.output_dir, 'metrics.json'), 'w') as f:
270
+ json.dump(self.metrics, f, indent=4)
271
+
272
+ def _save_config(self):
273
+ """Saves training configuration."""
274
+ config = {
275
+ **self.config.config_data,
276
+ 'train_size': self.train_size,
277
+ 'val_size': self.val_size,
278
+ 'epochs': self.epochs,
279
+ 'batch_size': self.batch_size,
280
+ 'optimizer': self.optimizer_type,
281
+ 'workers': self.workers,
282
+ 'target_class': self.target_class or 'overall'
283
+ }
284
+
285
+ with open(os.path.join(self.output_dir, 'config.json'), 'w') as f:
286
+ json.dump(config, f, indent=4)
287
+
288
+ def _resume_training(self, resume_path):
289
+ """Resumes training from checkpoint."""
290
+ if not os.path.exists(resume_path):
291
+ raise FileNotFoundError(f"Resume path not found: {resume_path}")
292
+
293
+ required_files = {
294
+ 'model': 'last_model.pth',
295
+ 'metrics': 'metrics.json',
296
+ 'config': 'config.json'
297
+ }
298
+
299
+ paths = {k: os.path.join(resume_path, v) for k, v in required_files.items()}
300
+ if not all(os.path.exists(p) for p in paths.values()):
301
+ raise FileNotFoundError("Missing required checkpoint files")
302
+
303
+ with open(paths['config']) as f:
304
+ config = json.load(f)
305
+ with open(paths['metrics']) as f:
306
+ metrics = json.load(f)
307
+
308
+ self.current_epoch = metrics['last_epoch'] + 1
309
+ self.best_iou = metrics['best_epoch_iou']
310
+ self.best_epoch = metrics['best_epoch']
311
+ self.learning_rate = metrics['last_epoch_lr']
312
+
313
+ print(f'Resuming training from epoch {self.current_epoch}')
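A training sketch with assumed paths and hyperparameters; the dataset root is expected to contain train/Images, train/Masks and, optionally, val/Images, val/Masks.

```python
from SemanticModel.model_core import SegmentationModel
from SemanticModel.training import ModelTrainer

config = SegmentationModel(classes=['background', 'building'],
                           architecture='unet',
                           encoder='timm-regnety_120',
                           weights='imagenet')

trainer = ModelTrainer(config,
                       root_dir='datasets/buildings',  # hypothetical dataset root
                       epochs=40,
                       train_size=1024,
                       batch_size=2,
                       learning_rate=1e-4)

model, metrics = trainer.train()  # checkpoints and metrics.json land in model_outputs-*/
print(metrics['best_epoch'], metrics['best_epoch_iou'])
```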
semantic-segmentation/SemanticModel/utilities.py ADDED
@@ -0,0 +1,119 @@
1
+ import os
2
+ import cv2
3
+ import shutil
4
+ import imageio
5
+ import numpy as np
6
+ from glob import glob
7
+ from pathlib import Path
8
+ from typing import List, Tuple, Optional
9
+
10
+ def validate_dimensions(width: int, height: int, stride: int = 32) -> Tuple[int, int]:
11
+ if height % stride != 0 or width % stride != 0:
12
+ height = ((height // stride + 1) * stride
13
+ if height % stride != 0 else height)
14
+ width = ((width // stride + 1) * stride
15
+ if width % stride != 0 else width)
16
+ print(f'Adjusted dimensions to: {height}H x {width}W')
17
+ return width, height
18
+
19
+ def calc_image_size(image: np.ndarray, target_size: int) -> Tuple[int, int]:
20
+ height, width = image.shape[:2]
21
+ aspect_ratio = width / height
22
+
23
+ if aspect_ratio >= 1:
24
+ new_width = target_size
25
+ new_height = int(target_size / aspect_ratio)
26
+ else:
27
+ new_height = target_size
28
+ new_width = int(target_size * aspect_ratio)
29
+
30
+ return validate_dimensions(new_width, new_height)
31
+
32
+ def convert_coordinates(transform: np.ndarray, x: float, y: float) -> Tuple[float, float]:
33
+ transformed = transform @ np.array([x, y, 1])
34
+ return transformed[0], transformed[1]
35
+
36
+ def list_images(directory: str, mask_format: bool = False) -> List[str]:
37
+ extensions = ['*.png', '*.PNG'] if mask_format else [
38
+ '*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff',
39
+ '*.JPG', '*.JPEG', '*.PNG', '*.TIF', '*.TIFF'
40
+ ]
41
+
42
+ image_paths = []
43
+ for ext in extensions:
44
+ image_paths.extend(glob(os.path.join(directory, ext)))
45
+
46
+ return sorted(list(set(image_paths)))
47
+
48
+ def prepare_dataset_split(root_dir: str, train_ratio: float = 0.7,
49
+ generate_empty_masks: bool = False) -> None:
50
+ image_dir = os.path.join(root_dir, 'Images')
51
+ mask_dir = os.path.join(root_dir, 'Masks')
52
+
53
+ if not all(os.path.exists(d) for d in [image_dir, mask_dir]):
54
+ raise Exception("Required 'Images' and 'Masks' directories not found")
55
+
56
+ image_paths = np.array(list_images(image_dir))
57
+ mask_paths = np.array(list_images(mask_dir, mask_format=True))
58
+
59
+ if generate_empty_masks:
60
+ temp_dir = os.path.join(mask_dir, 'temp')
61
+ create_empty_masks(image_dir, outdir=temp_dir)
62
+
63
+ for mask_path in list_images(temp_dir, mask_format=True):
64
+ target_path = os.path.join(mask_dir, os.path.basename(mask_path))
65
+ if not os.path.exists(target_path):
66
+ shutil.move(mask_path, target_path)
67
+
68
+ shutil.rmtree(temp_dir)
69
+ mask_paths = np.array(list_images(mask_dir, mask_format=True))
70
+
71
+ if len(image_paths) != len(mask_paths):
72
+ raise Exception(f"Unmatched images ({len(image_paths)}) and masks ({len(mask_paths)})")
73
+
74
+ train_ratio = float(train_ratio)
75
+ if not (0 < train_ratio <= 1):
76
+ raise ValueError(f"Invalid train ratio: {train_ratio}")
77
+
78
+ train_size = int(np.floor(train_ratio * len(image_paths)))
79
+ indices = np.random.permutation(len(image_paths))
80
+
81
+ splits = {
82
+ 'train': {'indices': indices[:train_size]},
83
+ 'val': {'indices': indices[train_size:]} if train_ratio < 1 else None
84
+ }
85
+
86
+ for split_name, split_data in splits.items():
87
+ if split_data is None:
88
+ continue
89
+
90
+ split_dir = os.path.join(root_dir, split_name)
91
+ for subdir in ['Images', 'Masks']:
92
+ subdir_path = os.path.join(split_dir, subdir)
93
+ os.makedirs(subdir_path, exist_ok=True)
94
+
95
+ sources = image_paths if subdir == 'Images' else mask_paths
96
+ for idx in split_data['indices']:
97
+ source = sources[idx]
98
+ destination = os.path.join(subdir_path, os.path.basename(source))
99
+ shutil.copyfile(source, destination)
100
+
101
+ print(f"Created {split_name} split with {len(split_data['indices'])} samples")
102
+
103
+ def create_empty_masks(image_dir: str, pixel_value: int = 0,
104
+ outdir: Optional[str] = None) -> str:
105
+ outdir = outdir or os.path.join(image_dir, 'Masks')
106
+ os.makedirs(outdir, exist_ok=True)
107
+
108
+ image_paths = list_images(image_dir)
109
+ print(f"Generating {len(image_paths)} empty masks...")
110
+
111
+ for image_path in image_paths:
112
+ image = imageio.imread(image_path)
113
+ mask = np.full((image.shape[0], image.shape[1]), pixel_value, dtype='uint8')
114
+
115
+ output_path = os.path.join(outdir,
116
+ f"{Path(image_path).stem}.png")
117
+ imageio.imwrite(output_path, mask)
118
+
119
+ return outdir
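A short sketch of the dataset-splitting helper (the path is hypothetical); it expects Images/ and Masks/ under the given root and produces the train/ and val/ folders the trainer looks for.

```python
from SemanticModel.utilities import prepare_dataset_split

# 80/20 split; all-background masks are generated first for any unlabeled images.
prepare_dataset_split('datasets/buildings',
                      train_ratio=0.8,
                      generate_empty_masks=True)
```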
semantic-segmentation/SemanticModel/visualization.py ADDED
@@ -0,0 +1,115 @@
1
+ import cv2
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import torch
5
+
6
+ def plot_predictions(model, images, masks, device, num_samples=4):
7
+ """Visualize model predictions against ground truth."""
8
+ with torch.no_grad():
9
+ model.eval()
10
+ predictions = model.predict(images.to(device))
11
+
12
+ fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4*num_samples))
13
+
14
+ for idx in range(num_samples):
15
+ # Original image
16
+ img = images[idx].permute(1, 2, 0).cpu().numpy()
17
+ axes[idx, 0].imshow(img)
18
+ axes[idx, 0].set_title('Original Image')
19
+
20
+ # Ground truth
21
+ truth = masks[idx].argmax(dim=0).cpu().numpy()
22
+ axes[idx, 1].imshow(truth, cmap='tab20')
23
+ axes[idx, 1].set_title('Ground Truth')
24
+
25
+ # Prediction
26
+ pred = predictions[idx].argmax(dim=0).cpu().numpy()
27
+ axes[idx, 2].imshow(pred, cmap='tab20')
28
+ axes[idx, 2].set_title('Prediction')
29
+
30
+ for ax in axes[idx]:
31
+ ax.axis('off')
32
+
33
+ plt.tight_layout()
34
+ return fig
35
+
36
+ def create_overlay_mask(image, mask, alpha=0.5, color_map=None):
37
+ """Create transparent overlay of segmentation mask on image."""
38
+ if color_map is None:
39
+ color_map = {
40
+ 0: [0, 0, 0], # background
41
+ 1: [255, 0, 0], # class 1 (red)
42
+ 2: [0, 255, 0], # class 2 (green)
43
+ 3: [0, 0, 255], # class 3 (blue)
44
+ }
45
+
46
+ overlay = image.copy()
47
+ mask_colored = np.zeros_like(image)
48
+
49
+ for label, color in color_map.items():
50
+ mask_colored[mask == label] = color
51
+
52
+ cv2.addWeighted(mask_colored, alpha, overlay, 1 - alpha, 0, overlay)
53
+ return overlay
54
+
55
+ def plot_training_history(history):
56
+ """Plot training and validation metrics."""
57
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
58
+
59
+ # Loss plot
60
+ ax1.plot(history['train_loss'], label='Training Loss')
61
+ ax1.plot(history['val_loss'], label='Validation Loss')
62
+ ax1.set_xlabel('Epoch')
63
+ ax1.set_ylabel('Loss')
64
+ ax1.set_title('Training and Validation Loss')
65
+ ax1.legend()
66
+
67
+ # IoU plot
68
+ ax2.plot(history['mean_iou'], label='Mean IoU')
69
+ for class_name, ious in history['class_ious'].items():
70
+ ax2.plot(ious, label=f'{class_name} IoU')
71
+ ax2.set_xlabel('Epoch')
72
+ ax2.set_ylabel('IoU')
73
+ ax2.set_title('IoU Metrics')
74
+ ax2.legend()
75
+
76
+ plt.tight_layout()
77
+ return fig
78
+
79
+ def visualize_predictions_on_batch(model, batch_images, batch_size=8):
80
+ """Create grid visualization for a batch of predictions."""
81
+ with torch.no_grad():
82
+ predictions = model.predict(batch_images)
83
+
84
+ fig = plt.figure(figsize=(15, 5))
85
+ for idx in range(min(batch_size, len(batch_images))):
86
+ plt.subplot(2, 4, idx + 1)
87
+ img = batch_images[idx].permute(1, 2, 0).cpu().numpy()
88
+ mask = predictions[idx].argmax(dim=0).cpu().numpy()
89
+ overlay = create_overlay_mask(img, mask)
90
+ plt.imshow(overlay)
91
+ plt.axis('off')
92
+
93
+ plt.tight_layout()
94
+ return fig
95
+
96
+ def save_visualization(fig, save_path):
97
+ """Save visualization figure."""
98
+ fig.savefig(save_path, bbox_inches='tight', dpi=300)
99
+ plt.close(fig)
100
+
101
+ def generate_color_mapping(num_classes):
102
+ """Generate distinct colors for segmentation classes."""
103
+ colors = [
104
+ [0, 0, 0], # Background (black)
105
+ [255, 0, 0], # Red
106
+ [0, 255, 0], # Green
107
+ [0, 0, 255], # Blue
108
+ [255, 255, 0], # Yellow
109
+ [255, 0, 255], # Magenta
110
+ [0, 255, 255], # Cyan
111
+ [128, 0, 0], # Dark Red
112
+ [0, 128, 0], # Dark Green
113
+ [0, 0, 128] # Dark Blue
114
+ ]
115
+ return colors[:num_classes]
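An overlay sketch using assumed file paths; the mask is expected to hold integer class indices, as written by the prediction pipeline's 'integer' format.

```python
import cv2
import matplotlib.pyplot as plt
from SemanticModel.visualization import create_overlay_mask, save_visualization

image = cv2.cvtColor(cv2.imread('samples/site_01.jpg'), cv2.COLOR_BGR2RGB)        # hypothetical image
mask = cv2.imread('samples/predictions/site_01_pred.png', cv2.IMREAD_GRAYSCALE)   # hypothetical mask

overlay = create_overlay_mask(image, mask, alpha=0.5)

fig = plt.figure(figsize=(8, 6))
plt.imshow(overlay)
plt.axis('off')
save_visualization(fig, 'samples/site_01_overlay.png')
```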
setup.py ADDED
@@ -0,0 +1,34 @@
1
+ # setup.py
2
+ from setuptools import setup, find_packages
3
+
4
+ setup(
5
+ name="SemanticModel",
6
+ version="0.1.0",
7
+ description="Deep learning framework for semantic segmentation",
8
+ author="Your Name",
9
+ packages=find_packages(),
10
+ python_requires=">=3.8",
11
+ install_requires=[
12
+ 'torch',
13
+ 'torchvision',
14
+ 'tensorboard',
15
+ 'pyproj',
16
+ 'fiona==1.8.20',
17
+ 'rtree',
18
+ 'geopandas',
19
+ 'rasterio',
20
+ 'slidingwindow',
21
+ 'opencv-python',
22
+ 'wandb',
23
+ 'tifffile',
24
+ 'imagecodecs',
25
+ 'albumentations',
26
+ 'segmentation-models-pytorch>=0.3.3'
27
+ ],
28
+ classifiers=[
29
+ "Development Status :: 3 - Alpha",
30
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
31
+ "License :: OSI Approved :: MIT License",
32
+ "Programming Language :: Python :: 3.8",
33
+ ],
34
+ )
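Assuming setup.py sits alongside the SemanticModel package directory, the usual route is an editable install from that folder with `pip install -e .`, after which the `from SemanticModel...` imports used in the examples and notebooks resolve.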