Working towards Keras 3 Model

Files changed (8) hide show

.python-version +1 -0
README.md +4 -2
main.py +6 -0
myoquant-sdh-train.ipynb +0 -0
pyproject.toml +20 -0
random_brightness.py +0 -345
sdh_embedding_umap.ipynb +98 -90
uv.lock +0 -0

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

README.md CHANGED Viewed

@@ -35,6 +35,8 @@ model-index:
             name: Test Accuracy # Optional. Example: Test WER
 ---
 ## Model description
 <p align="center">
@@ -141,7 +143,7 @@ Test data results:
 With Tensorflow 2.10 and over:
 ```python
-model_sdh = keras.models.load_model("model.h5")
 ```
 With Tensorflow <2.10:
@@ -151,7 +153,7 @@ Then the model can easily be imported in Tensorflow/Keras using:
 ```python
 from .random_brightness import *
 model_sdh = keras.models.load_model(
-    "model.h5", custom_objects={"RandomBrightness": RandomBrightness}
 )
 ```

             name: Test Accuracy # Optional. Example: Test WER
 ---
+# TODO: UPDATE WITH LATEST INFO
 ## Model description
 <p align="center">
 With Tensorflow 2.10 and over:
 ```python
+model_sdh = keras.models.load_model("model.keras")
 ```
 With Tensorflow <2.10:
 ```python
 from .random_brightness import *
 model_sdh = keras.models.load_model(
+    "model.keras", custom_objects={"RandomBrightness": RandomBrightness}
 )
 ```

main.py ADDED Viewed

	@@ -0,0 +1,6 @@

+def main():
+    print("Hello from myoquant-sdh-model!")
+if __name__ == "__main__":
+    main()

myoquant-sdh-train.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,20 @@

+[project]
+name = "myoquant-sdh-model"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "datasets>=3.6.0",
+    "ipywidgets>=8.1.7",
+    "jupyter>=1.1.1",
+    "keras>=3.10.0",
+    "keras-cv>=0.9.0",
+    "keras-hub>=0.21.0",
+    "matplotlib>=3.10.3",
+    "notebook>=7.4.3",
+    "scikit-learn>=1.6.1",
+    "tensorflow>=2.19.0",
+    "tensorflow-metal>=1.2.0",
+    "wandb>=0.19.11",
+]

random_brightness.py DELETED Viewed

@@ -1,345 +0,0 @@
-# @title Random Brightness Layer
-import tensorflow as tf
-from keras import backend
-from keras.engine import base_layer
-from keras.engine import base_preprocessing_layer
-from keras.layers.preprocessing import preprocessing_utils as utils
-from keras.utils import tf_utils
-from tensorflow.python.ops import stateless_random_ops
-from tensorflow.python.util.tf_export import keras_export
-from tensorflow.tools.docs import doc_controls
-@keras_export("keras.__internal__.layers.BaseImageAugmentationLayer")
-class BaseImageAugmentationLayer(base_layer.BaseRandomLayer):
-    """Abstract base layer for image augmentation.
-    This layer contains base functionalities for preprocessing layers which
-    augment image related data, e.g. images and, in the future, labels and bounding boxes.
-    The subclasses could avoid making certain mistakes and reduce code
-    duplications.
-    This layer requires you to implement one method: `augment_image()`, which
-    augments one single image during the training. There are a few additional
-    methods that you can implement for added functionality on the layer:
-    `augment_label()`, which handles label augmentation if the layer supports
-    that.
-    `augment_bounding_box()`, which handles the bounding box augmentation, if the
-    layer supports that.
-    `get_random_transformation()`, which should produce a random transformation
-    setting. The transformation object, which could be any type, will be passed to
-    `augment_image`, `augment_label` and `augment_bounding_box`, to coordinate
-    the randomness behavior, e.g., in the RandomFlip layer, the image and
-    bounding_box should be changed in the same way.
-    The `call()` method supports two formats of inputs:
-    1. Single image tensor with 3D (HWC) or 4D (NHWC) format.
-    2. A dict of tensors with stable keys. The supported keys are:
-      `"images"`, `"labels"` and `"bounding_boxes"` at the moment. We might add
-      more keys in future when we support more types of augmentation.
-    The output of `call()` will be in one of the same two formats, with the
-    same structure as the inputs.
-    The `call()` will handle the logic detecting the training/inference
-    mode, unpack the inputs, forward to the correct function, and pack the output
-    back to the same structure as the inputs.
-    By default the `call()` method leverages the `tf.vectorized_map()` function.
-    Auto-vectorization can be disabled by setting `self.auto_vectorize = False`
-    in your `__init__()` method.  When disabled, `call()` instead relies
-    on `tf.map_fn()`. For example:
-    ```python
-    class SubclassLayer(BaseImageAugmentationLayer):
-      def __init__(self):
-        super().__init__()
-        self.auto_vectorize = False
-    ```
-    Example:
-    ```python
-    class RandomContrast(BaseImageAugmentationLayer):
-      def __init__(self, factor=(0.5, 1.5), **kwargs):
-        super().__init__(**kwargs)
-        self._factor = factor
-      def augment_image(self, image, transformation=None):
-        random_factor = tf.random.uniform([], self._factor[0], self._factor[1])
-        mean = tf.math.reduce_mean(image, axis=-1, keepdims=True)
-        return (image - mean) * random_factor + mean
-    ```
-    Note that since the randomness is also a common functionality, this layer
-    also includes a tf.keras.backend.RandomGenerator, which can be used to produce
-    the random numbers.  The random number generator is stored in the
-    `self._random_generator` attribute.
-    """
-    def __init__(self, rate=1.0, seed=None, **kwargs):
-        super().__init__(seed=seed, **kwargs)
-        self.rate = rate
-    @property
-    def auto_vectorize(self):
-        """Control whether automatic vectorization occurs.
-        By default the `call()` method leverages the `tf.vectorized_map()` function.
-        Auto-vectorization can be disabled by setting `self.auto_vectorize = False`
-        in your `__init__()` method.  When disabled, `call()` instead relies
-        on `tf.map_fn()`. For example:
-        ```python
-        class SubclassLayer(BaseImageAugmentationLayer):
-          def __init__(self):
-            super().__init__()
-            self.auto_vectorize = False
-        ```
-        """
-        return getattr(self, "_auto_vectorize", True)
-    @auto_vectorize.setter
-    def auto_vectorize(self, auto_vectorize):
-        self._auto_vectorize = auto_vectorize
-    @property
-    def _map_fn(self):
-        if self.auto_vectorize:
-            return tf.vectorized_map
-        else:
-            return tf.map_fn
-    @doc_controls.for_subclass_implementers
-    def augment_image(self, image, transformation=None):
-        """Augment a single image during training.
-        Args:
-          image: 3D image input tensor to the layer. Forwarded from `layer.call()`.
-          transformation: The transformation object produced by
-            `get_random_transformation`. Used to coordinate the randomness between
-            image, label and bounding box.
-        Returns:
-          output 3D tensor, which will be forwarded to `layer.call()`.
-        """
-        raise NotImplementedError()
-    @doc_controls.for_subclass_implementers
-    def augment_label(self, label, transformation=None):
-        """Augment a single label during training.
-        Args:
-          label: 1D label to the layer. Forwarded from `layer.call()`.
-          transformation: The transformation object produced by
-            `get_random_transformation`. Used to coordinate the randomness between
-            image, label and bounding box.
-        Returns:
-          output 1D tensor, which will be forwarded to `layer.call()`.
-        """
-        raise NotImplementedError()
-    @doc_controls.for_subclass_implementers
-    def augment_bounding_box(self, bounding_box, transformation=None):
-        """Augment bounding boxes for one image during training.
-        Args:
-          bounding_box: 2D bounding boxes to the layer. Forwarded from `call()`.
-          transformation: The transformation object produced by
-            `get_random_transformation`. Used to coordinate the randomness between
-            image, label and bounding box.
-        Returns:
-          output 2D tensor, which will be forwarded to `layer.call()`.
-        """
-        raise NotImplementedError()
-    @doc_controls.for_subclass_implementers
-    def get_random_transformation(self, image=None, label=None, bounding_box=None):
-        """Produce random transformation config for one single input.
-        This is used to produce same randomness between image/label/bounding_box.
-        Args:
-          image: 3D image tensor from inputs.
-          label: optional 1D label tensor from inputs.
-          bounding_box: optional 2D bounding boxes tensor from inputs.
-        Returns:
-          Any type of object, which will be forwarded to `augment_image`,
-          `augment_label` and `augment_bounding_box` as the `transformation`
-          parameter.
-        """
-        return None
-    def call(self, inputs, training=True):
-        inputs = self._ensure_inputs_are_compute_dtype(inputs)
-        if training:
-            inputs, is_dict = self._format_inputs(inputs)
-            images = inputs["images"]
-            if images.shape.rank == 3:
-                return self._format_output(self._augment(inputs), is_dict)
-            elif images.shape.rank == 4:
-                return self._format_output(self._batch_augment(inputs), is_dict)
-            else:
-                raise ValueError(
-                    "Image augmentation layers are expecting inputs to be "
-                    "rank 3 (HWC) or 4D (NHWC) tensors. Got shape: "
-                    f"{images.shape}"
-                )
-        else:
-            return inputs
-    def _augment(self, inputs):
-        image = inputs.get("images", None)
-        label = inputs.get("labels", None)
-        bounding_box = inputs.get("bounding_boxes", None)
-        transformation = self.get_random_transformation(
-            image=image, label=label, bounding_box=bounding_box
-        )  # pylint: disable=assignment-from-none
-        image = self.augment_image(image, transformation=transformation)
-        result = {"images": image}
-        if label is not None:
-            label = self.augment_label(label, transformation=transformation)
-            result["labels"] = label
-        if bounding_box is not None:
-            bounding_box = self.augment_bounding_box(
-                bounding_box, transformation=transformation
-            )
-            result["bounding_boxes"] = bounding_box
-        return result
-    def _batch_augment(self, inputs):
-        return self._map_fn(self._augment, inputs)
-    def _format_inputs(self, inputs):
-        if tf.is_tensor(inputs):
-            # single image input tensor
-            return {"images": inputs}, False
-        elif isinstance(inputs, dict):
-            # TODO(scottzhu): Check if it only contains the valid keys
-            return inputs, True
-        else:
-            raise ValueError(
-                f"Expect the inputs to be image tensor or dict. Got {inputs}"
-            )
-    def _format_output(self, output, is_dict):
-        if not is_dict:
-            return output["images"]
-        else:
-            return output
-    def _ensure_inputs_are_compute_dtype(self, inputs):
-        if isinstance(inputs, dict):
-            inputs["images"] = utils.ensure_tensor(inputs["images"], self.compute_dtype)
-        else:
-            inputs = utils.ensure_tensor(inputs, self.compute_dtype)
-        return inputs
-@keras_export("keras.layers.RandomBrightness", v1=[])
-class RandomBrightness(BaseImageAugmentationLayer):
-    """A preprocessing layer which randomly adjusts brightness during training.
-    This layer will randomly increase/reduce the brightness for the input RGB
-    images. At inference time, the output will be identical to the input.
-    Call the layer with `training=True` to adjust the brightness of the input.
-    Note that different brightness adjustment factors
-    will be applied to each of the images in the batch.
-    For an overview and full list of preprocessing layers, see the preprocessing
-    [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
-    Args:
-      factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The
-        factor is used to determine the lower bound and upper bound of the
-        brightness adjustment. A float value will be chosen randomly between
-        the limits. When -1.0 is chosen, the output image will be black, and
-        when 1.0 is chosen, the image will be fully white. When only one float
-        is provided, eg, 0.2, then -0.2 will be used for lower bound and 0.2
-        will be used for upper bound.
-      value_range: Optional list/tuple of 2 floats for the lower and upper limit
-        of the values of the input data. Defaults to [0.0, 255.0]. Can be changed
-        to e.g. [0.0, 1.0] if the image input has been scaled before this layer.
-        The brightness adjustment will be scaled to this range, and the
-        output values will be clipped to this range.
-      seed: optional integer, for fixed RNG behavior.
-    Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel
-      values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
-    Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the
-      `factor`. By default, the layer will output floats. The output value will
-      be clipped to the range `[0, 255]`, the valid range of RGB colors, and
-      rescaled based on the `value_range` if needed.
-    Sample usage:
-    ```python
-    random_bright = tf.keras.layers.RandomBrightness(factor=0.2)
-    # An image with shape [2, 2, 3]
-    image = [[[1, 2, 3], [4 ,5 ,6]], [[7, 8, 9], [10, 11, 12]]]
-    # Assume we randomly select the factor to be 0.1, then it will apply
-    # 0.1 * 255 to all the channel
-    output = random_bright(image, training=True)
-    # output will be int64 with 25.5 added to each channel and round down.
-    tf.Tensor([[[26.5, 27.5, 28.5]
-                [29.5, 30.5, 31.5]]
-               [[32.5, 33.5, 34.5]
-                [35.5, 36.5, 37.5]]],
-              shape=(2, 2, 3), dtype=int64)
-    ```
-    """
-    _FACTOR_VALIDATION_ERROR = (
-        "The `factor` argument should be a number (or a list of two numbers) "
-        "in the range [-1.0, 1.0]. "
-    )
-    _VALUE_RANGE_VALIDATION_ERROR = (
-        "The `value_range` argument should be a list of two numbers. "
-    )
-    def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs):
-        base_preprocessing_layer.keras_kpl_gauge.get_cell("RandomBrightness").set(True)
-        super().__init__(seed=seed, force_generator=True, **kwargs)
-        self._set_factor(factor)
-        self._set_value_range(value_range)
-        self._seed = seed
-    def augment_image(self, image, transformation=None):
-        return self._brightness_adjust(image, transformation["rgb_delta"])
-    def augment_label(self, label, transformation=None):
-        return label
-    def get_random_transformation(self, image=None, label=None, bounding_box=None):
-        rgb_delta_shape = (1, 1, 1)
-        random_rgb_delta = self._random_generator.random_uniform(
-            shape=rgb_delta_shape,
-            minval=self._factor[0],
-            maxval=self._factor[1],
-        )
-        random_rgb_delta = random_rgb_delta * (
-            self._value_range[1] - self._value_range[0]
-        )
-        return {"rgb_delta": random_rgb_delta}
-    def _set_value_range(self, value_range):
-        if not isinstance(value_range, (tuple, list)):
-            raise ValueError(self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}")
-        if len(value_range) != 2:
-            raise ValueError(self._VALUE_RANGE_VALIDATION_ERROR + f"Got {value_range}")
-        self._value_range = sorted(value_range)
-    def _set_factor(self, factor):
-        if isinstance(factor, (tuple, list)):
-            if len(factor) != 2:
-                raise ValueError(self._FACTOR_VALIDATION_ERROR + f"Got {factor}")
-            self._check_factor_range(factor[0])
-            self._check_factor_range(factor[1])
-            self._factor = sorted(factor)
-        elif isinstance(factor, (int, float)):
-            self._check_factor_range(factor)
-            factor = abs(factor)
-            self._factor = [-factor, factor]
-        else:
-            raise ValueError(self._FACTOR_VALIDATION_ERROR + f"Got {factor}")
-    def _check_factor_range(self, input_number):
-        if input_number > 1.0 or input_number < -1.0:
-            raise ValueError(self._FACTOR_VALIDATION_ERROR + f"Got {input_number}")
-    def _brightness_adjust(self, image, rgb_delta):
-        image = utils.ensure_tensor(image, self.compute_dtype)
-        rank = image.shape.rank
-        if rank != 3:
-            raise ValueError(
-                "Expected the input image to be rank 3. Got "
-                f"inputs.shape = {image.shape}"
-            )
-        rgb_delta = tf.cast(rgb_delta, image.dtype)
-        image += rgb_delta
-        return tf.clip_by_value(image, self._value_range[0], self._value_range[1])
-    def get_config(self):
-        config = {
-            "factor": self._factor,
-            "value_range": self._value_range,
-            "seed": self._seed,
-        }
-        base_config = super().get_config()
-        return dict(list(base_config.items()) + list(config.items()))

sdh_embedding_umap.ipynb CHANGED Viewed

@@ -32,28 +32,25 @@
    ],
    "source": [
     "# List all file in data directory with Pathlib\n",
-    "from pathlib import Path\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
-    "import os\n",
-    "import tensorflow as tf\n",
-    "import glob\n",
     "from tensorflow.keras.preprocessing import image\n",
     "from tensorflow.keras.utils import img_to_array\n",
     "\n",
-    "LABELS_DICT = {\"control\":0, \"sick\":1}\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "\n",
     "def get_mouse_model(file_path):\n",
-    "  file_name = file_path.split(\"/\")[-1]\n",
-    "  if \"wt\" in file_name.lower():\n",
-    "    return \"wt\"\n",
-    "  elif \"bin1\" in file_name.lower():\n",
-    "    return \"bin1\"\n",
-    "  elif \"dnm2\" in file_name.lower():\n",
-    "    return \"dnm2\"\n",
-    "  else:\n",
-    "    return \"unknown\"\n",
     "\n",
     "def dirty_filter_file_names(file_name):\n",
     "    file_name = file_name.split(\"/\")[-1]\n",
@@ -64,10 +61,11 @@
     "    file_name = '_'.join(file_name.split(\"_\")[1:])\n",
     "    return file_name\n",
     "\n",
     "def generate_dataset(folder, sub_folders=[\"control\", \"inter\", \"sick\"]):\n",
     "    n_elem = 0\n",
     "    for sub_folder in sub_folders:\n",
-    "      n_elem += len(glob.glob(os.path.join(folder, sub_folder, \"*.tif\")))\n",
     "    images_array = np.empty(shape=(n_elem, 256, 256, 3), dtype=np.uint8)\n",
     "    path_array = []\n",
     "    mouse_model = []\n",
@@ -75,19 +73,21 @@
     "    labels_array = np.empty(shape=n_elem, dtype=np.uint8)\n",
     "    counter = 0\n",
     "    for index, sub_folder in enumerate(sub_folders):\n",
-    "      path_files = os.path.join(folder, sub_folder, \"*.tif\")\n",
-    "      for img in glob.glob(path_files):\n",
-    "          im = img_to_array(image.load_img(img))\n",
-    "          # im_resized = image.smart_resize(im, (256, 256)) \n",
-    "          path_array.append(img)\n",
-    "          mouse_model.append(get_mouse_model(img))\n",
-    "          mouse_model_full.append(dirty_filter_file_names(img))\n",
-    "          images_array[counter] = tf.image.resize(im, (256,256))\n",
-    "          labels_array[counter] = index\n",
-    "          counter += 1\n",
     "    return images_array, path_array, labels_array, mouse_model, mouse_model_full\n",
     "\n",
-    "img_data, all_files, labels, mouse_model, mouse_model_full = generate_dataset(\"data/all_images\", sub_folders=SUB_FOLDERS)\n",
     "file_dict = dict(\n",
     "    file=all_files,\n",
     "    label=labels,\n",
@@ -239,11 +239,13 @@
     "# Load image embetter \n",
     "import tensorflow as tf\n",
     "import os\n",
-    "MODEL_NAME = \"data/model.h5\"\n",
     "model = tf.keras.models.load_model(MODEL_NAME)\n",
     "emb_model = tf.keras.models.Sequential()\n",
     "emb_model.add(model.get_layer('sequential'))\n",
-    "emb_model.add(tf.keras.models.Model(inputs=model.get_layer('resnet50v2').input, outputs=model.get_layer('resnet50v2').get_layer('avg_pool').output))\n",
     "embeddings = emb_model.predict(file_dict[\"image\"])\n",
     "with open('data/results/embedding/image_embedding_custom.npy', 'wb') as f:\n",
     "    np.save(f, embeddings)"
@@ -268,6 +270,7 @@
    "source": [
     "from umap import UMAP\n",
     "import numpy as np\n",
     "embeddings = np.load(open('data/results/embedding/image_embedding_custom.npy', 'rb'))\n",
     "embedding_umap = UMAP().fit_transform(embeddings)\n",
     "embedding_umap.shape"
@@ -291,27 +294,27 @@
    ],
    "source": [
     "import matplotlib.pyplot as plt\n",
-    "from matplotlib.patches import Rectangle\n",
     "cdict = {0: 'green', 1: 'red'}\n",
     "\n",
     "# Get the index of row from embedding_umap where the first column is greater than 0 and the second column is greater than 0\n",
     "\n",
-    "x_condition2 = ((embedding_umap[:,0] > 0) & (embedding_umap[:,0] < 1))\n",
-    "y_condition2 = ((embedding_umap[:,1] > 4.75) & (embedding_umap[:,1] < 5.5))\n",
     "idx_left_cluster = np.where((x_condition2 & y_condition2))[0]\n",
     "\n",
-    "x_condition = ((embedding_umap[:,0] > 4.75) & (embedding_umap[:,0] < 5.75))\n",
-    "y_condition = ((embedding_umap[:,1] > 4.5) & (embedding_umap[:,1] < 5.25))\n",
     "idx_right_cluster = np.where((x_condition & y_condition))[0]\n",
     "\n",
-    "fig, ax = plt.subplots(1, 1, figsize=(10,10))\n",
     "for g in np.unique(labels):\n",
     "    ix = np.where(labels == g)\n",
-    "    ax.scatter(embedding_umap[:, 0][ix], \n",
-    "                embedding_umap[:, 1][ix], \n",
-    "                s=0.3, \n",
-    "                c=cdict[g],\n",
-    "                label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
@@ -333,21 +336,21 @@
     }
    ],
    "source": [
-    "cdict_model_full = {'BIN1_KO_AAV_EMPTY_TAG':\"red\",\n",
-    " 'BIN1_KO_AAV_MTM1_TAD':\"blue\",\n",
-    " 'BIN1_WT_AAV_EMPTY_TAG':\"green\",\n",
-    " 'SDH_TAM_Bin1cKO_ko_pla':\"orange\",\n",
-    " 'SDH_TAM_Dnm2S619L_sl_pla':\"brown\",\n",
-    " 'SDH_TAM_Dnm2S619L_sl_tam':\"black\"}\n",
-    "fig, ax = plt.subplots(1, 1, figsize=(10,10))\n",
     "for g in np.unique(mouse_model_full):\n",
     "    numpy_mouse_model_full = np.array(mouse_model_full)\n",
     "    ix = np.where(numpy_mouse_model_full == g)\n",
-    "    ax.scatter(embedding_umap[:, 0][ix], \n",
-    "                embedding_umap[:, 1][ix], \n",
-    "                s=0.3, \n",
-    "                c=cdict_model_full[g],\n",
-    "                label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
@@ -370,15 +373,15 @@
    ],
    "source": [
     "cdict_model = {\"wt\": 'green', \"bin1\": 'red', \"dnm2\": \"blue\", \"unknown\": \"black\"}\n",
-    "fig, ax = plt.subplots(1, 1, figsize=(10,10))\n",
     "for g in np.unique(mouse_model):\n",
     "    numpy_mouse_model = np.array(mouse_model)\n",
     "    ix = np.where(numpy_mouse_model == g)\n",
-    "    ax.scatter(embedding_umap[:, 0][ix], \n",
-    "                embedding_umap[:, 1][ix], \n",
-    "                s=0.3, \n",
-    "                c=cdict_model[g],\n",
-    "                label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
@@ -403,8 +406,8 @@
    ],
    "source": [
     "from doubtlab.ensemble import DoubtEnsemble\n",
-    "from doubtlab.reason import ProbaReason, LongConfidenceReason , WrongPredictionReason, CleanlabReason\n",
-    "import pandas as pd \n",
     "\n",
     "# Let's precalculate the proba values.\n",
     "probas = model.predict(img_data)\n",
@@ -421,17 +424,19 @@
     "# This dataframe now contains the predicates\n",
     "doubtlab_df = pd.DataFrame(predicate_dict)\n",
     "# Create a new column with 1 if the previous predicates are true else 0\n",
-    "doubtlab_df['doubt'] = doubtlab_df[[\"proba\",\"long\",\"wrong\",\"cleanlab\"]].ne(0).any(axis=1)\n",
     "wrong_strong_idx = doubtlab_df[doubtlab_df[\"long\"] == 1.0].index\n",
     "wrong_idx = doubtlab_df[doubtlab_df[\"wrong\"] == 1.0].index\n",
     "low_idx = doubtlab_df[doubtlab_df[\"proba\"] == 1.0].index\n",
     "cleanlab_idx = doubtlab_df[doubtlab_df[\"cleanlab\"] == 1.0].index\n",
     "doubt_idx = doubtlab_df[doubtlab_df[\"doubt\"] == True].index\n",
-    "print(f\"Total Number of images with wrong strong classficiation: {len(wrong_strong_idx)} ({round(len(wrong_strong_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
-    "print(f\"Total Number of images with wrong: {len(wrong_idx)} ({round(len(wrong_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
-    "print(f\"Total Number of images with low probas: {len(low_idx)} ({round(len(low_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
-    "print(f\"Total Number of images with cleanlab: {len(cleanlab_idx)} ({round(len(cleanlab_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
-    "print(f\"Total Number of doubts images: {len(doubt_idx)} ({round(len(doubt_idx)/len(doubtlab_df)*100, 2)}%)\")"
    ]
   },
   {
@@ -459,12 +464,13 @@
    ],
    "source": [
     "wrong_strong_idx = doubtlab_df[doubtlab_df[\"long\"] == 1.0].index\n",
-    "print(f\"Total Number of images with wrong strong classficiation: {len(wrong_strong_idx)} ({round(len(wrong_strong_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
-    "plt.figure(figsize=(10,10))\n",
     "for idx in np.random.choice(wrong_strong_idx, 25, replace=False):\n",
-    "    plt.subplot(5,5,counter+1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
@@ -473,9 +479,9 @@
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
-    "    predicted_class = np.argmax(probas[idx])    \n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
-    "    counter +=1\n",
     "plt.show()\n",
     "\n"
    ]
@@ -487,12 +493,12 @@
    "outputs": [],
    "source": [
     "wrong_idx = doubtlab_df[doubtlab_df[\"wrong\"] == 1.0].index\n",
-    "print(f\"Total Number of images with wrong: {len(wrong_idx)} ({round(len(wrong_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
-    "plt.figure(figsize=(10,10))\n",
     "for idx in np.random.choice(wrong_idx, 25, replace=False):\n",
-    "    plt.subplot(5,5,counter+1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
@@ -501,9 +507,9 @@
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
-    "    predicted_class = np.argmax(probas[idx])    \n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
-    "    counter +=1\n",
     "plt.show()\n",
     "\n"
    ]
@@ -515,12 +521,12 @@
    "outputs": [],
    "source": [
     "low_idx = doubtlab_df[doubtlab_df[\"proba\"] == 1.0].index\n",
-    "print(f\"Total Number of images with low probas: {len(low_idx)} ({round(len(low_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
-    "plt.figure(figsize=(10,10))\n",
     "for idx in np.random.choice(low_idx, 25, replace=False):\n",
-    "    plt.subplot(5,5,counter+1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
@@ -529,9 +535,9 @@
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
-    "    predicted_class = np.argmax(probas[idx])    \n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
-    "    counter +=1\n",
     "plt.show()\n",
     "\n"
    ]
@@ -543,12 +549,13 @@
    "outputs": [],
    "source": [
     "cleanlab_idx = doubtlab_df[doubtlab_df[\"cleanlab\"] == 1.0].index\n",
-    "print(f\"Total Number of images with cleanlab: {len(cleanlab_idx)} ({round(len(cleanlab_idx)/len(doubtlab_df)*100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
-    "plt.figure(figsize=(10,10))\n",
     "for idx in np.random.choice(cleanlab_idx, 25, replace=False):\n",
-    "    plt.subplot(5,5,counter+1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
@@ -557,9 +564,9 @@
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
-    "    predicted_class = np.argmax(probas[idx])    \n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
-    "    counter +=1\n",
     "plt.show()\n",
     "\n"
    ]
@@ -573,11 +580,12 @@
     "from pigeon import annotate\n",
     "from PIL import Image\n",
     "from IPython.display import display\n",
-    "re_annotated_img_files = [ all_files[i] for i in cleanlab_idx ]\n",
     "annotations = annotate(\n",
-    "  re_annotated_img_files,\n",
-    "  options=[\"control\",\"sick\",\"unsure\"],\n",
-    "  display_fn=lambda filename: display(Image.open(filename, 'r'))\n",
     ")"
    ]
   },

    ],
    "source": [
     "# List all files in the data directory with glob\n",
     "import numpy as np\n",
     "from tensorflow.keras.preprocessing import image\n",
     "from tensorflow.keras.utils import img_to_array\n",
     "\n",
+    "LABELS_DICT = {\"control\": 0, \"sick\": 1}\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "\n",
+    "\n",
     "def get_mouse_model(file_path):\n",
+    "    file_name = file_path.split(\"/\")[-1]\n",
+    "    if \"wt\" in file_name.lower():\n",
+    "        return \"wt\"\n",
+    "    elif \"bin1\" in file_name.lower():\n",
+    "        return \"bin1\"\n",
+    "    elif \"dnm2\" in file_name.lower():\n",
+    "        return \"dnm2\"\n",
+    "    else:\n",
+    "        return \"unknown\"\n",
+    "\n",
     "\n",
     "def dirty_filter_file_names(file_name):\n",
     "    file_name = file_name.split(\"/\")[-1]\n",
     "    file_name = '_'.join(file_name.split(\"_\")[1:])\n",
     "    return file_name\n",
     "\n",
+    "\n",
     "def generate_dataset(folder, sub_folders=[\"control\", \"inter\", \"sick\"]):\n",
     "    n_elem = 0\n",
     "    for sub_folder in sub_folders:\n",
+    "        n_elem += len(glob.glob(os.path.join(folder, sub_folder, \"*.tif\")))\n",
     "    images_array = np.empty(shape=(n_elem, 256, 256, 3), dtype=np.uint8)\n",
     "    path_array = []\n",
     "    mouse_model = []\n",
     "    labels_array = np.empty(shape=n_elem, dtype=np.uint8)\n",
     "    counter = 0\n",
     "    for index, sub_folder in enumerate(sub_folders):\n",
+    "        path_files = os.path.join(folder, sub_folder, \"*.tif\")\n",
+    "        for img in glob.glob(path_files):\n",
+    "            im = img_to_array(image.load_img(img))\n",
+    "            # im_resized = image.smart_resize(im, (256, 256))\n",
+    "            path_array.append(img)\n",
+    "            mouse_model.append(get_mouse_model(img))\n",
+    "            mouse_model_full.append(dirty_filter_file_names(img))\n",
+    "            images_array[counter] = tf.image.resize(im, (256, 256))\n",
+    "            labels_array[counter] = index\n",
+    "            counter += 1\n",
     "    return images_array, path_array, labels_array, mouse_model, mouse_model_full\n",
     "\n",
+    "\n",
+    "img_data, all_files, labels, mouse_model, mouse_model_full = generate_dataset(\"data/all_images\",\n",
+    "                                                                              sub_folders=SUB_FOLDERS)\n",
     "file_dict = dict(\n",
     "    file=all_files,\n",
     "    label=labels,\n",
     "# Load image embetter \n",
     "import tensorflow as tf\n",
     "import os\n",
+    "\n",
+    "MODEL_NAME = \"data/model.keras\"\n",
     "model = tf.keras.models.load_model(MODEL_NAME)\n",
     "emb_model = tf.keras.models.Sequential()\n",
     "emb_model.add(model.get_layer('sequential'))\n",
+    "emb_model.add(tf.keras.models.Model(inputs=model.get_layer('resnet50v2').input,\n",
+    "                                    outputs=model.get_layer('resnet50v2').get_layer('avg_pool').output))\n",
     "embeddings = emb_model.predict(file_dict[\"image\"])\n",
     "with open('data/results/embedding/image_embedding_custom.npy', 'wb') as f:\n",
     "    np.save(f, embeddings)"
    "source": [
     "from umap import UMAP\n",
     "import numpy as np\n",
+    "\n",
     "embeddings = np.load(open('data/results/embedding/image_embedding_custom.npy', 'rb'))\n",
     "embedding_umap = UMAP().fit_transform(embeddings)\n",
     "embedding_umap.shape"
    ],
    "source": [
     "import matplotlib.pyplot as plt\n",
+    "\n",
     "cdict = {0: 'green', 1: 'red'}\n",
     "\n",
     "# Get the row indices of embedding_umap whose first and second columns both fall inside a cluster's bounding box\n",
     "\n",
+    "x_condition2 = ((embedding_umap[:, 0] > 0) & (embedding_umap[:, 0] < 1))\n",
+    "y_condition2 = ((embedding_umap[:, 1] > 4.75) & (embedding_umap[:, 1] < 5.5))\n",
     "idx_left_cluster = np.where((x_condition2 & y_condition2))[0]\n",
     "\n",
+    "x_condition = ((embedding_umap[:, 0] > 4.75) & (embedding_umap[:, 0] < 5.75))\n",
+    "y_condition = ((embedding_umap[:, 1] > 4.5) & (embedding_umap[:, 1] < 5.25))\n",
     "idx_right_cluster = np.where((x_condition & y_condition))[0]\n",
     "\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n",
     "for g in np.unique(labels):\n",
     "    ix = np.where(labels == g)\n",
+    "    ax.scatter(embedding_umap[:, 0][ix],\n",
+    "               embedding_umap[:, 1][ix],\n",
+    "               s=0.3,\n",
+    "               c=cdict[g],\n",
+    "               label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
     }
    ],
    "source": [
+    "cdict_model_full = {'BIN1_KO_AAV_EMPTY_TAG': \"red\",\n",
+    "                    'BIN1_KO_AAV_MTM1_TAD': \"blue\",\n",
+    "                    'BIN1_WT_AAV_EMPTY_TAG': \"green\",\n",
+    "                    'SDH_TAM_Bin1cKO_ko_pla': \"orange\",\n",
+    "                    'SDH_TAM_Dnm2S619L_sl_pla': \"brown\",\n",
+    "                    'SDH_TAM_Dnm2S619L_sl_tam': \"black\"}\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n",
     "for g in np.unique(mouse_model_full):\n",
     "    numpy_mouse_model_full = np.array(mouse_model_full)\n",
     "    ix = np.where(numpy_mouse_model_full == g)\n",
+    "    ax.scatter(embedding_umap[:, 0][ix],\n",
+    "               embedding_umap[:, 1][ix],\n",
+    "               s=0.3,\n",
+    "               c=cdict_model_full[g],\n",
+    "               label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
    ],
    "source": [
     "cdict_model = {\"wt\": 'green', \"bin1\": 'red', \"dnm2\": \"blue\", \"unknown\": \"black\"}\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n",
     "for g in np.unique(mouse_model):\n",
     "    numpy_mouse_model = np.array(mouse_model)\n",
     "    ix = np.where(numpy_mouse_model == g)\n",
+    "    ax.scatter(embedding_umap[:, 0][ix],\n",
+    "               embedding_umap[:, 1][ix],\n",
+    "               s=0.3,\n",
+    "               c=cdict_model[g],\n",
+    "               label=g)\n",
     "ax.legend()\n",
     "plt.show()"
    ]
    ],
    "source": [
     "from doubtlab.ensemble import DoubtEnsemble\n",
+    "from doubtlab.reason import ProbaReason, LongConfidenceReason, WrongPredictionReason, CleanlabReason\n",
+    "import pandas as pd\n",
     "\n",
     "# Let's precalculate the proba values.\n",
     "probas = model.predict(img_data)\n",
     "# This dataframe now contains the predicates\n",
     "doubtlab_df = pd.DataFrame(predicate_dict)\n",
     "# Create a new boolean column that is True if any of the previous predicates is non-zero\n",
+    "doubtlab_df['doubt'] = doubtlab_df[[\"proba\", \"long\", \"wrong\", \"cleanlab\"]].ne(0).any(axis=1)\n",
     "wrong_strong_idx = doubtlab_df[doubtlab_df[\"long\"] == 1.0].index\n",
     "wrong_idx = doubtlab_df[doubtlab_df[\"wrong\"] == 1.0].index\n",
     "low_idx = doubtlab_df[doubtlab_df[\"proba\"] == 1.0].index\n",
     "cleanlab_idx = doubtlab_df[doubtlab_df[\"cleanlab\"] == 1.0].index\n",
     "doubt_idx = doubtlab_df[doubtlab_df[\"doubt\"] == True].index\n",
+    "print(\n",
+    "    f\"Total Number of images with wrong strong classification: {len(wrong_strong_idx)} ({round(len(wrong_strong_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
+    "print(f\"Total Number of images with wrong: {len(wrong_idx)} ({round(len(wrong_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
+    "print(f\"Total Number of images with low probas: {len(low_idx)} ({round(len(low_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
+    "print(\n",
+    "    f\"Total Number of images with cleanlab: {len(cleanlab_idx)} ({round(len(cleanlab_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
+    "print(f\"Total Number of doubts images: {len(doubt_idx)} ({round(len(doubt_idx) / len(doubtlab_df) * 100, 2)}%)\")"
    ]
   },
   {
    ],
    "source": [
     "wrong_strong_idx = doubtlab_df[doubtlab_df[\"long\"] == 1.0].index\n",
+    "print(\n",
+    "    f\"Total Number of images with wrong strong classification: {len(wrong_strong_idx)} ({round(len(wrong_strong_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
+    "plt.figure(figsize=(10, 10))\n",
     "for idx in np.random.choice(wrong_strong_idx, 25, replace=False):\n",
+    "    plt.subplot(5, 5, counter + 1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
+    "    predicted_class = np.argmax(probas[idx])\n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
+    "    counter += 1\n",
     "plt.show()\n",
     "\n"
    ]
    "outputs": [],
    "source": [
     "wrong_idx = doubtlab_df[doubtlab_df[\"wrong\"] == 1.0].index\n",
+    "print(f\"Total Number of images with wrong: {len(wrong_idx)} ({round(len(wrong_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
+    "plt.figure(figsize=(10, 10))\n",
     "for idx in np.random.choice(wrong_idx, 25, replace=False):\n",
+    "    plt.subplot(5, 5, counter + 1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
+    "    predicted_class = np.argmax(probas[idx])\n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
+    "    counter += 1\n",
     "plt.show()\n",
     "\n"
    ]
    "outputs": [],
    "source": [
     "low_idx = doubtlab_df[doubtlab_df[\"proba\"] == 1.0].index\n",
+    "print(f\"Total Number of images with low probas: {len(low_idx)} ({round(len(low_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
+    "plt.figure(figsize=(10, 10))\n",
     "for idx in np.random.choice(low_idx, 25, replace=False):\n",
+    "    plt.subplot(5, 5, counter + 1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
+    "    predicted_class = np.argmax(probas[idx])\n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
+    "    counter += 1\n",
     "plt.show()\n",
     "\n"
    ]
    "outputs": [],
    "source": [
     "cleanlab_idx = doubtlab_df[doubtlab_df[\"cleanlab\"] == 1.0].index\n",
+    "print(\n",
+    "    f\"Total Number of images with cleanlab: {len(cleanlab_idx)} ({round(len(cleanlab_idx) / len(doubtlab_df) * 100, 2)}%)\")\n",
     "SUB_FOLDERS = [\"control\", \"sick\"]\n",
     "counter = 0\n",
+    "plt.figure(figsize=(10, 10))\n",
     "for idx in np.random.choice(cleanlab_idx, 25, replace=False):\n",
+    "    plt.subplot(5, 5, counter + 1)\n",
     "    plt.xticks([])\n",
     "    plt.yticks([])\n",
     "    plt.grid(False)\n",
     "    plt.imshow(im)\n",
     "\n",
     "    predict_proba = max(probas[idx])\n",
+    "    predicted_class = np.argmax(probas[idx])\n",
     "    plt.xlabel(f\"{SUB_FOLDERS[label]} ({SUB_FOLDERS[predicted_class]} {predict_proba:.2f})\")\n",
+    "    counter += 1\n",
     "plt.show()\n",
     "\n"
    ]
     "from pigeon import annotate\n",
     "from PIL import Image\n",
     "from IPython.display import display\n",
+    "\n",
+    "re_annotated_img_files = [all_files[i] for i in cleanlab_idx]\n",
     "annotations = annotate(\n",
+    "    re_annotated_img_files,\n",
+    "    options=[\"control\", \"sick\", \"unsure\"],\n",
+    "    display_fn=lambda filename: display(Image.open(filename, 'r'))\n",
     ")"
    ]
   },

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff