diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..f3d5c415e090be510b646e88a7a523cdbab93d14 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000000000000000000000000000000000..11fc491ef1dae316f2b06bbb40eaba9c757fdfd1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..14376956ab93b51250e4dafe431a86c1fd2ab85e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "expman"] + path = expman + url = https://github.com/fabiocarrara/expman +[submodule "models/deeplab"] + path = models/deeplab + url = https://github.com/david8862/tf-keras-deeplabv3p-model-set diff --git a/convert_model.py b/convert_model.py new file mode 100644 index 0000000000000000000000000000000000000000..9821918ec542d88d2d2f99ee40d9f6423c817a88 --- /dev/null +++ b/convert_model.py @@ -0,0 +1,56 @@ +import tensorflow as tf +from tensorflow.keras import backend as K +from adabelief_tf import AdaBeliefOptimizer + +def iou_coef(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3]) + union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) - intersection + return K.mean((intersection + 1e-6) / (union + 1e-6)) + +def dice_coef(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3]) + return K.mean((2. 
* intersection + 1e-6) / (K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) + 1e-6)) + +def boundary_loss(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + dy_true, dx_true = tf.image.image_gradients(y_true) + dy_pred, dx_pred = tf.image.image_gradients(y_pred) + loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true)) + return loss * 0.5 + +def enhanced_binary_crossentropy(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + bce = tf.keras.losses.binary_crossentropy(y_true, y_pred) + boundary = boundary_loss(y_true, y_pred) + return bce + boundary + +def hard_swish(x): + return x * tf.nn.relu6(x + 3) * (1. / 6.) + +# Path to your current .keras model +keras_path = 'runs/b32_c-conv_d-|root|meye|data|NN_human_mouse_eyes|_g1.5_l0.001_num_c1_num_f16_num_s5_r128_se23_sp-random_up-relu_us0/best_model.keras' + +# Load the model with custom objects +custom_objects = { + 'AdaBeliefOptimizer': AdaBeliefOptimizer, + 'iou_coef': iou_coef, + 'dice_coef': dice_coef, + 'hard_swish': hard_swish, + 'enhanced_binary_crossentropy': enhanced_binary_crossentropy, + 'boundary_loss': boundary_loss +} + +print("Loading model from:", keras_path) +model = tf.keras.models.load_model(keras_path, custom_objects=custom_objects) + +# Save as .h5 +h5_path = keras_path.replace('.keras', '.h5') +print("Saving model to:", h5_path) +model.save(h5_path, save_format='h5') +print("Conversion complete!") \ No newline at end of file diff --git a/expman/expman/__init__.py b/expman/expman/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ce8241917216d557fa15f4e5b611438ce7789a86 --- /dev/null +++ b/expman/expman/__init__.py @@ -0,0 +1,7 @@ +from .experiment import Experiment, exp_filter, use_hash_naming +from .exp_group import ExpGroup + +abbreviate = Experiment.abbreviate +from_dir = Experiment.from_dir +gather = ExpGroup.gather +is_exp_dir = Experiment.is_exp_dir diff --git a/expman/expman/__main__.py b/expman/expman/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..14d3e67ca03149c455362e6d459abc626be9e7c5 --- /dev/null +++ b/expman/expman/__main__.py @@ -0,0 +1,58 @@ +import argparse + +from .exp_group import ExpGroup + + +def add_param(args): + exps = ExpGroup.gather(args.run) + for exp in exps: + exp.add_parameter(args.param, args.value) + + +def mv_param(args): + exps = ExpGroup.gather(args.run) + for exp in exps: + exp.rename_parameter(args.param, args.new_param) + + +def rm_param(args): + exps = ExpGroup.gather(args.run) + for exp in exps: + exp.remove_parameter(args.param) + + +def command_line(): + def guess(value): + """ try to guess a python type for the passed string parameter """ + try: + result = eval(value) + except (NameError, ValueError): + result = value + return result + + parser = argparse.ArgumentParser(description='Experiment Manager Utilities') + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + + parser_add = subparsers.add_parser('add-param') + parser_add.add_argument('run', default='runs/') + parser_add.add_argument('param', help='new param name') + parser_add.add_argument('value', type=guess, help='new param value') + parser_add.set_defaults(func=add_param) + + parser_rm = subparsers.add_parser('rm-param') + parser_rm.add_argument('run', default='runs/') + parser_rm.add_argument('param', help='param to remove') + parser_rm.set_defaults(func=rm_param) + + parser_mv = 
subparsers.add_parser('mv-param') + parser_mv.add_argument('run', default='runs/') + parser_mv.add_argument('param', help='param to rename') + parser_mv.add_argument('new_param', help='new param name') + parser_mv.set_defaults(func=mv_param) + + args = parser.parse_args() + args.func(args) + + +command_line() diff --git a/expman/expman/__pycache__/__init__.cpython-311.pyc b/expman/expman/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13539884ea530426a316c8758356c6588d6b3a96 Binary files /dev/null and b/expman/expman/__pycache__/__init__.cpython-311.pyc differ diff --git a/expman/expman/__pycache__/exp_group.cpython-311.pyc b/expman/expman/__pycache__/exp_group.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d3194514d5104250f497afeae8c805dd3565319 Binary files /dev/null and b/expman/expman/__pycache__/exp_group.cpython-311.pyc differ diff --git a/expman/expman/__pycache__/experiment.cpython-311.pyc b/expman/expman/__pycache__/experiment.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59b9705ea7ea4250433e9250828a5ecfc16e9fc7 Binary files /dev/null and b/expman/expman/__pycache__/experiment.cpython-311.pyc differ diff --git a/expman/expman/exp_group.py b/expman/expman/exp_group.py new file mode 100644 index 0000000000000000000000000000000000000000..2ae4f089f28f6b9cb7936c867b05858c33a951b8 --- /dev/null +++ b/expman/expman/exp_group.py @@ -0,0 +1,96 @@ +import os +import pandas as pd + +from glob import glob +from .experiment import Experiment + + +class ExpGroup: + @classmethod + def gather(cls, root='runs/'): + if Experiment.is_exp_dir(root): + exps = (root,) + else: + exps = glob(os.path.join(root, '*')) + exps = filter(Experiment.is_exp_dir, exps) + + exps = map(Experiment.from_dir, exps) + exps = filter(lambda x: x.existing, exps) + exps = tuple(exps) + return cls(exps) + + def __init__(self, experiments=()): + assert isinstance(experiments, (list, tuple)), "'experiments' must be a list or tuple" + self.experiments = experiments + + @staticmethod + def _collect_one(exp_id, exp, csv=None, index_col=None): + params = exp.params.to_frame().transpose().infer_objects() # as DataFrame + params['exp_id'] = exp_id + + if csv is None: + return params + + csv_path = exp.path_to(csv) + if os.path.exists(csv_path): + stuff = pd.read_csv(csv_path, index_col=index_col) + else: # try globbing + csv_files = os.path.join(exp.path, csv) + csv_files = list(glob(csv_files)) + if len(csv_files) == 0: + return pd.DataFrame() + + stuff = map(lambda x: pd.read_csv(x, index_col=index_col, float_precision='round_trip'), csv_files) + stuff = pd.concat(stuff, ignore_index=True) + + stuff['exp_id'] = exp_id + return pd.merge(params, stuff, on='exp_id') + + def collect(self, csv=None, index_col=None, prefix=''): + results = [self._collect_one(exp_id, exp, csv=csv, index_col=index_col) for exp_id, exp in enumerate(self.experiments)] + results = pd.concat(results, ignore_index=True, sort=False) + + if len(results): + # build minimal exp_name + exp_name = '' + params = results.loc[:, :'exp_id'].drop('exp_id', axis=1) + if len(params) > 1: + varying_params = params.loc[:, params.nunique() > 1] + exp_name = varying_params.apply(Experiment.abbreviate, axis=1) + idx = results.columns.get_loc('exp_id') + 1 + results.insert(idx, 'exp_name', prefix + exp_name) + + return results + + def filter(self, filters): + if isinstance(filters, str): + filters = string.split(',') + filters = map(lambda x: 
x.split('='), filters) + filters = {k: v for k, v in filters} + + def __filter_exp(e): + for param, value in filters.items(): + try: + p = e.params[param] + ptype = type(p) + if p != ptype(value): + return False + except: + return False + + return True + + filtered_exps = filter(__filter_exp, self.experiments) + filtered_exps = tuple(filtered_exps) + return ExpGroup(filtered_exps) + + def items(self, short_names=True, prefix=''): + if short_names: + params = self.collect(prefix=prefix) + exp_names = params['exp_name'].values + return zip(exp_names, self.experiments) + + return self.experiments + + def __iter__(self): + return iter(self.experiments) \ No newline at end of file diff --git a/expman/expman/experiment.py b/expman/expman/experiment.py new file mode 100644 index 0000000000000000000000000000000000000000..3e4e15aa4e4ff06f3a05e10f605a8eb8735c8cc8 --- /dev/null +++ b/expman/expman/experiment.py @@ -0,0 +1,233 @@ + +import argparse +import ast +import os +import hashlib +import shutil +import numbers +from glob import glob +from io import StringIO + +import numpy as np +import pandas as pd + + +hash_naming = False + +def use_hash_naming(use_hashes=True): + global hash_naming + assert isinstance(use_hashes, bool), "Value must be a boolean." + hash_naming = use_hashes + +def _guessed_cast(x): + try: + return ast.literal_eval(x) + except: + return x + +def exp_filter(string): + if '=' not in string: + raise argparse.ArgumentTypeError( + 'Filter {} is not in format =[, =[, ...]]'.format(string)) + filters = string.split(',') + filters = map(lambda x: x.split('='), filters) + filters = {k: _guessed_cast(v) for k, v in filters} + return filters + + +class Experiment: + + PARAM_FILENAME = 'params.json' + + @staticmethod + def _abbr(name, value, params): + + def prefix_len(a, b): + return len(os.path.commonprefix((a, b))) + + prefix = [name[:prefix_len(p, name) + 1] for p in params.keys() if p != name] + prefix = max(prefix, key=len) if len(prefix) > 0 else name + + sep = '' + if isinstance(value, str): + sep = '-' + elif isinstance(value, numbers.Number): + value = '{:g}'.format(value) + sep = '-' if prefix[-1].isdigit() else '' + elif isinstance(value, (list, tuple)): + value = map(str, value) + value = map(lambda v: v.replace(os.sep, '|'), value) + value = ','.join(list(value)) + sep = '-' + + return prefix, sep, value + + @classmethod + def abbreviate(cls, params): + if isinstance(params, pd.DataFrame): + params = params.iloc[0] + params = params.replace({np.nan: None}) + + if hash_naming: + exp_name = hashlib.md5(str(sorted(params.items())).encode()).hexdigest() + else: + abbrev_params = {k: '{}{}{}'.format(*cls._abbr(k, v, params)) for k, v in params.items()} + abbrev = sorted(abbrev_params.values()) + exp_name = '_'.join(abbrev) + + return exp_name + + @classmethod + def from_dir(cls, exp_dir): + root = os.path.dirname(exp_dir.rstrip('/')) + params = os.path.join(exp_dir, cls.PARAM_FILENAME) + + assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir) + assert os.path.exists(params), "Empty run directory found: '{}'".format(params) + + params = cls._read_params(params) + exp = cls(params, root=root, create=False) + return exp + + @classmethod + def is_exp_dir(cls, exp_dir): + if os.path.isdir(exp_dir): + params = os.path.join(exp_dir, cls.PARAM_FILENAME) + if os.path.exists(params): + return True + + return False + + @classmethod + def update_exp_dir(cls, exp_dir): + exp_dir = exp_dir.rstrip('/') + root = os.path.dirname(exp_dir) + name = 
os.path.basename(exp_dir) + params = os.path.join(exp_dir, cls.PARAM_FILENAME) + + assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir) + assert os.path.exists(params), "Empty run directory found: '{}'".format(params) + + params = cls._read_params(params) + new_name = cls.abbreviate(params) + + if name != new_name: + new_exp_dir = os.path.join(root, new_name) + assert not os.path.exists(new_exp_dir), \ + "Destination experiment directory already exists: '{}'".format(new_exp_dir) + + print('Renaming:\n {} into\n {}'.format(exp_dir, new_exp_dir)) + shutil.move(exp_dir, new_exp_dir) + + def __init__(self, params, root='runs/', ignore=(), create=True): + # relative dir containing this run + self.root = root + # params to be ignored in the run naming + self.ignore = ignore + # parameters of this run + if isinstance(params, argparse.Namespace): + params = vars(params) + + def _sanitize(v): + return tuple(v) if isinstance(v, list) else v + + params = {k: _sanitize(v) for k, v in params.items() if k not in self.ignore} + self.params = pd.Series(params, name='params') + + # whether to create the run directory if not exists + self.create = create + + self.name = self.abbreviate(self.params) + self.path = os.path.join(self.root, self.name) + self.existing = os.path.exists(self.path) + self.found = self.existing + + if not self.existing: + if self.create: + os.makedirs(self.path) + self.write_params() + self.existing = True + else: + print("Run directory '{}' not found, but not created.".format(self.path)) + + else: + param_fname = self.path_to(self.PARAM_FILENAME) + assert os.path.exists(param_fname), "Empty run, parameters not found: '{}'".format(param_fname) + self.params = self._read_params(param_fname) + + + def __str__(self): + s = StringIO() + print('Experiment Dir: {}'.format(self.path), file=s) + print('Params:', file=s) + + # Set display options differently + with pd.option_context('display.max_rows', None, + 'display.max_columns', None, + 'display.width', None): + print(self.params.to_string(), file=s) + + return s.getvalue() + + def __repr__(self): + return self.__str__() + + def path_to(self, path): + path = os.path.join(self.path, path) + return path + + def add_parameter(self, key, value): + assert key not in self.params, "Parameter already exists: '{}'".format(key) + self.params[key] = value + self._update_run_dir() + self.write_params() + + def rename_parameter(self, key, new_key): + assert key in self.params, "Cannot rename non-existent parameter: '{}'".format(key) + assert new_key not in self.params, "Destination name for parameter exists: '{}'".format(key) + + self.params[new_key] = self.params[key] + del self.params[key] + + self._update_run_dir() + self.write_params() + + def remove_parameter(self, key): + assert key in self.params, "Cannot remove non-existent parameter: '{}'".format(key) + del self.params[key] + self._update_run_dir() + self.write_params() + + def _update_run_dir(self): + old_run_dir = self.path + if self.existing: + self.name = self.abbreviate(self.params) + self.path = os.path.join(self.root, self.name) + assert not os.path.exists(self.path), "Cannot rename run, new name exists: '{}'".format(self.path) + shutil.move(old_run_dir, self.path) + + @staticmethod + def _read_params(path): + # read json to pd.Series + params = pd.read_json(path, typ='series') + # transform lists to tuples (for hashability) + params = params.apply(lambda x: tuple(x) if isinstance(x, list) else x) + return params + + def write_params(self): + # write 
Series as json + self.params.to_json(self.path_to(self.PARAM_FILENAME)) + +def test(): + parser = argparse.ArgumentParser(description='Experiment Manager Test') + parser.add_argument('-e', '--epochs', type=int, default=70) + parser.add_argument('-b', '--batch-size', type=int, default=64) + parser.add_argument('-m', '--model', choices=('1d-conv', 'paper'), default='1d-conv') + parser.add_argument('-s', '--seed', type=int, default=23) + parser.add_argument('--no-cuda', action='store_true') + parser.set_defaults(no_cuda=False) + args = parser.parse_args() + + run = Experiment(args, root='prova', ignore=['no_cuda']) + print(run) + print(run.path_to('ckpt/best.h5')) diff --git a/losses.py b/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..52d0b6a6eebd9fd750cff6157b42dc5e4e808102 --- /dev/null +++ b/losses.py @@ -0,0 +1,18 @@ +import tensorflow as tf +from tensorflow.keras import backend as K + +def boundary_loss(y_true, y_pred): + """Additional loss focusing on boundaries""" + # Compute gradients + dy_true, dx_true = tf.image.image_gradients(y_true) + dy_pred, dx_pred = tf.image.image_gradients(y_pred) + + # Compute boundary loss + loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true)) + return loss * 0.5 # weight factor + +def enhanced_binary_crossentropy(y_true, y_pred): + """Combine standard BCE with boundary loss""" + bce = tf.keras.losses.binary_crossentropy(y_true, y_pred) + boundary = boundary_loss(y_true, y_pred) + return bce + boundary \ No newline at end of file diff --git a/matlab/Meye.m b/matlab/Meye.m new file mode 100644 index 0000000000000000000000000000000000000000..fde5857a5e93cb4c002cdf81def946ebbc5e9d9d --- /dev/null +++ b/matlab/Meye.m @@ -0,0 +1,310 @@ +classdef Meye + + properties (Access=private) + model + end + + + methods + + % CONSTRUCTOR + %------------------------------------------------------------------ + function self = Meye(modelPath) + % Class constructor + arguments + modelPath char {mustBeText} + end + + % Change the current directory to the directory where the + % original class is, so that the package with the custom layers + % is created there + classPath = getClassPath(self); + oldFolder = cd(classPath); + % Import the model saved as ONNX + self.model = importONNXNetwork(modelPath, ... + 'GenerateCustomLayers',true, ... + 'PackageName','customLayers_meye',... + 'InputDataFormats', 'BSSC',... + 'OutputDataFormats',{'BSSC','BC'}); + + % Manually change the "nearest" option to "linear" inside of + % the automatically generated custom layers. This is necessary + % due to the fact that MATLAB still does not support the proper + % translation between ONNX layers and DLtoolbox layers + self.nearest2Linear([classPath filesep '+customLayers_meye']) + + % Go back to the old current folder + cd(oldFolder) + end + + + % PREDICTION OF SINGLE IMAGES + %------------------------------------------------------------------ + function [pupilMask, eyeProb, blinkProb] = predictImage(self, inputImage, options) + % Predicts pupil location on a single image + arguments + self + inputImage + options.roiPos = [] + options.threshold = [] + end + + roiPos = options.roiPos; + + % Convert the image to grayscale if RGB + if size(inputImage,3) > 1 + inputImage = im2gray(inputImage); + end + + % Crop the frame to the desired ROI + if ~isempty(roiPos) + crop = inputImage(roiPos(2):roiPos(2)+roiPos(4)-1,... 
+ roiPos(1):roiPos(1)+roiPos(3)-1); + else + crop = inputImage; + end + + % Preprocessing + img = double(imresize(crop,[128 128])); + img = img / max(img,[],'all'); + + % Do the prediction + [rawMask, info] = predict(self.model, img); + eyeProb = info(1); + blinkProb = info(2); + + % Reinsert the cropped prediction in the frame + if ~isempty(roiPos) + pupilMask = zeros(size(inputImage)); + pupilMask(roiPos(2):roiPos(2)+roiPos(4)-1,... + roiPos(1):roiPos(1)+roiPos(3)-1) = imresize(rawMask, [roiPos(4), roiPos(3)],"bilinear"); + else + pupilMask = imresize(rawMask,size(inputImage),"bilinear"); + end + + % Apply a threshold to the image if requested + if ~isempty(options.threshold) + pupilMask = pupilMask > options.threshold; + end + + end + + + % PREDICT A MOVIE AND GET A TABLE WITH THE RESULTS + %------------------------------------------------------------------ + function tab = predictMovie(self, moviePath, options) + % Predict an entire video file and returns a results Table + % + % tab = predictMovie(moviePath, name-value) + % + % INPUT(S) + % - moviePath: (char/string) Full path of a video file. + % - name-value pairs + % - roiPos: [x,y,width,height] 4-elements vector defining a + % rectangle containing the eye. Works best if width and + % height are similar. If empty, a prediction will be done on + % a full frame(Default: []). + % - threshold: [0-1] The pupil prediction is binarized based + % on a threshold value to measure pupil size. (Default:0.4) + % + % OUTPUT(S) + % - tab: a MATLAB table containing data of the analyzed video + + arguments + self + moviePath char {mustBeText} + options.roiPos double = [] + options.threshold = 0.4; + end + + % Initialize a video reader + v = VideoReader(moviePath); + totFrames = v.NumFrames; + + % Initialize Variables + frameN = zeros(totFrames,1,'double'); + frameTime = zeros(totFrames,1,'double'); + binaryMask = cell(totFrames,1); + pupilArea = zeros(totFrames,1,'double'); + isEye = zeros(totFrames,1,'double'); + isBlink = zeros(totFrames,1,'double'); + + tic + for i = 1:totFrames + % Progress report + if toc>10 + fprintf('%.1f%% - Processing frame (%u/%u)\n', (i/totFrames)*100 , i, totFrames) + tic + end + + % Read a frame and make its prediction + frame = read(v, i, 'native'); + [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=options.roiPos,... + threshold=options.threshold); + + % Save results for this frame + frameN(i) = i; + frameTime(i) = v.CurrentTime; + binaryMask{i} = pupilMask > options.threshold; + pupilArea(i) = sum(binaryMask{i},"all"); + isEye(i) = eyeProb; + isBlink(i) = blinkProb; + end + % Save all the results in a final table + tab = table(frameN,frameTime,binaryMask,pupilArea,isEye,isBlink); + end + + + + % PREVIEW OF A PREDICTED MOVIE + %------------------------------------------------------------------ + function predictMovie_Preview(self, moviePath, options) + % Displays a live-preview of prediction for a video file + + arguments + self + moviePath char {mustBeText} + options.roiPos double = [] + options.threshold double = [] + end + roiPos = options.roiPos; + + + % Initialize a video reader + v = VideoReader(moviePath); + % Initialize images to show + blankImg = zeros(v.Height, v.Width, 'uint8'); + cyanColor = cat(3, blankImg, blankImg+255, blankImg+255); + pupilTransparency = blankImg; + + % Create a figure for the preview + figHandle = figure(... + 'Name','MEYE video preview',... + 'NumberTitle','off',... + 'ToolBar','none',... + 'MenuBar','none', ... 
+ 'Color',[.1, .1, .1]); + + ax = axes('Parent',figHandle,... + 'Units','normalized',... + 'Position',[0 0 1 .94]); + + imHandle = imshow(blankImg,'Parent',ax); + hold on + cyanHandle = imshow(cyanColor,'Parent',ax); + cyanHandle.AlphaData = pupilTransparency; + rect = rectangle('LineWidth',1.5, 'LineStyle','-.','EdgeColor',[1,0,0],... + 'Parent',ax,'Position',[0,0,0,0]); + hold off + title(ax,'MEYE Video Preview', 'Color',[1,1,1]) + + % Movie-Showing loop + while exist("figHandle","var") && ishandle(figHandle) && hasFrame(v) + try + tic + frame = readFrame(v); + + % Actually do the prediction + [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=roiPos,... + threshold=options.threshold); + + % Update graphic elements + imHandle.CData = frame; + cyanHandle.AlphaData = imresize(pupilMask, [v.Height, v.Width]); + if ~isempty(roiPos) + rect.Position = roiPos; + end + titStr = sprintf('Eye: %.2f%% - Blink:%.2f%% - FPS:%.1f',... + eyeProb*100, blinkProb*100, 1/toc); + ax.Title.String = titStr; + drawnow + catch ME + warning(ME.message) + close(figHandle) + end + end + disp('Stop preview.') + end + + + end + + + %------------------------------------------------------------------ + %------------------------------------------------------------------ + % INTERNAL FUNCTIONS + %------------------------------------------------------------------ + %------------------------------------------------------------------ + methods(Access=private) + %------------------------------------------------------------------ + function path = getClassPath(~) + % Returns the full path of where the class file is + + fullPath = mfilename('fullpath'); + [path,~,~] = fileparts(fullPath); + end + + %------------------------------------------------------------------ + function [fplist,fnlist] = listfiles(~, folderpath, token) + listing = dir(folderpath); + index = 0; + fplist = {}; + fnlist = {}; + for i = 1:size(listing,1) + s = listing(i).name; + if contains(s,token) + index = index+1; + fplist{index} = [folderpath filesep s]; + fnlist{index} = s; + end + end + end + + % nearest2Linear + %------------------------------------------------------------------ + function nearest2Linear(self, inputPath) + fP = self.listfiles(inputPath, 'Shape_To_Upsample'); + + foundFileToChange = false; + beforePatter = '"half_pixel", "nearest",'; + afterPattern = '"half_pixel", "linear",'; + for i = 1:length(fP) + + % Get the content of the file + fID = fopen(fP{i}, 'r'); + f = fread(fID,'*char')'; + fclose(fID); + + % Send a verbose warning the first time we are manually + % correcting the upsampling layers bug + if ~foundFileToChange && contains(f,beforePatter) + foundFileToChange = true; + msg = ['This is a message from MEYE developers.\n' ... + 'In the current release of the Deep Learning Toolbox ' ... + 'MATLAB does not translate well all the layers in the ' ... + 'ONNX network to native MATLAB layers. In particular the ' ... + 'automatically generated custom layers that have to do ' ... + 'with UPSAMPLING are generated with the ''nearest'' instead of ' ... + 'the ''linear'' mode.\nWe automatically correct for this bug when you ' ... + 'instantiate a Meye object (henche this warning).\nEverything should work fine, ' ... + 'and we hope that in future MATLAB releases this hack wont be ' ... + 'needed anymore.\n' ... + 'If you find bugs or performance issues, please let us know ' ... + 'with an issue ' ... 
+ 'HERE.']; + warning(sprintf(msg)) + end + + % Replace the 'nearest' option with 'linear' + newF = strrep(f, beforePatter, afterPattern); + + % Save the file back in its original location + fID = fopen(fP{i}, 'w'); + fprintf(fID,'%s',newF); + fclose(fID); + end + end + end +end + + diff --git a/matlab/README.md b/matlab/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db265cb379d6a6108a2ba9bfed6d37b3d2e1f7f4 --- /dev/null +++ b/matlab/README.md @@ -0,0 +1,57 @@ +# MEYE pupillometry on MATLAB + +> Try MEYE on a standalone [Web-App](https://www.pupillometry.it/) + +> Learn more on the original [MEYE repo](https://github.com/fabiocarrara/meye) + +> Label your own dataset with [pLabeler](https://github.com/LeonardoLupori/pLabeler) + +Starting from MATLAB version 2021b, MEYE is also available for use on MATLAB! + +Here's a brief tutorial on how to use it in you own experiments. + +## What do you need? + +- [MATLAB 2021b](https://it.mathworks.com/products/matlab.html) or later +- [MATLAB Image Processing Toolbox](https://it.mathworks.com/products/image.html) +- [MATLAB Deep Learning Toolbox](https://it.mathworks.com/products/deep-learning.html) + An additional _support package_ of this toolbox has to be downloaded manually from the Add-On explorer in MATLAB: + - _Deep Learning Toolbox™ Converter for ONNX Model Format_ + ![image](https://user-images.githubusercontent.com/39329654/152327789-dde0af9b-d531-40be-b1a0-5ba17c508a13.png) +- A MEYE model in [ONNX](https://onnx.ai/) format. You can download our latest model [here](https://github.com/fabiocarrara/meye/releases). +![onnxModel](https://user-images.githubusercontent.com/39329654/152552616-1b800398-5794-4f51-b4ed-2e3339cb2d0d.png) + + +## Quick start! + +```matlab +% Create an instance of Meye +meye = Meye('path/to/model.onnx'); + +% Example 1 +% Make predictions on a single Image +% +% Load an image for which you want to predict the pupil +img = imread('path/to/img.tif'); +% Make a prediction on a frame +[pupil, isEye, isBlink] = meye.predictImage(img); + +% Example 2 +% Make predictions on a video file and preview the results +% +meye.predictMovie_Preview('path/to/video'); +``` + +## Examples + +Inside the file [example.m](example.m) you can find 5 extensively commented examples of some use cases for MEYE on MATLAB. +These examples require you to download example data from [here](https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing). To run the examples succesfully, make sure that the downloaded files are in the same folder as the `example.m` file. + +# Known issues + +## Small issue with _Upsample_ layers +When [importing](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html) a ONNX network, MATLAB tries to translate all the layers of the network from ONNX Operators to built-in MATLAB layers (see [here](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html#mw_dc6cd14c-e8d0-4370-af81-96626a888d9c)). +This operation is not succesful for all the layers and MATLAB tries to overcome erros by automatically generating custom layers to replace the ones that it wasnt able to translate. These _custom_ layers are stored in a folder as MATLAB `.m` class files. +We found a small bug in the way MATLAB translates `Upsample` layers while importing MEYE network. In particular, the custom generated layers perform the upsample with the `nearest` interpolation method, while it should be used the `linear` method for best results. 
+For now, we solved this bug by automatically replacing the `nearest` method with the `linear` one in all the custom generated layers. This restores optimal performance with no additional computational costs, but it's a bit hacky. +We hope that in future releases MATLAB's process of translation to its own built-in layers will be smoother and this trick will not be needed anymore. \ No newline at end of file diff --git a/matlab/example.m b/matlab/example.m new file mode 100644 index 0000000000000000000000000000000000000000..94d2d68343ad58ffb66aaf1404481cf8751b2d44 --- /dev/null +++ b/matlab/example.m @@ -0,0 +1,211 @@ +%% Download all the example material +% +% 1 - Download the latest MEYE model in ONNX format +% ------------------------------------------------------------------------- +% Download the .onnx file from the assets here: +% https://github.com/fabiocarrara/meye/releases + +% EXAMPLE data can be found in this folder: +% https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing +% +% 2 - Download an example image of a simple mouse eye from: +% https://drive.google.com/file/d/1hcWcC1cAmzY4r-SIWDIgUY0-gpbmetUL/view?usp=sharing +% +% 3 - Download an example of a large image here: +% https://drive.google.com/file/d/16QixvUMtojqfrcy4WXlYJ7CP3K8vrz_C/view?usp=sharing +% +% 4 - Download an example pupillometry video here: +% https://drive.google.com/file/d/1TYj80dzIR1ZjpEvfefH_akhbUjwpvJta/view?usp=sharing + + +%% EXAMPLE 1 +% ------------------------------------------------------------------------- +% Predict the pupil from a simple image of an eye + +% Clean up the workspace +clearvars, clc + +% Change these values according to the filenames of the MEYE model and the +% simple pupil image +MODEL_NAME = 'meye_20220124.onnx'; +IMAGE_NAME = 'pupilImage_simple.png'; + + +% Initialize a MEYE object +meye = Meye(MODEL_NAME); + +% Load the simple image +img = imread(IMAGE_NAME); + +% Predict a single image +[pupilMask, eyeProb, blinkProb] = meye.predictImage(img); + +% Plot the results of the prediction +subplot(1,3,1) +imshow(img) +title('Original Image') + +subplot(1,3,2) +imagesc(pupilMask) +title(sprintf('Prediction (Eye:%.2f%% - Blink:%.2f%%)',eyeProb*100,blinkProb*100)) +axis off, axis image + +subplot(1,3,3) +imshowpair(img, pupilMask) +title('Merge') + + +%% EXAMPLE 2 +% ------------------------------------------------------------------------- +% Binarize the pupil prediction and get the pupil size in pixels + +% Clean up the workspace +clearvars, close all, clc + +% Change these values according to the filenames of the MEYE model and the +% simple pupil image +MODEL_NAME = 'meye_20220124.onnx'; +IMAGE_NAME = 'pupilImage_simple.png'; + + +% Initialize a MEYE object +meye = Meye(MODEL_NAME); + +% Load the simple image +img = imread(IMAGE_NAME); + +% Predict a single image +% You can automatically binarize the prediction by passing the "threshold" +% optional argument. This number can be between 0 and 1. 
If omitted, the +% function returns a raw probability map instead of a binarized image +pupilBinaryMask = meye.predictImage(img, 'threshold', 0.4); + +imshowpair(img, pupilBinaryMask) +title(sprintf('Pupil Size: %u px', sum(pupilBinaryMask,'all'))) + + +%% EXAMPLE 3 +% ------------------------------------------------------------------------- +% Predict the pupil on a large image where the eye is a small portion of +% the image + +% Clean up the workspace +clearvars, close all, clc + +% Change these values according to the filenames of the MEYE model and the +% simple pupil image +MODEL_NAME = 'meye_20220124.onnx'; +IMAGE_NAME = 'pupilImage_large.png'; + + +% Initialize a MEYE object +meye = Meye(MODEL_NAME); + +% Load the simple image +img = imread(IMAGE_NAME); + +% Predict the image +pupilMask = meye.predictImage(img); + +% As you can see from this image, the prediction is not perfect. This is +% because MEYE was trained on images that tightly contained the eye. +subplot(1,2,1) +imshowpair(img, pupilMask) +title('Tomal Image prediction (low-quality)') + +% In order to solve this issue it is possible to restrict the prediction to +% a rectangular Region of Interest (ROI) in the image. This is done simply +% by passing the optional argument "roiPos" to the predictImage function. +% The roiPos is a 4-elements vector containing X,Y, width, height of a +% rectangular shape. Note that X and Y are the coordinates of the top left +% corner of the ROI + +ROI = [90,90,200,200]; +pupilMask = meye.predictImage(img, 'roiPos', ROI); + +% Plot the results with the ROI and see the difference between the 2 methods +subplot(1,2,2) +imshowpair(img, pupilMask) +rectangle('Position',ROI, 'LineStyle','-.','EdgeColor',[1,0,0]) +title('ROI prediction (high quality)') +linkaxes +set(gcf,'Position',[300,600,1000,320]) + + +%% EXAMPLE 4 +% ------------------------------------------------------------------------- +% Show a preview of the prediction of an entire pupillometry video. +% +% As you saw you can adjust a few parameters for the prediction. +% If you want to get a quick preview of how your pre-recorded video will be +% processed, you can use the method predictMovie_Preview. +% Here you can play around with different ROI positions and threshold +% values and see what are the results before analyzing the whole video. + +% Clean up the workspace +clearvars, close all, clc + +% Change these values according to the filenames of the MEYE model and the +% simple pupil image +MODEL_NAME = 'meye_20220124.onnx'; +VIDEO_NAME = 'mouse_example.mp4'; + +% Initialize a MEYE object +meye = Meye(MODEL_NAME); + +% Try to play around moving or resizing the ROI to see how the performances change +ROI = [70, 60, 200, 200]; + +% Change the threshold value to binarize the pupil prediction. +% Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it +threshold = 0.4; + +meye.predictMovie_Preview(VIDEO_NAME,"roiPos", ROI,"threshold",threshold); + + + +%% EXAMPLE 5 +% Predict the entire video and get the results table + +% Clean up the workspace +clearvars, close all, clc + +% Change these values according to the filenames of the MEYE model and the +% simple pupil image +MODEL_NAME = 'meye_20220124.onnx'; +VIDEO_NAME = 'mouse_example.mp4'; + +% Initialize a MEYE object +meye = Meye(MODEL_NAME); + +% Try to play around moving or resizing the ROI to see how the performances change +ROI = [70, 60, 200, 200]; + +% Change the threshold value to binarize the pupil prediction. 
+% Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it +threshold = 0.4; + +% Predict the whole movie and save results in a table +T = meye.predictMovie(VIDEO_NAME, "roiPos", ROI, "threshold", threshold); + +% Show some of the values in the table +disp(head(T)) + +% Plot some of the results +subplot 311 +plot(T.frameTime,T.isEye, 'LineWidth', 2) +title('Eye Probability') +ylabel('Probability'), +xlim([T.frameTime(1) T.frameTime(end)]) + +subplot 312 +plot(T.frameTime,T.isBlink, 'LineWidth', 2) +title('Blink Probability') +ylabel('Probability') +xlim([T.frameTime(1) T.frameTime(end)]) + +subplot 313 +plot(T.frameTime,T.pupilArea, 'LineWidth', 2) +title('Pupil Size') +xlabel('Time (s)'), ylabel('Pupil Area (px)') +xlim([T.frameTime(1) T.frameTime(end)]) diff --git a/models/deeplab.py b/models/deeplab.py new file mode 100644 index 0000000000000000000000000000000000000000..e0625cd015378a2923222b57c2947a49ac9d1861 --- /dev/null +++ b/models/deeplab.py @@ -0,0 +1,78 @@ +import sys +sys.path += ['models/deeplab'] + +import tensorflow as tf + +from tensorflow.keras import backend as K +from tensorflow.keras import layers as L +from tensorflow.keras.models import Model, Sequential + +from deeplabv3p.models.deeplabv3p_resnet50 import Deeplabv3pResNet50 +from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Small, Deeplabv3pLiteMobileNetV3Small, Deeplabv3pMobileNetV3Large, Deeplabv3pLiteMobileNetV3Large +from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception +from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet, Deeplabv3pLitePeleeNet + +AVAILABLE_BACKBONES = { + 'resnet50': Deeplabv3pResNet50, + 'xception': Deeplabv3pXception, + 'mobilenetv3-large': Deeplabv3pMobileNetV3Large, + 'lite-mobilenetv3-large': Deeplabv3pLiteMobileNetV3Large, + 'mobilenetv3-small': Deeplabv3pMobileNetV3Small, + 'lite-mobilenetv3-small': Deeplabv3pLiteMobileNetV3Small, + 'peleenet': Deeplabv3pPeleeNet, + 'lite-peleenet': Deeplabv3pLitePeleeNet, +} + +AVAILABLE_PRETRAINED_WEIGHTS = { + 'resnet50': 'imagenet', + 'xception': None, # 'pascalvoc', # needs fix in upstream + 'mobilenetv3-large': 'imagenet', + 'lite-mobilenetv3-large': 'imagenet', + 'mobilenetv3-small': 'imagenet', + 'lite-mobilenetv3-small': 'imagenet', + 'peleenet': 'imagenet', + 'lite-peleenet': 'imagenet', +} + +def build_model(input_shape, output_shape, config): + + assert input_shape[:2] == output_shape[:2], "Only same input-output HW shapes are supported." 
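+    # heads are attached to two backbone layers below: a sigmoid on 'pred_resize'
+    # (the upsampled mask prediction) gives the pupil mask, and a 2-unit sigmoid
+    # dense layer on the flattened 'image_pooling' output gives the is_eye/is_blink tags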
+ num_classes = output_shape[2] + + # backbone pretends RGB images to use pretrained weights + needs_rgb_conversion = input_shape[2] != 3 + backbone_input_shape = (input_shape[:2] + (3,)) if needs_rgb_conversion else input_shape + backbone_name = config.get('backbone', 'resnet50') + weights = config.get('weights', AVAILABLE_PRETRAINED_WEIGHTS[backbone_name]) + backbone_fn = AVAILABLE_BACKBONES[backbone_name] + backbone, backbone_len = backbone_fn(input_shape=backbone_input_shape, num_classes=num_classes, weights=weights, OS=8) + + # segmentation mask + out_mask = backbone.get_layer('pred_resize').output + out_mask = L.Activation('sigmoid', name='mask')(out_mask) + + # metadata tags (is_eye and is_blink) + middle = backbone.get_layer('image_pooling').output + middle = L.Flatten()(middle) + out_tags = L.Dense(2, activation='sigmoid', name='tags')(middle) + + model = Model(inputs=backbone.input, outputs=[out_mask, out_tags]) + + if needs_rgb_conversion: + gray_input = L.Input(shape=input_shape) + rgb_input = L.Lambda(lambda x: K.tile(x, (1, 1, 1, 3)) , name='gray2rgb')(gray_input) # we assume BHWC + out_mask, out_tags = model(rgb_input) + + # rename outputs + out_mask = L.Lambda(lambda x: x, name='mask')(out_mask) + out_tags = L.Lambda(lambda x: x, name='tags')(out_tags) + model = Model(inputs=gray_input, outputs=[out_mask, out_tags]) + + return model + + +if __name__ == "__main__": + shape = (128, 128, 1) + model = build_model(shape, shape, {'weights': None})#, 'backbone': 'lite-mobilenetv3-small'}) + model.summary() + import pdb; pdb.set_trace() diff --git a/models/deeplab/README.md b/models/deeplab/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ff8723009538fb7d4ef4564f0591d1304d6e589c --- /dev/null +++ b/models/deeplab/README.md @@ -0,0 +1,380 @@ +# TF Keras DeepLab v3+ Modelset + +## Introduction + +An end-to-end semantic segmentation pipeline with DeepLabv3+ models. Implement with tf.keras, including data collection/annotation, model training/tuning, model evaluation and on device deployment. Support different backbones and different head architecture: + +#### Backbone +- [x] Xception +- [x] ResNet50 +- [x] MobileNetV2 +- [x] MobilenetV3(Large/Small) +- [x] PeleeNet ([paper](https://arxiv.org/abs/1804.06882)) + +#### Head +- [x] ASPP +- [x] ASPP Lite(Only Global Pooling + 1x1 Conv) +- [x] Decoder +- [x] Different Output Stride(8/16/32) + +#### Loss +- [x] Categorical Cross Entropy Loss +- [x] Balanced Class Weighted Cross Entropy Loss +- [x] Adaptive Class Weighted Cross Entropy Loss +- [x] Focal Loss + +#### Postprocess +- [x] Numpy CRF (Conditional Random Fields) postprocess implementation + + +#### Train tech +- [x] Transfer training from Imagenet/PascalVOC +- [x] Dynamic learning rate decay (Cosine/Exponential/Polynomial/PiecewiseConstant) +- [x] Weights Average policy for optimizer (EMA/SWA/Lookahead, valid for TF-2.x with tfa) +- [x] GridMask data augmentation ([paper](https://arxiv.org/abs/2001.04086)) +- [x] Multi-GPU training with SyncBatchNorm support (valid for TF-2.2 and later) + +#### On-device deployment +- [x] Tensorflow-Lite Float32/UInt8 model inference +- [x] MNN Float32/UInt8 model inference + + +## Quick Start + +1. Install requirements on Ubuntu 16.04/18.04: + +``` +# pip install -r requirements.txt +``` + +2. Download Deeplabv3+ PascalVOC pretrained weights. 
It's provided by [keras-deeplab-v3-plus](https://github.com/bonlime/keras-deeplab-v3-plus) and imported from [original TF checkpoint](https://github.com/tensorflow/models/tree/master/research/deeplab) +3. Run Deeplab segmentation on your image or video. + +``` +# wget -O weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 +# python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --image +# python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --input= + +``` + +Image segment sample: + +
+*(sample segmentation result images)*
+ + + +## Guide of train/evaluate/demo + +### Train + +1. Prepare dataset + 1. PascalVOC2012 & SBD (VOC2012 train_aug) semantic segmentation dataset + * Run a simple script to download, convert & merge PascalVOC 2012 and SBD: + + ``` + # pushd tools/dataset_converter/voc_augment/ + # ./dataset_prepare.sh + # popd + + ``` + Dataset images & labels will be placed at `VOC2012/` + + 2. MS COCO 2017 segmentation dataset + * Run a simple script to download COCO2017 dataset, and convert annotated instance mask to PNG format semantic segmentation label image: + + ``` + # pushd tools/dataset_converter/mscoco2017/ + # ./dataset_prepare.sh + # popd + + ``` + You can dig into related script for details. Dataset images & labels will be placed at `mscoco2017/` + + 3. ADE20K semantic segmentation dataset + * Run a simple script to download, merge & convert ADE20K dataset: + + ``` + # pushd tools/dataset_converter/ade20k/ + # ./dataset_prepare.sh + # popd + + ``` + Dataset images & labels will be placed at `ADEChallengeData2016/` + + 4. Cityscapes semantic segmentation dataset + * Download the Cityscapes dataset package from `https://www.cityscapes-dataset.com/` (need registration) and put to `tools/dataset_converter/cityscapes/`. Then run a simple script to merge & convert: + + ``` + # pushd tools/dataset_converter/cityscapes/ + # ./dataset_prepare.sh + # popd + + ``` + Dataset images & labels will be placed at `Cityscapes/` + + 5. Customized semantic segmentation dataset + * Collecting target JPG format images and place at `/images` + * Generate semantic segmentation label image. You can use [labelme](https://github.com/wkentaro/labelme) to annotate your image with polygonal segmentation mask and save to a json file. Then run [json_to_dataset.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/dataset_converter/labelme/json_to_dataset.py) to convert json annotations to PascalVOC style PNG format label images: + ``` + # cd tools/dataset_converter/labelme && python json_to_dataset.py -h + usage: json_to_dataset.py [-h] --json_file_path JSON_FILE_PATH + [--classes_path CLASSES_PATH] --png_label_path + PNG_LABEL_PATH + + convert labelme json label to voc png label + + optional arguments: + -h, --help show this help message and exit + --json_file_path JSON_FILE_PATH + path to labelme annotated json label files + --classes_path CLASSES_PATH + path to class definitions, + default=../../../configs/voc_classes.txt + --png_label_path PNG_LABEL_PATH + output path of converted png label images + ``` + + For class names file format, refer to [voc_classes.txt](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/configs/voc_classes.txt) (not including background class, which would be added as index 0 in code by default). + + * Place the PNG label images at `/labels` + * Create PascalVOC style dataset split (train/val/test) txt files. One line for a image and only include image base name, like: + ``` + 2007_000033 + 2007_000042 + 2007_000061 + ... + ``` + + You can put these dataset files together at `` to create an independent dataset directory + + +2. 
[train.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/train.py) +``` +# python train.py -h +usage: train.py [-h] [--model_type MODEL_TYPE] [--weights_path WEIGHTS_PATH] + [--model_input_shape MODEL_INPUT_SHAPE] + [--output_stride {8,16,32}] [--dataset_path DATASET_PATH] + [--dataset_file DATASET_FILE] + [--val_dataset_file VAL_DATASET_FILE] [--val_split VAL_SPLIT] + [--classes_path CLASSES_PATH] [--batch_size BATCH_SIZE] + [--optimizer {adam,rmsprop,sgd}] [--loss {crossentropy,focal}] + [--weighted_type {None,adaptive,balanced}] + [--learning_rate LEARNING_RATE] + [--average_type {None,ema,swa,lookahead}] + [--decay_type {None,cosine,exponential,polynomial,piecewise_constant}] + [--transfer_epoch TRANSFER_EPOCH] [--freeze_level {0,1,2}] + [--init_epoch INIT_EPOCH] [--total_epoch TOTAL_EPOCH] + [--gpu_num GPU_NUM] [--model_pruning] [--eval_online] + [--eval_epoch_interval EVAL_EPOCH_INTERVAL] + [--save_eval_checkpoint] + +optional arguments: + -h, --help show this help message and exit + --model_type MODEL_TYPE + DeepLabv3+ model type: + mobilenetv2/mobilenetv2_lite/resnet50, + default=mobilenetv2_lite + --weights_path WEIGHTS_PATH + Pretrained model/weights file for fine tune + --model_input_shape MODEL_INPUT_SHAPE + model image input shape as x, + default=512x512 + --output_stride {8,16,32} + model output stride, default=16 + --dataset_path DATASET_PATH + dataset path containing images and label png file, + default=VOC2012/ + --dataset_file DATASET_FILE + train samples txt file, + default=VOC2012/ImageSets/Segmentation/trainval.txt + --val_dataset_file VAL_DATASET_FILE + val samples txt file, default=None + --val_split VAL_SPLIT + validation data persentage in dataset if no val + dataset provide, default=0.1 + --classes_path CLASSES_PATH + path to class definitions, + default=configs/voc_classes.txt + --batch_size BATCH_SIZE + batch size for training, default=16 + --optimizer {adam,rmsprop,sgd} + optimizer for training (adam/rmsprop/sgd), default=sgd + --loss {crossentropy,focal} + loss type for training (crossentropy/focal), + default=crossentropy + --weighted_type {None,adaptive,balanced} + class balance weighted type, default=None + --learning_rate LEARNING_RATE + Initial learning rate, default=0.01 + --average_type {None,ema,swa,lookahead} + weights average type, default=None + --decay_type {None,cosine,exponential,polynomial,piecewise_constant} + Learning rate decay type, default=None + --transfer_epoch TRANSFER_EPOCH + Transfer training stage epochs, default=5 + --freeze_level {0,1,2} + Freeze level of the model in transfer training stage. 
+ 0:NA/1:backbone/2:only open prediction layer + --init_epoch INIT_EPOCH + initial training epochs for fine tune training, + default=0 + --total_epoch TOTAL_EPOCH + total training epochs, default=150 + --gpu_num GPU_NUM Number of GPU to use, default=1 + --model_pruning Use model pruning for optimization, only for TF 1.x + --eval_online Whether to do evaluation on validation dataset during + training + --eval_epoch_interval EVAL_EPOCH_INTERVAL + Number of iteration(epochs) interval to do evaluation, + default=10 + --save_eval_checkpoint + Whether to save checkpoint with best evaluation result +``` + +Following is a reference config cmd for training mobilenetv2 lite model on PascalVOC2012 & SBD dataset: +``` +# python train.py --model_type=mobilenetv2_lite --output_stride=16 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/train.txt --val_dataset_file=VOC2012/ImageSets/Segmentation/val.txt --batch_size=16 --freeze_level=1 --transfer_epoch=5 --total_epoch=150 --eval_online --eval_epoch_interval=1 --save_eval_checkpoint --weighted_type=adaptive +``` + +Checkpoints during training could be found at `logs/000/`. Choose a best one as result + +You can also use Tensorboard to monitor the loss trend during train: +``` +# tensorboard --logdir=logs/000 +``` + +MultiGPU usage: use `--gpu_num N` to use N GPUs. It use [tf.distribute.MirroredStrategy](https://www.tensorflow.org/guide/distributed_training#mirroredstrategy) to support MultiGPU environment. + + +### Model dump +We' better to dump out inference model from training checkpoint for eval or demo. Following script cmd work for that. + +``` +# python deeplab.py --model_type=mobilenetv2_lite --weights_path=logs/000/.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --dump_model --output_model_file=model.h5 +``` + +Change model_type, input shape & output stride to get different inference model. If "--model_pruning" was added in training, you also need to use "--pruning_model" here for dumping out the pruned model. + +NOTE: One trained model could be dump out for different input shape & output stride (of course with different accuracy performance). + + +### Evaluation +Use [eval.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/eval.py) to do evaluation on the inference model with your test data. It will calculate following metrics: + +* mIOU +* FWIOU (Frequency Weighted IOU) +* PA (Pixel Accuracy) +* MPA (Mean Pixel Accuracy) + +It will also draw confusion matrix chart and IOU result for each class under "result" dir, and optionally save all the segmentation result images & predicted PNG labels for checking. + +``` +# python eval.py --model_path=model.h5 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/val.txt --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --save_result +``` + +If you enable "--eval_online" option in train.py, evaluation on validation dataset will be executed during training. But that may cost more time for train process. + + +Following is a sample result trained on MobilenetV2_Lite model with VOC2012+SBD dataset: +
+*(sample result images from the trained model)*
+ + +Some experiment on VOC2012+SBD dataset and comparison: + +| Model type | InputSize | Output Stride | TrainSet | TestSet | mIOU | FLOPS | Param | Speed | Ps | +| ----- | ------ | ------ | ------ | ----- | ----- | ----- | ----- | ----- | ----- | +| [ResNet50](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_resnet50_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 73.71% | 73.95G | 26.72M | 38ms | Keras on Titan XP | +| [MobileNetV3Large](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3large_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 72.33% | 9.52G | 3.51M | 29ms | Keras on Titan XP | +| [PeleeNet Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.2/deeplabv3p_peleenet_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 68.23% | 7.64G | 2.59M | 37.8ms | Keras on Titan XP | +| [MobileNetV2 Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.0/deeplabv3p_mobilenetv2_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 67.83% | 5.24G | 2.11M | 23ms | Keras on Titan XP | +| [MobileNetV3Small Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3small_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 64.81% | 1.36G | 1.06M | 20ms | Keras on Titan XP | + +**NOTE**: If you meet any model loading problem with these pretrained weights due to h5 format compatibility issue, try to run "Model dump" with it again to regenerate the inference model. + + +### Demo +1. [deeplab.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/deeplab.py) +> * Demo script for trained model + +image inference mode +``` +# python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --image +``` +video inference mode +``` +# python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --input=test.mp4 +``` +For video detection mode, you can use "input=0" to capture live video from web camera and "output=