diff --git a/Old/FAR-HO/augmentation_transforms.py b/Old/FAR-HO/augmentation_transforms.py deleted file mode 100755 index ef17188..0000000 --- a/Old/FAR-HO/augmentation_transforms.py +++ /dev/null @@ -1,456 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Transforms used in the Augmentation Policies.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import inspect -import random -import numpy as np -# pylint:disable=g-multiple-import -from PIL import ImageOps, ImageEnhance, ImageFilter, Image -# pylint:enable=g-multiple-import - - -IMAGE_SIZE = 28 -# What is the dataset mean and std of the images on the training set -MEANS = [0.49139968, 0.48215841, 0.44653091] -STDS = [0.24703223, 0.24348513, 0.26158784] -PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted - - -def random_flip(x): - """Flip the input x horizontally with 50% probability.""" - if np.random.rand(1)[0] > 0.5: - return np.fliplr(x) - return x - - -def zero_pad_and_crop(img, amount=4): - """Zero pad by `amount` zero pixels on each side then take a random crop. - - Args: - img: numpy image that will be zero padded and cropped. - amount: amount of zeros to pad `img` with horizontally and verically. - - Returns: - The cropped zero padded img. 
def zero_pad_and_crop(img, amount=4):
    """Zero pad by `amount` pixels on each side, then take a random crop.

    Args:
      img: numpy image indexed as (height, width, channels).
      amount: non-negative number of zero pixels added on every side.

    Returns:
      A crop of the zero padded image with the same shape as `img`. The crop
      offset is drawn uniformly from [0, 2 * amount] along each axis, so
      every position inside the padded image can be selected.

    Raises:
      ValueError: if `amount` is negative.
    """
    if amount < 0:
        raise ValueError('amount must be non-negative, got {}'.format(amount))

    height, width = img.shape[0], img.shape[1]
    padded_img = np.zeros((height + amount * 2, width + amount * 2,
                           img.shape[2]))
    padded_img[amount:height + amount, amount:width + amount, :] = img

    # `high` is exclusive in np.random.randint. The previous bound of
    # `2 * amount` could never pick the last valid offset and raised for
    # amount == 0, hence the `+ 1`.
    top = np.random.randint(low=0, high=2 * amount + 1)
    left = np.random.randint(low=0, high=2 * amount + 1)
    return padded_img[top:top + height, left:left + width, :]
def create_cutout_mask(img_height, img_width, num_channels, size):
    """Creates a zero mask used for cutout of shape `img_height` x `img_width`.

    Args:
      img_height: Height of image cutout mask will be applied to.
      img_width: Width of image cutout mask will be applied to.
      num_channels: Number of channels in the image.
      size: Side length of the square of zeros; must be positive.

    Returns:
      A tuple `(mask, upper_coord, lower_coord)`. `mask` has shape
      (`img_height`, `img_width`, `num_channels`) and is all ones except for
      a patch of zeros of at most `size` x `size` (smaller only where the
      patch is clipped by the image border). It is meant to be multiplied
      elementwise with the image. `upper_coord` (inclusive) and `lower_coord`
      (exclusive) are the (row, col) corners of the zeroed patch.
    """
    assert img_height == img_width

    # Sample center where cutout mask will be applied
    height_loc = np.random.randint(low=0, high=img_height)
    width_loc = np.random.randint(low=0, high=img_width)

    # Anchor the patch at `loc - size // 2` and make it exactly `size` long.
    # Using `loc + size // 2` as the far edge dropped one pixel for odd
    # sizes and produced an empty patch for size == 1.
    half = size // 2
    top, left = height_loc - half, width_loc - half
    upper_coord = (max(0, top), max(0, left))
    lower_coord = (min(img_height, top + size), min(img_width, left + size))
    mask_height = lower_coord[0] - upper_coord[0]
    mask_width = lower_coord[1] - upper_coord[1]
    assert mask_height > 0
    assert mask_width > 0

    mask = np.ones((img_height, img_width, num_channels))
    mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = 0
    return mask, upper_coord, lower_coord


def cutout_numpy(img, size=16):
    """Apply cutout with mask of shape `size` x `size` to `img`.

    The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
    This operation applies a `size`x`size` mask of zeros to a random location
    within `img`.

    Args:
      img: Numpy image (height, width, channels) that cutout is applied to.
      size: Height/width of the cutout mask.

    Returns:
      A numpy tensor that is the result of applying the cutout mask to `img`.
    """
    # Check the rank before unpacking, so a 2-D input fails on this assertion
    # rather than on an IndexError from `img.shape[2]`.
    assert len(img.shape) == 3
    img_height, img_width, num_channels = img.shape
    mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
    return img * mask
def float_parameter(level, maxval):
    """Scale `maxval` by `level / PARAMETER_MAX` and return a float.

    Args:
      level: Level of the operation that will be between [0, `PARAMETER_MAX`].
      maxval: Maximum value that the operation can have. This will be scaled
        to level/PARAMETER_MAX.

    Returns:
      A float that results from scaling `maxval` according to `level`.
    """
    return float(level) * maxval / PARAMETER_MAX


def int_parameter(level, maxval):
    """Scale `maxval` by `level / PARAMETER_MAX` and truncate to an int.

    Args:
      level: Level of the operation that will be between [0, `PARAMETER_MAX`].
      maxval: Maximum value that the operation can have. This will be scaled
        to level/PARAMETER_MAX.

    Returns:
      An int that results from scaling `maxval` according to `level`.
    """
    return int(level * maxval / PARAMETER_MAX)


def pil_wrap(img):
    """Convert the normalised `img` numpy tensor to an RGBA PIL Image.

    The array is de-normalised with `STDS`/`MEANS` and scaled to 0..255.
    Values are clipped to that range before the uint8 cast so out-of-range
    pixels saturate instead of wrapping around modulo 256.
    """
    pixels = (img * STDS + MEANS) * 255.0
    return Image.fromarray(
        np.clip(pixels, 0, 255).astype(np.uint8)).convert('RGBA')


def pil_unwrap(pil_img):
    """Convert a PIL image back to a normalised (height, width, 3) array.

    The image is read as RGBA at its own size (no longer assumed to be
    `IMAGE_SIZE` x `IMAGE_SIZE`), normalised with `MEANS`/`STDS`, and every
    pixel whose alpha is 0 is set to 0 in all three channels.
    """
    pic_array = np.array(pil_img.convert('RGBA'), dtype=np.float64) / 255.0
    transparent = pic_array[:, :, 3] == 0
    pic_array = (pic_array[:, :, :3] - MEANS) / STDS
    pic_array[transparent] = [0, 0, 0]
    return pic_array
def apply_policy(policy, img):
    """Apply the `policy` to the numpy `img`.

    Args:
      policy: A list of tuples with the form (name, probability, level) where
        `name` is the name of the augmentation operation to apply,
        `probability` is the probability of applying the operation and
        `level` is what strength the operation to apply. `probability` may be
        a plain number or an object exposing `eval()`.
      img: Numpy image that will have `policy` applied to it.

    Returns:
      The result of applying `policy` to `img`.
    """
    pil_img = pil_wrap(img)
    for xform in policy:
        assert len(xform) == 3
        name, probability, level = xform
        # Only objects that expose eval() are evaluated; plain numbers are
        # used directly instead of failing with AttributeError.
        # NOTE(review): eval() presumably needs an active default TF session
        # -- confirm with the caller.
        if hasattr(probability, 'eval'):
            probability = probability.eval()
        xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
        pil_img = xform_fn(pil_img)
    return pil_unwrap(pil_img)


class TransformFunction(object):
    """Wraps the Transform function for pretty printing options."""

    def __init__(self, func, name):
        self.f = func
        self.name = name

    def __repr__(self):
        return '<' + self.name + '>'

    def __call__(self, pil_img):
        return self.f(pil_img)


class TransformT(object):
    """Each instance of this class represents a specific transform."""

    def __init__(self, name, xform_fn):
        self.name = name
        self.xform = xform_fn

    def pil_transformer(self, probability, level):
        """Return a callable applying the transform with `probability`."""

        def return_function(im):
            if random.random() < probability:
                im = self.xform(im, level)
            return im

        name = self.name + '({:.1f},{})'.format(probability, level)
        return TransformFunction(return_function, name)

    def do_transform(self, image, level):
        """Always apply the transform at `level` to the numpy `image`."""
        # random.random() is always below 1.0, so the transform is always
        # applied. The previous code passed PARAMETER_MAX here, which is a
        # level bound rather than a probability.
        f = self.pil_transformer(1.0, level)
        return pil_unwrap(f(pil_wrap(image)))


################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
    'FlipLR',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
    'FlipUD',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
    'AutoContrast',
    lambda pil_img, level: ImageOps.autocontrast(
        pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
    'Equalize',
    lambda pil_img, level: ImageOps.equalize(
        pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
# pylint:disable=g-long-lambda
invert = TransformT(
    'Invert',
    lambda pil_img, level: ImageOps.invert(
        pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
    'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
    'Smooth',
    lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))


def _rotate_impl(pil_img, level):
    """Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
    degrees = int_parameter(level, 30)
    if random.random() > 0.5:
        degrees = -degrees
    return pil_img.rotate(degrees)


rotate = TransformT('Rotate', _rotate_impl)


def _posterize_impl(pil_img, level):
    """Applies PIL Posterize to `pil_img`, keeping between 4 and 1 bits.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation from [0, `PARAMETER_MAX`].

    Returns:
      A PIL RGBA image posterized to `4 - int_parameter(level, 4)` bits per
      channel, with a floor of 1 bit.
    """
    level = int_parameter(level, 4)
    # At the maximum level `4 - level` is 0, which is outside the 1-8 range
    # ImageOps.posterize documents and blanks the image. Keep at least 1 bit.
    bits = max(1, 4 - level)
    return ImageOps.posterize(pil_img.convert('RGB'), bits).convert('RGBA')


posterize = TransformT('Posterize', _posterize_impl)


def _shear_x_impl(pil_img, level):
    """Applies PIL ShearX to `pil_img`.

    The ShearX operation shears the image along the horizontal axis with
    `level` magnitude; the sign of the shear is chosen at random.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation specified as an Integer from
        [0, `PARAMETER_MAX`].

    Returns:
      A PIL Image of the same size that has had ShearX applied to it.
    """
    level = float_parameter(level, 0.3)
    if random.random() > 0.5:
        level = -level
    # Use the image's own size rather than assuming IMAGE_SIZE x IMAGE_SIZE.
    return pil_img.transform(pil_img.size, Image.AFFINE,
                             (1, level, 0, 0, 1, 0))


shear_x = TransformT('ShearX', _shear_x_impl)
def _shear_y_impl(pil_img, level):
    """Applies PIL ShearY to `pil_img`.

    The ShearY operation shears the image along the vertical axis with `level`
    magnitude; the sign of the shear is chosen at random.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation specified as an Integer from
        [0, `PARAMETER_MAX`].

    Returns:
      A PIL Image of the same size that has had ShearY applied to it.
    """
    level = float_parameter(level, 0.3)
    if random.random() > 0.5:
        level = -level
    # Use the image's own size rather than assuming IMAGE_SIZE x IMAGE_SIZE.
    return pil_img.transform(pil_img.size, Image.AFFINE,
                             (1, 0, 0, level, 1, 0))


shear_y = TransformT('ShearY', _shear_y_impl)


def _translate_x_impl(pil_img, level):
    """Applies PIL TranslateX to `pil_img`.

    Translate the image in the horizontal direction by up to 10 pixels,
    scaled by `level`, in a randomly chosen direction.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation specified as an Integer from
        [0, `PARAMETER_MAX`].

    Returns:
      A PIL Image of the same size that has had TranslateX applied to it.
    """
    level = int_parameter(level, 10)
    if random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size, Image.AFFINE,
                             (1, 0, level, 0, 1, 0))


translate_x = TransformT('TranslateX', _translate_x_impl)


def _translate_y_impl(pil_img, level):
    """Applies PIL TranslateY to `pil_img`.

    Translate the image in the vertical direction by up to 10 pixels,
    scaled by `level`, in a randomly chosen direction.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation specified as an Integer from
        [0, `PARAMETER_MAX`].

    Returns:
      A PIL Image of the same size that has had TranslateY applied to it.
    """
    level = int_parameter(level, 10)
    if random.random() > 0.5:
        level = -level
    return pil_img.transform(pil_img.size, Image.AFFINE,
                             (1, 0, 0, 0, 1, level))


translate_y = TransformT('TranslateY', _translate_y_impl)


def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
    """Crops `level` pixels off every side, then resizes back to full size.

    Args:
      pil_img: Image in PIL object.
      level: Number of pixels removed from each border. It is used as given,
        not rescaled through `int_parameter`.
      interpolation: PIL resampling filter used for the resize.

    Returns:
      A PIL Image with the same size as `pil_img`.
    """
    # NOTE(review): nothing guards against 2 * level >= image size, which
    # would make the crop box empty -- confirm callers keep `level` small.
    width, height = pil_img.size
    cropped = pil_img.crop((level, level, width - level, height - level))
    resized = cropped.resize((width, height), interpolation)
    return resized


crop_bilinear = TransformT('CropBilinear', _crop_impl)
def _solarize_impl(pil_img, level):
    """Applies PIL Solarize to `pil_img`.

    The solarize threshold is `256 - int_parameter(level, 256)`, so level 0
    leaves the image unchanged and higher levels lower the threshold.

    Args:
      pil_img: Image in PIL object.
      level: Strength of the operation specified as an Integer from
        [0, `PARAMETER_MAX`].

    Returns:
      A PIL Image that has had Solarize applied to it.
    """
    level = int_parameter(level, 256)
    return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')


solarize = TransformT('Solarize', _solarize_impl)


def _cutout_pil_impl(pil_img, level):
    """Apply cutout to pil_img at the specified level.

    Args:
      pil_img: RGBA image in PIL object.
      level: Strength from [0, `PARAMETER_MAX`]; scaled to a patch size of up
        to 20 pixels.

    Returns:
      `pil_img` itself when the scaled size is not positive, otherwise a copy
      in which a randomly placed patch is painted (125, 122, 113, 0).
    """
    size = int_parameter(level, 20)
    if size <= 0:
        return pil_img
    img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
    _, upper_coord, lower_coord = (
        create_cutout_mask(img_height, img_width, num_channels, size))
    # Paint on a copy so the caller's image is left untouched, consistent
    # with the other transforms, which all return new images.
    pil_img = pil_img.copy()
    pixels = pil_img.load()  # create the pixel map
    for i in range(upper_coord[0], lower_coord[0]):  # for every col:
        for j in range(upper_coord[1], lower_coord[1]):  # For every row
            # Alpha 0 marks the pixel as cut out.
            pixels[i, j] = (125, 122, 113, 0)  # set the colour accordingly
    return pil_img

cutout = TransformT('Cutout', _cutout_pil_impl)


def _enhancer_impl(enhancer):
    """Sets level to be between 0.1 and 1.9 for ImageEnhance transforms of PIL."""
    def impl(pil_img, level):
        v = float_parameter(level, 1.8) + .1  # going to 0 just destroys it
        return enhancer(pil_img).enhance(v)
    return impl


color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
    ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))

ALL_TRANSFORMS = [
    flip_lr,
    flip_ud,
    auto_contrast,
    equalize,
    invert,
    rotate,
    posterize,
    crop_bilinear,
    solarize,
    color,
    contrast,
    brightness,
    sharpness,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
    cutout,
    blur,
    smooth
]

NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset

import os
import numpy as np
import tensorflow as tf

import augmentation_transforms as augmentation_transforms ##### WARNING: DUPLICATED FILE => NEEDS A CLEANER SOLUTION ####

def viz_data(dataset, fig_name='data_sample', aug_policy=None):
    """Save a 5x5 grid of the first 25 samples of `dataset` to `fig_name`.

    Args:
      dataset: object exposing `data` and `target`; `data[i]` is indexed as
        [:, :, 0] and `target[i]` must contain exactly one non-zero entry,
        whose index is shown as the label.
      fig_name: path passed to `plt.savefig`.
      aug_policy: optional policy; when truthy each image is passed through
        `augment_img` before being drawn.
    """
    fig = plt.figure(figsize=(10,10))
    for i in range(25):
        plt.subplot(5,5,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        img = dataset.data[i][:,:,0]
        if aug_policy :
            img = augment_img(img,aug_policy)
        plt.imshow(img, cmap=plt.cm.binary)
        plt.xlabel(np.nonzero(dataset.target[i])[0].item())

    plt.savefig(fig_name)
    # Release the figure; pyplot otherwise keeps every figure alive across
    # repeated calls.
    plt.close(fig)

def augment_img(data, policy):
    """Apply `policy` to a single-channel image and return a 2-D result.

    Args:
      data: 2-D numpy image.
      policy: policy forwarded to `augmentation_transforms.apply_policy`.

    Returns:
      The augmented image collapsed back to one channel with the weights
      [0.2989, 0.5870, 0.1140].
    """
    # Crude workaround: replicate the channel just to force 3 channels.
    data = np.stack((data,)*3, axis=-1)
    final_img = augmentation_transforms.apply_policy(policy, data)

    im_rgb = np.array(final_img, np.float32)
    # Only there to get back to a single channel.
    im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140])

    return im_gray


### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
    """Return a tf.Variable of `shape` drawn from truncated_normal(stddev=0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name = name)

# bias initialization
def bias_variable(shape, name = None):
    """Return a tf.Variable of `shape` filled with the constant 0.1."""
    initial = tf.constant(0.1, shape=shape) # positive bias
    return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
    """Convolve `x` with filters `W` using unit strides and SAME padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME',
                        name = name)

# max pooling
def max_pool_2x2(x, name = None):
    """Max-pool `x` over 2x2 windows with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name = name)

def cnn(x_data_tf, y_data_tf, name='model'):
    """Build three conv+pool layers and two dense layers.

    Args:
      x_data_tf: input tensor with one channel (the first filter bank has a
        single input channel) whose spatial size pools down to 4x4 after
        three 2x2 SAME poolings, since the flatten step reshapes to
        4*4*36 features.
      y_data_tf: one-hot label tensor; only used to build the
        'y_pred_correct_tf' comparison op.
      name: accepted but not used by the body.

    Returns:
      The softmax output tensor 'y_pred_proba_tf' with 10 columns. These are
      probabilities, not logits.
      NOTE(review): the callers visible in this file bind the result to a
      variable named `logits` and feed it to
      softmax_cross_entropy_with_logits -- confirm that is intended.
    """
    # tunable hyperparameters for nn architecture:
    # (filter size, number of features, variable/op names) per conv layer
    conv_layers = (
        (3, 36, 'W_conv1_tf', 'b_conv1_tf', 'h_conv1_tf', 'h_pool1_tf'),
        (3, 36, 'W_conv2_tf', 'b_conv2_tf', 'h_conv2_tf', 'h_pool2_tf'),
        (3, 36, 'W_conv3_tf', 'b_conv3_tf', 'h_conv3_tf', 'h_pool3_tf'),
    )
    n_n_fc1 = 576  # number of neurons of first fully connected layer

    # layers 1-3: convolution + relu + max pooling
    signal = x_data_tf
    in_channels = 1
    for f_size, n_features, w_name, b_name, h_name, p_name in conv_layers:
        weights = weight_variable(
            [f_size, f_size, in_channels, n_features], name = w_name)
        bias = bias_variable([n_features], name = b_name)
        activated = tf.nn.relu(conv2d(signal, weights) + bias, name = h_name)
        signal = max_pool_2x2(activated, name = p_name)
        in_channels = n_features
    h_pool3_tf = signal
    flat_size = 4*4*in_channels

    # 4.layer: fully connected
    W_fc1_tf = weight_variable([flat_size, n_n_fc1], name = 'W_fc1_tf')
    b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf')
    h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1, flat_size],
                                 name = 'h_pool3_flat_tf')
    h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf, W_fc1_tf) + b_fc1_tf,
                          name = 'h_fc1_tf')

    # 5.layer: fully connected (no dropout is applied in between)
    W_fc2_tf = weight_variable([n_n_fc1, 10], name = 'W_fc2_tf')
    b_fc2_tf = bias_variable([10], name = 'b_fc2_tf')
    z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf,
                       name = 'z_pred_tf')

    # predicted probabilities in one-hot encoding
    y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')

    # tensor of correct predictions (added to the graph, not returned)
    predicted_class = tf.argmax(y_pred_proba_tf, 1)
    true_class = tf.argmax(y_data_tf, 1)
    y_pred_correct_tf = tf.equal(predicted_class, true_class,
                                 name = 'y_pred_correct_tf')
    return y_pred_proba_tf
- Args: - inputs: input features/images being fed to the image model build built. - num_classes: number of output classes being predicted. - is_training: is the model training or not. - hparams: additional hyperparameters associated with the image model. - Returns: - The logits of the image model. - """ - scopes = setup_arg_scopes(is_training) - with contextlib.nested(*scopes): - if hparams.model_name == 'pyramid_net': - logits = build_shake_drop_model( - inputs, num_classes, is_training) - elif hparams.model_name == 'wrn': - logits = build_wrn_model( - inputs, num_classes, hparams.wrn_size) - elif hparams.model_name == 'shake_shake': - logits = build_shake_shake_model( - inputs, num_classes, hparams, is_training) - return logits - - -class CifarModel(object): - """Builds an image model for Cifar10/Cifar100.""" - - def __init__(self, hparams): - self.hparams = hparams - - def build(self, mode): - """Construct the cifar model.""" - assert mode in ['train', 'eval'] - self.mode = mode - self._setup_misc(mode) - self._setup_images_and_labels() - self._build_graph(self.images, self.labels, mode) - - self.init = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - - def _setup_misc(self, mode): - """Sets up miscellaneous in the cifar model constructor.""" - self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False) - self.reuse = None if (mode == 'train') else True - self.batch_size = self.hparams.batch_size - if mode == 'eval': - self.batch_size = 25 - - def _setup_images_and_labels(self): - """Sets up image and label placeholders for the cifar model.""" - if FLAGS.dataset == 'cifar10': - self.num_classes = 10 - else: - self.num_classes = 100 - self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3]) - self.labels = tf.placeholder(tf.float32, - [self.batch_size, self.num_classes]) - - def assign_epoch(self, session, epoch_value): - session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value}) - - def 
_build_graph(self, images, labels, mode): - """Constructs the TF graph for the cifar model. - Args: - images: A 4-D image Tensor - labels: A 2-D labels Tensor. - mode: string indicating training mode ( e.g., 'train', 'valid', 'test'). - """ - is_training = 'train' in mode - if is_training: - self.global_step = tf.train.get_or_create_global_step() - - logits = build_model( - images, - self.num_classes, - is_training, - self.hparams) - self.predictions, self.cost = helper_utils.setup_loss( - logits, labels) - self.accuracy, self.eval_op = tf.metrics.accuracy( - tf.argmax(labels, 1), tf.argmax(self.predictions, 1)) - self._calc_num_trainable_params() - - # Adds L2 weight decay to the cost - self.cost = helper_utils.decay_weights(self.cost, - self.hparams.weight_decay_rate) - #### Attention: differe implem originale - - self.init = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - - -######################################################## - -######## PBA ############ - -#Parallele Cifar model trainer -tf.flags.DEFINE_string('model_name', 'wrn', - 'wrn, shake_shake_32, shake_shake_96, shake_shake_112, ' - 'pyramid_net') -tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.') -tf.flags.DEFINE_string('data_path', '/tmp/data', - 'Directory where dataset is located.') -tf.flags.DEFINE_string('dataset', 'cifar10', - 'Dataset to train with. Either cifar10 or cifar100') -tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.') -## ??? 
FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'

hparams = tf.contrib.training.HParams(
    train_size=50000,
    validation_size=0,
    eval_test=1,
    dataset=FLAGS.dataset,
    data_path=FLAGS.data_path,
    batch_size=128,
    gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
    hparams.add_hparam('model_name', 'wrn')
    hparams.add_hparam('num_epochs', 200)
    hparams.add_hparam('wrn_size', 160)
    hparams.add_hparam('lr', 0.1)
    hparams.add_hparam('weight_decay_rate', 5e-4)

# NOTE(review): data_utils is not imported in this file -- confirm its origin.
data_loader = data_utils.DataSet(hparams)
data_loader.reset()

# Builds the image model for training.
with tf.Graph().as_default(): #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
    with tf.variable_scope('model', use_resource=False):
        # Module level has no `self`; use the hparams object built above.
        m = CifarModel(hparams)
        m.build('train')
        #self._num_trainable_params = m.num_trainable_params
        #self._saver = m.saver
    #with tf.variable_scope('model', reuse=True, use_resource=False):
    #    meval = CifarModel(self.hparams)
    #    meval.build('eval')


##### FAR-HO ####
# TODO(review): the hyper-optimisation loop was left without a body (a syntax
# error) and `n_hyper_iterations` is not defined in this file. The loop header
# is kept as a comment until the body is written.
# for _ in range(n_hyper_iterations):
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt

sess = tf.InteractiveSession()


def get_data():
    """Load MNIST from ./MNIST_DATA and return (train, validation) sets."""
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
    return datasets.train, datasets.validation


def g_logits(x,y):
    """Build a 300-unit hidden layer plus an output layer sized from `y`."""
    with tf.variable_scope('model'):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits


x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x,y)
train_set, validation_set = get_data()

# One weight per training example plus the learning rate are the
# hyperparameters optimised by the outer loop.
lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)  # inner (training) objective
E = tf.reduce_mean(ce)                      # outer (validation) objective

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it =10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

# Build the norm op once. Creating it inside the loop added a new op to the
# graph on every hyper-iteration.
lambdas_norm = tf.norm(lambdas)

T = 20 # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()

print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []

for _ in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)

    inner_obj = L.eval(train_set_supplier())
    outer_obj = E.eval(validation_set_supplier())
    inner_losses.append(inner_obj)
    outer_losses.append(outer_obj)
    print('inner:', inner_obj)
    print('outer:', outer_obj)

    train_acc = accuracy.eval(train_set_supplier())
    val_acc = accuracy.eval(validation_set_supplier())
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    print('training accuracy', train_acc)
    print('validation accuracy', val_acc)

    print('learning rate', lr.eval())
    print('norm of examples weight', lambdas_norm.eval())
    print('-'*50)

plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)

plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
import blue_utils as butil

#Reset
try:
    sess.close()
except NameError:
    # No session from a previous interactive run; nothing to close.
    pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    """Load MNIST (not flattened) and return (train, validation, test)."""
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
    for d in datasets:
        print("Nb samples : ", d.num_examples)
    return datasets.train, datasets.validation, datasets.test

#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
    """Build a 300-unit hidden layer plus an output layer sized from `y`."""
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 500
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 1.e-3
##########################

#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)

#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')

# NOTE(review): butil.cnn returns its softmax output, yet the result is used
# below as `logits` for softmax_cross_entropy_with_logits -- confirm this is
# intended.
logits = butil.cnn(x,y)

train_set, validation_set, test_set = get_data(data_split=(.05, .05,))

butil.viz_data(train_set)
print('Data sampled !')

# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
# Each constraint clamps to a single value (upper bound == lower bound), which
# pins the hyperparameter at its initial value.
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Drop the second term of the loss when the HPs are unused
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

train_set_supplier = train_set.create_supplier(x, y, batch_size=256) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

his_params = []

tf.global_variables_initializer().run()

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)
    # With the three hyperparameters created above, res is laid out as
    # [lr, mu, rho, inner loss, outer loss, train acc, val acc].
    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]
    his_params.append(res)

    print('Hyper-it :',hyt,'/',n_hyper_iterations)
    # Print the recorded losses instead of evaluating L and E a second time,
    # so the log matches what is stored in his_params and plotted.
    print('inner:', res[3])
    print('outer:', res[4])
    print('training accuracy:', res[5])
    print('validation accuracy:', res[6])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])

#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])

ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])

plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))
far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9)) -probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9)) - -#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4)) -#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9)) - -#probX, probY = 0.5, 0.5 -#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)] -policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)] -print('Hyp :',far.utils.hyperparameters(scope=None)) - -#butil.viz_data(train_set, aug_policy= policy) -#print('Data sampled !') - -#Ajout artificiel des transfo a la loss juste pour qu il soit compter dans la dynamique du graph -probX_loss = tf.sigmoid(probX) -probY_loss = tf.sigmoid(probY) - -ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits) -L = tf.reduce_mean(probX_loss*probY_loss*ce) -E = tf.reduce_mean(ce) - -accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32)) - -inner_optimizer = far.AdamOptimizer() -outer_optimizer = tf.train.AdamOptimizer(hp_lr) -hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it) -hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer) - -train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy) # stochastic GD -validation_set_supplier = validation_set.create_supplier(x, y) - -#print(train_set.dim_data,validation_set.dim_data) - -his_params = [] - -tf.global_variables_initializer().run() - -butil.viz_data(train_set, fig_name= 'Start_sample',aug_policy= policy) -print('Data sampled !') - -for hyt in range(n_hyper_iterations): - hyper_step(T, - inner_objective_feed_dicts=train_set_supplier, - outer_objective_feed_dicts=validation_set_supplier, - _skip_hyper_ts=True) - res = 
sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()), - E.eval(validation_set_supplier()), - accuracy.eval(train_set_supplier()), - accuracy.eval(validation_set_supplier())] - his_params.append(res) - - butil.viz_data(train_set, fig_name= 'Train_sample_{}'.format(hyt),aug_policy= policy) - print('Data sampled !') - - print('Hyper-it :',hyt,'/',n_hyper_iterations) - print('inner:', L.eval(train_set_supplier())) - print('outer:', E.eval(validation_set_supplier())) - print('training accuracy:', res[4]) - print('validation accuracy:', res[5]) - print('Transformation : ProbX -',res[0],'/ProbY -',res[1]) - #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval()) - print('-'*50) - -test_set_supplier = test_set.create_supplier(x, y) -print('Test accuracy:',accuracy.eval(test_set_supplier())) - -fig, ax = plt.subplots(ncols=4, figsize=(15, 3)) -ax[0].set_title('ProbX') -ax[0].plot([e[0] for e in his_params]) - -ax[1].set_title('ProbY') -ax[1].plot([e[1] for e in his_params]) - -ax[2].set_title('Tr. and val. errors') -ax[2].plot([e[2] for e in his_params]) -ax[2].plot([e[3] for e in his_params]) - -ax[3].set_title('Tr. and val. 
acc') -ax[3].plot([e[4] for e in his_params]) -ax[3].plot([e[5] for e in his_params]) - -plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T)) diff --git a/Old/FAR-HO/test_fc.py b/Old/FAR-HO/test_fc.py deleted file mode 100755 index 24eb596..0000000 --- a/Old/FAR-HO/test_fc.py +++ /dev/null @@ -1,133 +0,0 @@ -#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb -import warnings -warnings.filterwarnings("ignore") - -import os -import numpy as np -import tensorflow as tf -import tensorflow.contrib.layers as layers -import far_ho as far -import far_ho.examples as far_ex - -tf.logging.set_verbosity(tf.logging.ERROR) - -import matplotlib.pyplot as plt -#import blue_utils as butil - -#Reset -try: - sess.close() -except: pass -rnd = np.random.RandomState(1) -tf.reset_default_graph() -sess = tf.InteractiveSession() - -def get_data(data_split): - # load a small portion of mnist data - datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True) - print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target) - [print("Nb samples : ", d.num_examples) for d in datasets] - return datasets.train, datasets.validation, datasets.test - -#Model -# FC : reshape = True -def g_logits(x,y, name='model'): - with tf.variable_scope(name): - h1 = layers.fully_connected(x, 300) - logits = layers.fully_connected(h1, int(y.shape[1])) - return logits - -#### Hyper-parametres #### -n_hyper_iterations = 90 -T = 20 # Number of inner iterations -rev_it =10 -hp_lr = 0.1 -epochs =10 -batch_size = 256 -########################## - -#MNIST -x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x') -y = tf.placeholder(tf.float32, shape=(None, 10), name='y') -logits = g_logits(x, y) - -#CNN : reshape = False -#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x') -#y = tf.placeholder(dtype=tf.float32, 
shape=[None,10], name='y') - -#logits = butil.cnn(x,y) - -train_set, validation_set, test_set = get_data(data_split=(.6, .3,)) - -#butil.viz_data(train_set) - -# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples)) -lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7)) -mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5)) -#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.)) - - -ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits) -L = tf.reduce_mean(ce) #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) #Retirer la seconde partie de la loss quand HP inutiles -E = tf.reduce_mean(ce) - -accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32)) - -inner_optimizer = far.MomentumOptimizer(lr, mu) -#inner_optimizer = far.GradientDescentOptimizer(lr) -outer_optimizer = tf.train.AdamOptimizer(hp_lr) -hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it) -hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step()) - -train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD -validation_set_supplier = validation_set.create_supplier(x, y) - - -print('Hyper iterations par epochs',int(train_set.num_examples/batch_size*epochs/T)) - -his_params = [] - -tf.global_variables_initializer().run() - -for hyt in range(n_hyper_iterations): - hyper_step(T, - inner_objective_feed_dicts=train_set_supplier, - outer_objective_feed_dicts=validation_set_supplier, - _skip_hyper_ts=False) - res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()), - E.eval(validation_set_supplier()), - accuracy.eval(train_set_supplier()), - accuracy.eval(validation_set_supplier())] 
- - his_params.append(res) - - print('Hyper-it :',hyt,'/',n_hyper_iterations) - print('inner:', res[3]) - print('outer:', res[4]) - print('training accuracy:', res[5]) - print('validation accuracy:', res[6]) - #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval()) - print('-'*50) - -test_set_supplier = test_set.create_supplier(x, y) -print('Test accuracy:',accuracy.eval(test_set_supplier())) - -fig, ax = plt.subplots(ncols=4, figsize=(15, 3)) -ax[0].set_title('Learning rate') -ax[0].plot([e[0] for e in his_params]) - -ax[1].set_title('Momentum factor') -ax[1].plot([e[1] for e in his_params]) - -#ax[2].set_title('L2 regulariz.') -#ax[2].plot([e[2] for e in his_params]) -ax[2].set_title('Tr. and val. acc') -ax[2].plot([e[5] for e in his_params]) -ax[2].plot([e[6] for e in his_params]) - -ax[3].set_title('Tr. and val. errors') -ax[3].plot([e[3] for e in his_params]) -ax[3].plot([e[4] for e in his_params]) - -plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T)) -#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T)) diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore b/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore deleted file mode 100755 index 8c17325..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -venv/ -__pycache__ -data/ -log/ -.vscode/ diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf b/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf deleted file mode 100755 index 4f0b65a..0000000 Binary files a/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf and /dev/null differ diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md b/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md deleted file mode 100755 index cfa0e6f..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Gradient Descent: The Ultimate Optimizer 
- -[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) - -| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ | -| --- | - -This repository contains the paper and code to the paper [Gradient Descent: -The Ultimate Optimizer](https://arxiv.org/abs/1909.13371). - -I couldn't find the code (which is found in the appendix at the end of the -paper) anywhere on the web. What I present here is the code of the paper with -instructions on how to set it up. - -Getting the code in a runnable state required some fixes on my part so the -code might be slightly different than that presented in the paper. - -## Set up - -```sh -git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer -cd Gradient-Descent-The-Ultimate-Optimizer -virtualenv -p python3 venv -source venv/bin/activate -pip install -r requirements.txt -python main.py -``` - -When you are done you can exit the virtualenv with - -```shell -deactivate -``` diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py deleted file mode 100755 index a18ddf0..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py +++ /dev/null @@ -1,244 +0,0 @@ -from hyperopt import * -#from hyperopt_v2 import * - -import torchvision.transforms.functional as TF -import torchvision.transforms as T - -#from scipy import ndimage -import kornia - -import random - - -class MNIST_FullyConnected_Augmented(Optimizable): - """ - A fully-connected NN for the MNIST task. This is Optimizable but not itself - an optimizer. 
- """ - - def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')): - self.device = device - #print(self.device) - parameters = { - "w1": torch.zeros(num_inp, num_hid, device=self.device).t(), - "b1": torch.zeros(num_hid, device=self.device).t(), - "w2": torch.zeros(num_hid, num_out, device=self.device).t(), - "b2": torch.zeros(num_out, device=self.device).t(), - - #Data augmentation - "prob": torch.tensor(0.5, device=self.device), - "mag": torch.tensor(180.0, device=self.device), - } - super().__init__(parameters, optimizer) - - def initialize(self): - nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5)) - self.optimizer.initialize() - #print(self.device) - - def forward(self, x): - """Compute a prediction.""" - #print("Prob:",self.parameters["prob"].item()) - if random.random() < self.parameters["prob"]: - #angle = 45 - #x = TF.rotate(x, angle) - #print(self.device) - #x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None) - x = x + self.parameters["mag"] - - x = F.linear(x, self.parameters["w1"], self.parameters["b1"]) - x = torch.tanh(x) - x = F.linear(x, self.parameters["w2"], self.parameters["b2"]) - x = torch.tanh(x) - x = F.log_softmax(x, dim=1) - return x - - def adjust(self): - self.optimizer.adjust(self.parameters) - - def __str__(self): - return "mnist_FC_augmented / " + str(self.optimizer) - -class LeNet(Optimizable, nn.Module): - def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')): - nn.Module.__init__(self) - self.device = device - parameters = { - "w1": torch.zeros(20, num_inp, 5, 5, device=self.device), - "b1": torch.zeros(20, device=self.device), - "w2": torch.zeros(50, 20, 5, 5, device=self.device), - "b2": torch.zeros(50, device=self.device), - "w3": torch.zeros(500,4*4*50, device=self.device), - "b3": torch.zeros(500, device=self.device), - "w4": torch.zeros(10, 500, 
device=self.device), - "b4": torch.zeros(10, device=self.device), - - #Data augmentation - "prob": torch.tensor(1.0, device=self.device), - "mag": torch.tensor(180.0, device=self.device), - } - super().__init__(parameters, optimizer) - - def initialize(self): - nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5)) - self.optimizer.initialize() - - def forward(self, x): - - if random.random() < self.parameters["prob"]: - - batch_size = x.shape[0] - # create transformation (rotation) - alpha = self.parameters["mag"] # in degrees - angle = torch.ones(batch_size, device=self.device) * alpha - - # define the rotation center - center = torch.ones(batch_size, 2, device=self.device) - center[..., 0] = x.shape[3] / 2 # x - center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=self.device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - #print("Start Shape ", x.shape) - out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = out.view(out.size(0), -1) - #print("Shape ", out.shape) - out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"])) - #print("Shape ", out.shape) - out = F.linear(out, self.parameters["w4"], self.parameters["b4"]) - #print("Shape ", out.shape) - return F.log_softmax(out, 
dim=1) - - def adjust(self): - self.optimizer.adjust(self.parameters) - - def __str__(self): - return "mnist_CNN_augmented / " + str(self.optimizer) - -class LeNet_v2(Optimizable, nn.Module): - def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')): - - nn.Module.__init__(self) - self.device = device - self.conv1 = nn.Conv2d(num_inp, 20, 5, 1) - self.conv2 = nn.Conv2d(20, 50, 5, 1) - #self.fc1 = nn.Linear(4*4*50, 500) - self.fc1 = nn.Linear(1250, 500) - self.fc2 = nn.Linear(500, 10) - - #print(self.conv1.weight) - parameters = { - "w1": self.conv1.weight, - "b1": self.conv1.bias, - "w2": self.conv2.weight, - "b2": self.conv2.bias, - "w3": self.fc1.weight, - "b3": self.fc1.bias, - "w4": self.fc2.weight, - "b4": self.fc2.bias, - - #Data augmentation - "prob": torch.tensor(0.5, device=self.device), - "mag": torch.tensor(1.0, device=self.device), - } - Optimizable.__init__(self, parameters, optimizer) - - ''' - def forward(self, x): #Sature la memoire ??? - x = F.relu(self.conv1(x)) - x = F.max_pool2d(x, 2, 2) - x = F.relu(self.conv2(x)) - x = F.max_pool2d(x, 2, 2) - #x = x.view(-1, 4*4*50) - x = x.view(x.size(0), -1) - x = F.relu(self.fc1(x)) - x = self.fc2(x) - return F.log_softmax(x, dim=1) - ''' - def forward(self, x): - - if random.random() < self.parameters["prob"].item(): - #print(self.parameters["prob"]) - #x = [T.ToTensor()( - # TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None)) - # for im in torch.unbind(x,dim=0)] - #x = torch.stack(x,dim=0) - - #x = [ndimage.rotate(im, self.parameters["mag"], reshape=False) - # for im in torch.unbind(x,dim=0)] - #x = torch.stack(x,dim=0) - - #x = [im + self.parameters["mag"] - # for im in torch.unbind(x,dim=0)] - #x = torch.stack(x,dim=0) - - batch_size = x.shape[0] - # create transformation (rotation) - alpha = self.parameters["mag"] * 180 # in degrees - angle = torch.ones(batch_size, device=self.device) * alpha - - # define 
the rotation center - center = torch.ones(batch_size, 2, device=self.device) - center[..., 0] = x.shape[3] / 2 # x - center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=self.device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - #print("Start Shape ", x.shape) - out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = out.view(out.size(0), -1) - #print("Shape ", out.shape) - out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"])) - #print("Shape ", out.shape) - out = F.linear(out, self.parameters["w4"], self.parameters["b4"]) - #print("Shape ", out.shape) - return F.log_softmax(out, dim=1) - - def initialize(self): - self.optimizer.initialize() - - def adjust(self): - self.optimizer.adjust(self.parameters) - - def adjust_val(self): - self.optimizer.adjust_val(self.parameters) - - def eval(self): - self.parameters['prob']=torch.tensor(0.0, device=self.device) - - def __str__(self): - return "mnist_CNN_augmented / " + str(self.optimizer) \ No newline at end of file diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py deleted file mode 100755 index 160e97c..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch -from torch.utils.data import Dataset, DataLoader -from torchvision import transforms -import torchvision.transforms.functional as TF - 
-class MNIST_aug(Dataset): - - training_file = 'training.pt' - test_file = 'test.pt' - classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', - '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] - - def __init__(self): - self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)] - self.set_stage(0) # initial stage - - def __getitem__(self, index): - image = self.images[index] - - # Just apply your transformations here - image = self.crop(image) - x = TF.to_tensor(image) - return x - - def set_stage(self, stage): - if stage == 0: - print('Using (32, 32) crops') - self.crop = transforms.RandomCrop((32, 32)) - elif stage == 1: - print('Using (28, 28) crops') - self.crop = transforms.RandomCrop((28, 28)) - - def __len__(self): - return len(self.images) - - -dataset = MyData() -loader = DataLoader(dataset, - batch_size=2, - num_workers=2, - shuffle=True) - -for batch_idx, data in enumerate(loader): - print('Batch idx {}, data shape {}'.format( - batch_idx, data.shape)) - -loader.dataset.set_stage(1) - -for batch_idx, data in enumerate(loader): - print('Batch idx {}, data shape {}'.format( - batch_idx, data.shape)) - diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py deleted file mode 100755 index d2a992b..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py +++ /dev/null @@ -1,150 +0,0 @@ -#from hyperopt import * -from hyperopt_v2 import * - -import torchvision.transforms.functional as TF -import torchvision.transforms as T - -#from scipy import ndimage -import kornia - -import random - - -class LeNet_v3(nn.Module): - def __init__(self, num_inp, num_out): - super(LeNet_v3, self).__init__() - self.params = nn.ParameterDict({ - 'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)), - 'b1': nn.Parameter(torch.zeros(20)), - 'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)), - 'b2': nn.Parameter(torch.zeros(50)), - 'w3': 
nn.Parameter(torch.zeros(500,4*4*50)), - 'b3': nn.Parameter(torch.zeros(500)), - 'w4': nn.Parameter(torch.zeros(10, 500)), - 'b4': nn.Parameter(torch.zeros(10)) - }) - - - def initialize(self): - nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5)) - - def forward(self, x): - #print("Start Shape ", x.shape) - out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"])) - #print("Shape ", out.shape) - out = F.max_pool2d(out, 2) - #print("Shape ", out.shape) - out = out.view(out.size(0), -1) - #print("Shape ", out.shape) - out = F.relu(F.linear(out, self.params["w3"], self.params["b3"])) - #print("Shape ", out.shape) - out = F.linear(out, self.params["w4"], self.params["b4"]) - #print("Shape ", out.shape) - return F.log_softmax(out, dim=1) - - - def print_grad_fn(self): - for n, p in self.params.items(): - print(n, p.grad_fn) - - def __str__(self): - return "mnist_CNN_augmented / " - -class Data_aug(nn.Module): - def __init__(self): - super(Data_aug, self).__init__() - self.data_augmentation = True - self.params = nn.ParameterDict({ - "prob": nn.Parameter(torch.tensor(0.5)), - "mag": nn.Parameter(torch.tensor(180.0)) - }) - - #self.params["mag"].register_hook(print) - - def forward(self, x): - - if self.data_augmentation and self.training and random.random() < self.params["prob"]: - #print('Aug') - batch_size = x.shape[0] - # create transformation (rotation) - alpha = self.params["mag"] # in degrees - angle = torch.ones(batch_size, device=x.device) * alpha - - # define the rotation center - center = torch.ones(batch_size, 2, device=x.device) - center[..., 0] = x.shape[3] / 2 # x - 
center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=x.device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - return x - - def eval(self): - self.params['prob']=torch.tensor(0.0, device=self.device) - nn.Module.eval(self) - - def data_augmentation(self, mode=True): - self.data_augmentation=mode - - def print_grad_fn(self): - for n, p in self.params.items(): - print(n, p.grad_fn) - - def __str__(self): - return "Data_Augmenter / " - -class Augmented_model(nn.Module): - def __init__(self, model, data_augmenter): - #self.model = model - #self.data_aug = data_augmenter - super(Augmented_model, self).__init__()#nn.Module.__init__(self) - #super().__init__() - self.mods = nn.ModuleDict({ - 'data_aug': data_augmenter, - 'model': model - }) - #for name, param in self.mods.named_parameters(): - # print(name, type(param.data), param.size()) - - #params = self.mods.named_parameters() #self.parameters() - #parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()] - #Optimizable.__init__(self, params, optimizer) - - def initialize(self): - self.mods['model'].initialize() - - def forward(self, x): - return self.mods['model'](self.mods['data_aug'](x)) - - #def adjust(self): - # self.optimizer.adjust(self) #Parametres des dict - - def data_augmentation(self, mode=True): - self.mods['data_aug'].data_augmentation=mode - - def begin(self): - for param in self.parameters(): - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - - def print_grad_fn(self): - for n, m in self.mods.items(): - m.print_grad_fn() - - def __str__(self): - return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer) \ No newline at end of 
file diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph b/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph deleted file mode 100755 index 96389f9..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph +++ /dev/null @@ -1,5 +0,0 @@ -digraph { - graph [size="12,12"] - node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled] - 94296775052080 [label=NoneType fillcolor=darkolivegreen1] -} diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg b/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg deleted file mode 100755 index a682cbc..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - -%3 - - - -94296775052080 - -NoneType - - - diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py deleted file mode 100755 index 1506f30..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py +++ /dev/null @@ -1,345 +0,0 @@ -import math -import torch -import torchvision -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim - - -class Optimizable():#nn.Module): - """ - This is the interface for anything that has parameters that need to be - optimized, somewhat like torch.nn.Model but with the right plumbing for - hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter - interface which does not give us enough control about the detachments.) - Nominal operation of an Optimizable at the lowest level is as follows: - o = MyOptimizable(…) - o.initialize() - loop { - o.begin() - o.zero_grad() - loss = –compute loss function from parameters– - loss.backward() - o.adjust() - } - Optimizables recursively handle updates to their optimiz*ers*. 
- """ - #def __init__(self): - # super(Optimizable, self).__init__() - # self.parameters = nn.Parameter(torch.zeros(())) - - def __init__(self, parameters, optimizer): - #super(Optimizable, self).__init__() - self.parameters = parameters # a dict mapping names to tensors - self.optimizer = optimizer # which must itself be Optimizable! - self.all_params_with_gradients = [] - #self.device = device - - def initialize(self): - """Initialize parameters, e.g. with a Kaiming initializer.""" - pass - - def begin(self): - """Enable gradient tracking on current parameters.""" - self.all_params_with_gradients = [] #Reintialisation pour eviter surcharge de la memoire - for name, param in self.parameters.items(): - #for param in self.parameters: - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - #param.to(self.device) - #if param.device == torch.device('cuda:0'): - # print(name, param.device) - self.all_params_with_gradients.append(param) - self.optimizer.begin() - - def zero_grad(self): - """ Set all gradients to zero. """ - for param in self.all_params_with_gradients: - #param = param.to(self.device) - param.grad = torch.zeros(param.shape, device=param.device) - self.optimizer.zero_grad() - - """ Note: at this point you would probably call .backwards() on the loss - function. """ - - def adjust(self): - """ Update parameters """ - pass - - - def print_grad_fn(self): - self.optimizer.print_grad_fn() - for n, p in self.parameters.items(): - print(n," - ", p.grad_fn) - - def param_grad(self): - return self.all_params_with_gradients - - def param(self, param_name): - return self.parameters[param_name].item() - - -class MNIST_FullyConnected(Optimizable): - """ - A fully-connected NN for the MNIST task. This is Optimizable but not itself - an optimizer. 
- """ - - def __init__(self, num_inp, num_hid, num_out, optimizer): - parameters = { - "w1": torch.zeros(num_inp, num_hid).t(), - "b1": torch.zeros(num_hid).t(), - "w2": torch.zeros(num_hid, num_out).t(), - "b2": torch.zeros(num_out).t(), - } - super().__init__(parameters, optimizer) - - def initialize(self): - nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5)) - nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5)) - self.optimizer.initialize() - - def forward(self, x): - """Compute a prediction.""" - x = F.linear(x, self.parameters["w1"], self.parameters["b1"]) - x = torch.tanh(x) - x = F.linear(x, self.parameters["w2"], self.parameters["b2"]) - x = torch.tanh(x) - x = F.log_softmax(x, dim=1) - return x - - def adjust(self): - self.optimizer.adjust(self.parameters) - - def __str__(self): - return "mnist / " + str(self.optimizer) - - -class NoOpOptimizer(Optimizable):#, nn.Module): - """ - NoOpOptimizer sits on top of a stack, and does not affect what lies below. 
- """ - - def __init__(self): - #super(Optimizable, self).__init__() - pass - - def initialize(self): - pass - - def begin(self): - pass - - def zero_grad(self): - pass - - def adjust(self, params): - pass - - def adjust_val(self, params): - pass - - def print_grad_fn(self): - pass - - def __str__(self): - return "static" - -class Adam(Optimizable): - """ - A fully hyperoptimizable Adam optimizer - """ - - def clamp(x): - return (x.tanh() + 1.0) / 2.0 - - def unclamp(y): - z = y * 2.0 - 1.0 - return ((1.0 + z) / (1.0 - z)).log() / 2.0 - - def __init__( - self, - alpha=0.001, - beta1=0.9, - beta2=0.999, - log_eps=-8.0, - optimizer=NoOpOptimizer(), - device = torch.device('cuda') - ): - self.device = device - parameters = { - "alpha": torch.tensor(alpha, device=self.device), - "beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)), - "beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)), - "log_eps": torch.tensor(log_eps, device=self.device), - } - super().__init__(parameters, optimizer) - self.num_adjustments = 0 - self.num_adjustments_val = 0 - self.cache = {} - - for name, param in parameters.items(): - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - #param.to(self.device) - #if param.device == torch.device('cuda:0'): - # print(name, param.device) - - def adjust(self, params): #Update param d'apprentissage - self.num_adjustments += 1 - self.optimizer.adjust(self.parameters) - #print('Adam update') - t = self.num_adjustments - beta1 = Adam.clamp(self.parameters["beta1"]) - beta2 = Adam.clamp(self.parameters["beta2"]) - for name, param in params.items(): - if name == "mag": continue - if name not in self.cache: - self.cache[name] = { - "m": torch.zeros(param.shape, device=self.device), - "v": torch.zeros(param.shape, device=self.device) - + 10.0 ** self.parameters["log_eps"].data - # NOTE that we add a little ‘fudge factor' here because sqrt is not - # differentiable at exactly zero - } - 
#print(name, param.device) - g = param.grad.detach() - self.cache[name]["m"] = m = ( - beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g - ) - self.cache[name]["v"] = v = ( - beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g - ) - self.all_params_with_gradients.append(m) - self.all_params_with_gradients.append(v) - m_hat = m / (1.0 - beta1 ** float(t)) - v_hat = v / (1.0 - beta2 ** float(t)) - dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"]) - params[name] = param.detach() - self.parameters["alpha"] * dparam - #print(name) - - def adjust_val(self, params): #Update param Transformations - self.num_adjustments_val += 1 - self.optimizer.adjust_val(self.parameters) - #print('Adam update') - t = self.num_adjustments_val - beta1 = Adam.clamp(self.parameters["beta1"]) - beta2 = Adam.clamp(self.parameters["beta2"]) - for name, param in params.items(): - if name != "mag": continue - if name not in self.cache: - self.cache[name] = { - "m": torch.zeros(param.shape, device=self.device), - "v": torch.zeros(param.shape, device=self.device) - + 10.0 ** self.parameters["log_eps"].data - # NOTE that we add a little ‘fudge factor' here because sqrt is not - # differentiable at exactly zero - } - #print(name, param.device) - g = param.grad.detach() - self.cache[name]["m"] = m = ( - beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g - ) - self.cache[name]["v"] = v = ( - beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g - ) - self.all_params_with_gradients.append(m) - self.all_params_with_gradients.append(v) - m_hat = m / (1.0 - beta1 ** float(t)) - v_hat = v / (1.0 - beta2 ** float(t)) - dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"]) - params[name] = param.detach() - self.parameters["alpha"] * dparam - #print(name) - - def __str__(self): - return "adam(" + str(self.parameters) + ") / " + str(self.optimizer) -''' -class SGD(Optimizable): - """ - A hyperoptimizable SGD - """ - - def __init__(self, 
alpha=0.01, optimizer=NoOpOptimizer()): - parameters = {"alpha": torch.tensor(alpha)} - super().__init__(parameters, optimizer) - - def adjust(self, params): - self.optimizer.adjust(self.parameters) - for name, param in params.items(): - g = param.grad.detach() - params[name] = param.detach() - g * self.parameters["alpha"] - - def __str__(self): - return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer) - -class SGDPerParam(Optimizable): - """ - Like above, but can be taught a separate step size for each parameter it - tunes. - """ - - def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()): - parameters = {name + "_alpha": torch.tensor(alpha) for name in params} - super().__init__(parameters, optimizer) - - def adjust(self, params): - self.optimizer.adjust(self.parameters) - for name, param in params.items(): - g = param.grad.detach() - params[name] = param.detach() - g * self.parameters[name + "_alpha"] - - def __str__(self): - return "sgd(%s) / " % str( - {k: t.item() for k, t in self.parameters.items()} - ) + str(self.optimizer) -''' -''' -class AdamBaydin(Optimizable): - """ Same as above, but only optimizes the learning rate, treating the - remaining hyperparameters as constants. 
""" - - def __init__( - self, - alpha=0.001, - beta1=0.9, - beta2=0.999, - log_eps=-8.0, - optimizer=NoOpOptimizer(), - ): - parameters = {"alpha": torch.tensor(alpha)} - self.beta1 = beta1 - self.beta2 = beta2 - self.log_eps = log_eps - super().__init__(parameters, optimizer) - self.num_adjustments = 0 - self.cache = {} - - def adjust(self, params): - self.num_adjustments += 1 - self.optimizer.adjust(self.parameters) - t = self.num_adjustments - beta1 = self.beta1 - beta2 = self.beta2 - for name, param in params.items(): - if name not in self.cache: - self.cache[name] = { - "m": torch.zeros(param.shape), - "v": torch.zeros(param.shape) + 10.0 ** self.log_eps, - } - g = param.grad.detach() - self.cache[name]["m"] = m = ( - beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g - ) - self.cache[name]["v"] = v = ( - beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g - ) - self.all_params_with_gradients.append(m) - self.all_params_with_gradients.append(v) - m_hat = m / (1.0 - beta1 ** float(t)) - v_hat = v / (1.0 - beta2 ** float(t)) - dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps) - params[name] = param.detach() - self.parameters["alpha"] * dparam - - def __str__(self): - return "adam(" + str(self.parameters) + ") / " + str(self.optimizer) -''' \ No newline at end of file diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py deleted file mode 100755 index c100085..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py +++ /dev/null @@ -1,296 +0,0 @@ -import math -import torch -import torchvision -import torch.nn as nn -import torch.nn.functional as F -from torch.optim.optimizer import Optimizer - -class Optimizable(): - """ - This is the interface for anything that has parameters that need to be - optimized, somewhat like torch.nn.Model but with the right plumbing for - hyperoptimizability. 
(Specifically, torch.nn.Model uses the Parameter - interface which does not give us enough control about the detachments.) - Nominal operation of an Optimizable at the lowest level is as follows: - o = MyOptimizable(…) - o.initialize() - loop { - o.begin() - o.zero_grad() - loss = –compute loss function from parameters– - loss.backward() - o.adjust() - } - Optimizables recursively handle updates to their optimiz*ers*. - """ - #def __init__(self): - # super(Optimizable, self).__init__() - # self.parameters = nn.Parameter(torch.zeros(())) - - def __init__(self, parameters, optimizer): - self.params = parameters # a dict mapping names to tensors - self.optimizer = optimizer # which must itself be Optimizable! - self.all_params_with_gradients = [] - #self.device = device - - def initialize(self): - """Initialize parameters, e.g. with a Kaiming initializer.""" - pass - - def begin(self): - """Enable gradient tracking on current parameters.""" - self.all_params_with_gradients = nn.ParameterList() #Reintialisation pour eviter surcharge de la memoire - print("Opti param :", type(self.params)) - #for name, param in self.params: - if isinstance(self.params,dict): #Dict - for name, param in self.params: - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - self.all_params_with_gradients.append(param) - if isinstance(self.params,list): #List - for param in self.params: - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - self.all_params_with_gradients.append(param) - self.optimizer.begin() - - def zero_grad(self): - """ Set all gradients to zero. """ - for param in self.all_params_with_gradients: - param.grad = torch.zeros(param.shape, device=param.device) - self.optimizer.zero_grad() - - """ Note: at this point you would probably call .backwards() on the loss - function. 
""" - - def adjust(self): - """ Update parameters """ - pass - - -class NoOpOptimizer(Optimizable):#, nn.Module): - """ - NoOpOptimizer sits on top of a stack, and does not affect what lies below. - """ - - def __init__(self): - #super(Optimizable, self).__init__() - pass - - def initialize(self): - pass - - def begin(self): - #print("NoOpt begin") - pass - - def zero_grad(self): - pass - - def adjust(self, params): - pass - - def step(self): - pass - - def print_grad_fn(self): - pass - - def __str__(self): - return "static" - - -class SGD(Optimizer, nn.Module): #Eviter Optimizer - """ - A hyperoptimizable SGD - """ - - def __init__(self, params, lr=0.01, height=0): - self.height=height - #params : a optimiser - #reste (defaults) param de l'opti - print('SGD - H', height) - nn.Module.__init__(self) - - optim_keys = ('lr','') #A mettre dans Optimizable ? #'' pour eviter iteration dans la chaine de charactere... - ''' - self_params = {"lr": torch.tensor(lr), - "momentum": 0, - "dampening":0, - "weight_decay":0, - "nesterov": False} - ''' - #self_params = dict(lr=torch.tensor(lr), - # momentum=0, dampening=0, weight_decay=0, nesterov=False) - - self_params = nn.ParameterDict({ - "lr": nn.Parameter(torch.tensor(lr)), - "momentum": nn.Parameter(torch.tensor(0.0)), - "dampening": nn.Parameter(torch.tensor(0.0)), - "weight_decay": nn.Parameter(torch.tensor(0.0)), - }) - - for k in self_params.keys() & optim_keys: - self_params[k].requires_grad_() # keep gradient information… - self_params[k].retain_grad() # even if not a leaf… - #self_params[k].register_hook(print) - - if height==0: - optimizer = NoOpOptimizer() - else: - #def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys} - #(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) #Devrait mar - optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1) - #optimizer.register_backward_hook(print) - - self.optimizer = 
optimizer - #if(height==0): - # for n,p in params.items(): - # print(n,p) - - #Optimizable.__init__(self, self_params, optimizer) - - #print(type(params)) - #for p in params: - # print(type(p)) - Optimizer.__init__(self, params, self_params) - - for group in self.param_groups: - for p in group['params']: - print(type(p.data), p.size()) - print('End SGD-H', height) - - def begin(self): - for group in self.param_groups: - for p in group['params']: - #print(type(p.data), p.size()) - p.requires_grad_() # keep gradient information… - p.retain_grad() # even if not a leaf… - #p.register_hook(lambda x: print(self.height, x.grad_fn)) - - self.optimizer.begin() - - def print_grad_fn(self): - self.optimizer.print_grad_fn() - for group in self.param_groups: - for i, p in enumerate(group['params']): - print(self.height," - ", i, p.grad_fn) - - #def adjust(self, params): - # self.optimizer.adjust(self.params) - # for name, param in params.items(): - # g = param.grad.detach() - # params[name] = param.detach() - g * self.params["lr"] - - def step(self): - """Performs a single optimization step. - - Arguments: - closure (callable, optional): A closure that reevaluates the model - and returns the loss. 
- """ - print('SGD start') - self.optimizer.step() - - for group in self.param_groups: - for i, p in enumerate(group['params']): - if p.grad is None: - continue - #d_p = p.grad.data - d_p = p.grad.detach() - - #print(group['lr']) - p.data.add_(-group['lr'].item(), d_p) - #group['params'][i] = p.detach() - d_p * group['lr'] - p.data-= group['lr']*d_p #Data ne pas utiliser perte info - - for p in group['params']: - if p.grad is None: - print(p, p.grad) - continue - - print("SGD end") - #return loss - - def __str__(self): - return "sgd(%f) / " % self.params["lr"] + str(self.optimizer) - - -class Adam(Optimizable, nn.Module): - """ - A fully hyperoptimizable Adam optimizer - """ - - def clamp(x): - return (x.tanh() + 1.0) / 2.0 - - def unclamp(y): - z = y * 2.0 - 1.0 - return ((1.0 + z) / (1.0 - z)).log() / 2.0 - - def __init__( - self, - alpha=0.001, - beta1=0.9, - beta2=0.999, - log_eps=-8.0, - optimizer=NoOpOptimizer(), - device = torch.device('cuda') - ): - #super(Adam, self).__init__() - nn.Module.__init__(self) - self.device = device - params = nn.ParameterDict({ - "alpha": nn.Parameter(torch.tensor(alpha, device=self.device)), - "beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))), - "beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))), - "log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)), - }) - Optimizable.__init__(self, params, optimizer) - self.num_adjustments = 0 - self.cache = {} - - for name, param in params.items(): - param.requires_grad_() # keep gradient information… - param.retain_grad() # even if not a leaf… - - def adjust(self, params, pytorch_mod=False): - self.num_adjustments += 1 - self.optimizer.adjust(self.params) - t = self.num_adjustments - beta1 = Adam.clamp(self.params["beta1"]) - beta2 = Adam.clamp(self.params["beta2"]) - - updated_param = [] - if pytorch_mod: - params = params.named_parameters(prefix='') #Changer nom d'input... 
- - for name, param in params: - if name not in self.cache: - self.cache[name] = { - "m": torch.zeros(param.shape, device=self.device), - "v": torch.zeros(param.shape, device=self.device) - + 10.0 ** self.params["log_eps"].data - # NOTE that we add a little ‘fudge factor' here because sqrt is not - # differentiable at exactly zero - } - #print(name, param.device) - g = param.grad.detach() - self.cache[name]["m"] = m = ( - beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g - ) - self.cache[name]["v"] = v = ( - beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g - ) - self.all_params_with_gradients.append(nn.Parameter(m)) #Risque de surcharger la memoire => Dict mieux ? - self.all_params_with_gradients.append(nn.Parameter(v)) - m_hat = m / (1.0 - beta1 ** float(t)) - v_hat = v / (1.0 - beta2 ** float(t)) - dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"]) - updated_param[name] = param.detach() - self.params["alpha"] * dparam - - if pytorch_mod: params.update(updated_param) #Changer nom d'input... 
- else: params = updated_param - - def __str__(self): - return "adam(" + str(self.params) + ") / " + str(self.optimizer) diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py deleted file mode 100755 index 6ed0f6f..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py +++ /dev/null @@ -1,182 +0,0 @@ -import numpy as np -import json, math, time, os -from hyperopt import * -import gc - -BATCH_SIZE = 300 - -mnist_train = torchvision.datasets.MNIST( - "./data", train=True, download=True, transform=torchvision.transforms.ToTensor() -) - -mnist_test = torchvision.datasets.MNIST( - "./data", train=False, download=True, transform=torchvision.transforms.ToTensor() -) - -dl_train = torch.utils.data.DataLoader( - mnist_train, batch_size=BATCH_SIZE, shuffle=False -) -dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False) - - -def test(model): - for i, (features_, labels_) in enumerate(dl_test): - features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_ - pred = model.forward(features) - return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100 - - -def train(model, epochs=3, height=1): - stats = [] - for epoch in range(epochs): - for i, (features_, labels_) in enumerate(dl_train): - t0 = time.process_time() - model.begin() - features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_ - pred = model.forward( - features - ) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/ - loss = F.nll_loss(pred, labels) - model.zero_grad() - loss.backward(create_graph=True) - model.adjust() - tf = time.process_time() - data = { - "time": tf - t0, - "iter": epoch * len(dl_train) + i, - "loss": loss.item(), - "params": { - k: v.item() - for k, v in model.optimizer.parameters.items() - if "." 
not in k - }, - } - stats.append(data) - return stats - - -def run(opt, name="out", usr={}, epochs=3, height=1): - torch.manual_seed(0x42) - model = MNIST_FullyConnected(28 * 28, 128, 10, opt) - print("Running...", str(model)) - model.initialize() - log = train(model, epochs, height) - acc = test(model) - out = {"acc": acc, "log": log, "usr": usr} - with open("log/%s.json" % name, "w+") as f: - json.dump(out, f, indent=True) - times = [x["time"] for x in log] - print("Times (ms):", np.mean(times), "+/-", np.std(times)) - print("Final accuracy:", acc) - return out - - -def sgd_experiments(): - run(SGD(0.01), "sgd", epochs=1) - out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1) - alpha = out["log"][-1]["params"]["alpha"] - print(alpha) - run(SGD(alpha), "sgd-final", epochs=1) - - -def adam_experiments(): - run(Adam(), "adam", epochs=1) - print() - mo = SGDPerParam( - 0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001) - ) - out = run(Adam(optimizer=mo), "adam+sgd", epochs=1) - p = out["log"][-1]["params"] - alpha = p["alpha"] - beta1 = Adam.clamp(torch.tensor(p["beta1"])).item() - beta2 = Adam.clamp(torch.tensor(p["beta2"])).item() - log_eps = p["log_eps"] - print(alpha, beta1, beta2, log_eps) - print(mo) - run( - Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps), - "adam+sgd-final", - epochs=1, - ) - print() - out = run(Adam(optimizer=Adam()), "adam2", epochs=1) - p = out["log"][-1]["params"] - alpha = p["alpha"] - beta1 = Adam.clamp(torch.tensor(p["beta1"])).item() - beta2 = Adam.clamp(torch.tensor(p["beta2"])).item() - log_eps = p["log_eps"] - print(alpha, beta1, beta2, log_eps) - run( - Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps), - "adam2-final", - epochs=1, - ) - print() - mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001)) - out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1) - p = out["log"][-1]["params"] - alpha = p["alpha"] - print(alpha) - print(mo) - 
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1) - print() - out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1) - p = out["log"][-1]["params"] - alpha = p["alpha"] - print(alpha) - run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1) - - -def surface(): - run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1) - for log_alpha in np.linspace(-3, 2, 10): - run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1) - - -def make_sgd_stack(height, top): - if height == 0: - return SGD(alpha=top) - return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top)) - - -def make_adam_stack(height, top=0.0000001): - if height == 0: - return Adam(alpha=top) - return Adam(alpha=top, optimizer=make_adam_stack(height - 1)) - - -def stack_test(): - for top in np.linspace(-7, 3, 20): - for height in range(6): - print("height =", height, "to p=", top) - opt = make_sgd_stack(height, 10 ** top) - run( - opt, - "metasgd3-%d@%+.2f" % (height, top), - {"height": height, "top": top}, - epochs=1, - height=height, - ) - gc.collect() - - -def perf_test(): - for h in range(51): - print("height:", h) - # opt = make_sgd_stack(h, 0.01) - opt = make_adam_stack(h) - run(opt, "adamperf-%d" % h, {"height": h}, epochs=1) - gc.collect() - - -if __name__ == "__main__": - try: - os.mkdir("log") - except: - print("log/ exists already") - - surface() - sgd_experiments() - adam_experiments() - stack_test() - perf_test() diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt b/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt deleted file mode 100755 index 5aae77b..0000000 --- a/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy==1.17.2 -Pillow==6.2.0 -six==1.12.0 -torch==1.2.0 -torchvision==0.4.0 diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py deleted file mode 100755 index 936894f..0000000 --- 
a/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py +++ /dev/null @@ -1,344 +0,0 @@ -import numpy as np -import json, math, time, os -from data_aug import * -#from data_aug_v2 import * -import gc - -import matplotlib.pyplot as plt -from torchviz import make_dot, make_dot_from_trace - -from torch.utils.data import SubsetRandomSampler - -BATCH_SIZE = 300 -#TEST_SIZE = 10000 -TEST_SIZE = 300 -DATA_LIMIT = 10 - -''' -data_train = torchvision.datasets.MNIST( - "./data", train=True, download=True, - transform=torchvision.transforms.Compose([ - #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0), - torchvision.transforms.ToTensor() - ]) -) -data_test = torchvision.datasets.MNIST( - "./data", train=False, download=True, transform=torchvision.transforms.ToTensor() -) - -''' -data_train = torchvision.datasets.CIFAR10( - "./data", train=True, download=True, - transform=torchvision.transforms.Compose([ - #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0), - torchvision.transforms.ToTensor() - ]) -) - -data_test = torchvision.datasets.CIFAR10( - "./data", train=False, download=True, transform=torchvision.transforms.ToTensor() -) - -train_subset_indices=range(int(len(data_train)/2)) -val_subset_indices=range(int(len(data_train)/2),len(data_train)) - -dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices)) -dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices)) -dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False) - -def test(model, reshape_in=True, device = torch.device('cuda')): - for i, (features_, labels_) in enumerate(dl_test): - if reshape_in : - features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_ - else: - features, 
labels =features_, labels_ - - features, labels = features.to(device), labels.to(device) - - pred = model.forward(features) - return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100 - -def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True): - if train_data: dl = dl_train - else: dl = dl_val - for i, (features_, labels_) in enumerate(dl): - if i > DATA_LIMIT : break - #t0 = time.process_time() - - if reshape_in : - features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_ - else: - features, labels =features_, labels_ - - features, labels = features.to(device), labels.to(device) - - #optimizer.begin() - #optimizer.zero_grad() - model.begin() - model.zero_grad() - pred = model.forward(features) - - #loss = F.nll_loss(pred, labels) - loss = F.cross_entropy(pred,labels) - - #model.print_grad_fn() - #optimizer.print_grad_fn() - #print('-'*50) - - loss.backward(create_graph=True) - - #optimizer.step() - if train_data: model.adjust() - else: model.adjust_val() - - #tf = time.process_time() - #data = { - # "time": tf - t0, - # "iter": epoch * len(dl_train) + i, - # "loss": loss.item(), - # "params": { - # k: v.item() - # for k, v in model.optimizer.parameters.items() - # if "." 
not in k - # }, - #} - #stats.append(data) - - #print_torch_mem(i) - return loss.item() - -def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')): - log = [] - for epoch in range(epochs): - - #dl_train.dataset.transform=torchvision.transforms.Compose([ - # torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0), - # torchvision.transforms.ToTensor() - #]) - viz_data(fig_name='res/data_sample') - t0 = time.process_time() - loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device) - train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False) - - #acc = test(model=model, reshape_in=reshape_in, device=device) - acc = 0 - - - tf = time.process_time() - data = { - "time": tf - t0, - "epoch": epoch, - "loss": loss, - "acc": acc, - "params": { - k: v.item() - for k, v in model.optimizer.parameters.items() - #for k, v in model.mods.data_aug.params.named_parameters() - if "." 
not in k - - }, - } - log.append(data) - - - print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"]) - param = [p for p in model.param_grad() if p.grad is not None] - if(len(param)!=0): - print(param[-2],' / ', param[-2].grad) - print(param[-1],' / ', param[-1].grad) - return log - -def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')): - stats = [] - for epoch in range(epochs): - for i, (features_, labels_) in enumerate(dl_train): - t0 = time.process_time() - model.begin() - if reshape_in : - features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_ - else: - features, labels =features_, labels_ - - features, labels = features.to(device), labels.to(device) - - pred = model.forward( - features - ) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/ - #loss = F.nll_loss(pred, labels) - loss = F.cross_entropy(pred,labels) - - #print('-'*50) - #param = [p for p in model.param_grad() if p.grad is not None] - #if(len(param)!=0): - # print(param[-2],' / ', param[-2].grad) - # print(param[-1],' / ', param[-1].grad) - - model.zero_grad() - loss.backward(create_graph=True) - model.adjust() - tf = time.process_time() - data = { - "time": tf - t0, - "iter": epoch * len(dl_train) + i, - "loss": loss.item(), - "params": { - k: v.item() - for k, v in model.optimizer.parameters.items() - if "." 
not in k - }, - } - stats.append(data) - - print('-'*50) - i=0 - for obj in gc.get_objects(): - try: - if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)) and len(obj.size())>1: - print(i, type(obj), obj.size()) - i+=1 - except: - pass - print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"]) - param = [p for p in model.param_grad() if p.grad is not None] - if(len(param)!=0): - print(param[-2],' / ', param[-2].grad) - print(param[-1],' / ', param[-1].grad) - return stats - -def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')): - torch.manual_seed(0x42) - if not cnn: - reshape_in = True - #model = MNIST_FullyConnected(28 * 28, 128, 10, opt) - model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device) - - else: - reshape_in = False - #model = LeNet(1, 10,opt, device) - #model = LeNet_v2(1, 10,opt, device).to(device=device) - model = LeNet_v2(3, 10,opt, device).to(device=device) - optimizer=None - ''' - m = LeNet_v3(1, 10) - a = Data_aug() - model = Augmented_model(model=m, - data_augmenter=a, - optimizer=opt).to(device) #deux fois le meme optimizer ?... 
- ''' - ''' - m = LeNet_v3(1, 10) - a = Data_aug() - model = Augmented_model(model=m, data_augmenter=a).to(device) - #optimizer = SGD(model.parameters()) - optimizer = SGD(model.parameters(), lr=0.01, height=1) - ''' - - - #for idx, m in enumerate(model.modules()): - # print(idx, '->', m) - print("Running...", str(model)) - model.initialize() - #print_model(model) - #model.data_augmentation(False) - #model.eval() - - log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device) - model.eval() - acc = test(model, reshape_in, device=device) - - - #param = [p for p in model.param_grad() if p.grad is not None] - #if(len(param)!=0): - # print(param[-2],' / ', param[-2].grad) - # print(param[-1],' / ', param[-1].grad) - - out = {"acc": acc, "log": log, "usr": usr} - with open("log/%s.json" % name, "w+") as f: - json.dump(out, f, indent=True) - times = [x["time"] for x in log] - print("Times (ms):", np.mean(times), "+/-", np.std(times)) - print("Final accuracy:", acc) - - #plot_res(log, fig_name='res/'+name) - - return out - -def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')): - #print(height,device) - if height == 0: - return Adam(alpha=top, device=device) - return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device) - -def plot_res(log, fig_name='res'): - - fig, ax = plt.subplots(ncols=3, figsize=(15, 3)) - ax[0].set_title('Loss') - ax[0].plot([x["loss"] for x in log]) - - ax[1].set_title('Acc') - ax[1].plot([x["acc"] for x in log]) - - ax[2].set_title('mag') - ax[2].plot([x["data_aug"] for x in log]) - - plt.savefig(fig_name) - -def print_torch_mem(add_info=''): - - nb=0 - max_size=0 - for obj in gc.get_objects(): - try: - if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1: - #print(i, type(obj), obj.size()) - size = np.sum(obj.size()) - if(size>max_size): max_size=size - nb+=1 - except: - pass - print(add_info, 
"-Pytroch tensor nb:",nb," / Max dim:", max_size) - -def print_model(model, fig_name='graph/graph'): #Semble ne pas marcher pour les models en fonctionnel - x = torch.randn(1,1,28,28, device=device) - dot=make_dot(model(x), params=dict(model.named_parameters())) - dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats - dot.render(fig_name) - print("Model graph generated !") - -def viz_data(fig_name='data_sample'): - - features_, labels_ = next(iter(dl_train)) - plt.figure(figsize=(10,10)) - #for i, (features_, labels_) in enumerate(dl_train): - for i in range(25): - if i==25: break - #print(features_.size(), labels_.size()) - - plt.subplot(5,5,i+1) - plt.xticks([]) - plt.yticks([]) - plt.grid(False) - - img = features_[i,0,:,:] - - #print('im shape',img.shape) - plt.imshow(img, cmap=plt.cm.binary) - plt.xlabel(labels_[i].item()) - - plt.savefig(fig_name) - -########################################## -if __name__ == "__main__": - try: - os.mkdir("log") - except: - print("log/ exists already") - - device = torch.device('cuda') - - run(make_adam_stack(height=1, top=0.001, device=device), - "Augmented_MNIST", - epochs=100, - cnn=True, - device = device) - print() \ No newline at end of file diff --git a/Old/PBA/LeNet.py b/Old/PBA/LeNet.py deleted file mode 100755 index 7a411b6..0000000 --- a/Old/PBA/LeNet.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np -import tensorflow as tf - -## build the neural network class -# weight initialization -def weight_variable(shape, name = None): - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial, name = name) - -# bias initialization -def bias_variable(shape, name = None): - initial = tf.constant(0.1, shape=shape) # positive bias - return tf.Variable(initial, name = name) - -# 2D convolution -def conv2d(x, W, name = None): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name) - -# max pooling -def max_pool_2x2(x, name = None): - return 
tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding='SAME', name = name) - -def LeNet(images, num_classes): - # tunable hyperparameters for nn architecture - s_f_conv1 = 5; # filter size of first convolution layer (default = 3) - n_f_conv1 = 20; # number of features of first convolution layer (default = 36) - s_f_conv2 = 5; # filter size of second convolution layer (default = 3) - n_f_conv2 = 50; # number of features of second convolution layer (default = 36) - n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576) - n_n_fc2 = 500; # number of neurons of first fully connected layer (default = 576) - - #print(images.shape) - # 1.layer: convolution + max pooling - W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32) - b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32) - h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32) - h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32) - #print(h_conv1_tf.shape) - #print(h_pool1_tf.shape) - # 2.layer: convolution + max pooling - W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf') - b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf') - h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32) - h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32) - - #print(h_pool2_tf.shape) - - # 4.layer: fully connected - W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024) - b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024) - h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024) - h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf, - name = 'h_fc1_tf') # (.,1024) - - # add dropout - #keep_prob_tf = 
tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf') - #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf') - print(h_fc1_tf.shape) - - # 5.layer: fully connected - W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf') - b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf') - z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10) - # predicted probabilities in one-hot encoding - #y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf') - - # tensor of correct predictions - #y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1), - # tf.argmax(y_data_tf, 1), - # name = 'y_pred_correct_tf') - logits = z_pred_tf - return logits #y_pred_proba_tf diff --git a/Old/PBA/model.py b/Old/PBA/model.py deleted file mode 100755 index 47a0aa9..0000000 --- a/Old/PBA/model.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""PBA & AutoAugment Train/Eval module. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import time - -import numpy as np -import tensorflow as tf - -import autoaugment.custom_ops as ops -from autoaugment.shake_drop import build_shake_drop_model -from autoaugment.shake_shake import build_shake_shake_model -import pba.data_utils as data_utils -import pba.helper_utils as helper_utils -from pba.wrn import build_wrn_model -from pba.resnet import build_resnet_model - -from pba.LeNet import LeNet - -arg_scope = tf.contrib.framework.arg_scope - - -def setup_arg_scopes(is_training): - """Sets up the argscopes that will be used when building an image model. - - Args: - is_training: Is the model training or not. - - Returns: - Arg scopes to be put around the model being constructed. - """ - - batch_norm_decay = 0.9 - batch_norm_epsilon = 1e-5 - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': batch_norm_epsilon, - 'scale': True, - # collection containing the moving mean and moving variance. - 'is_training': is_training, - } - - scopes = [] - - scopes.append(arg_scope([ops.batch_norm], **batch_norm_params)) - return scopes - - -def build_model(inputs, num_classes, is_training, hparams): - """Constructs the vision model being trained/evaled. - - Args: - inputs: input features/images being fed to the image model build built. - num_classes: number of output classes being predicted. - is_training: is the model training or not. - hparams: additional hyperparameters associated with the image model. - - Returns: - The logits of the image model. 
- """ - scopes = setup_arg_scopes(is_training) - if len(scopes) != 1: - raise ValueError('Nested scopes depreciated in py3.') - with scopes[0]: - if hparams.model_name == 'pyramid_net': - logits = build_shake_drop_model(inputs, num_classes, is_training) - elif hparams.model_name == 'wrn': - logits = build_wrn_model(inputs, num_classes, hparams.wrn_size) - elif hparams.model_name == 'shake_shake': - logits = build_shake_shake_model(inputs, num_classes, hparams, - is_training) - elif hparams.model_name == 'resnet': - logits = build_resnet_model(inputs, num_classes, hparams, - is_training) - elif hparams.model_name == 'LeNet': - logits = LeNet(inputs, num_classes) - else: - raise ValueError("Unknown model name.") - return logits - - -class Model(object): - """Builds an model.""" - - def __init__(self, hparams, num_classes, image_size): - self.hparams = hparams - self.num_classes = num_classes - self.image_size = image_size - - def build(self, mode): - """Construct the model.""" - assert mode in ['train', 'eval'] - self.mode = mode - self._setup_misc(mode) - self._setup_images_and_labels(self.hparams.dataset) - self._build_graph(self.images, self.labels, mode) - - self.init = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - - def _setup_misc(self, mode): - """Sets up miscellaneous in the model constructor.""" - self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False) - self.reuse = None if (mode == 'train') else True - self.batch_size = self.hparams.batch_size - if mode == 'eval': - self.batch_size = self.hparams.test_batch_size - - def _setup_images_and_labels(self, dataset): - """Sets up image and label placeholders for the model.""" - if dataset == 'cifar10' or dataset == 'cifar100' or self.mode == 'train': - self.images = tf.placeholder(tf.float32, - [self.batch_size, self.image_size, self.image_size, 3]) - self.labels = tf.placeholder(tf.float32, - [self.batch_size, self.num_classes]) - else: - self.images = 
tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3]) - self.labels = tf.placeholder(tf.float32, [None, self.num_classes]) - - def assign_epoch(self, session, epoch_value): - session.run( - self._epoch_update, feed_dict={self._new_epoch: epoch_value}) - - def _build_graph(self, images, labels, mode): - """Constructs the TF graph for the model. - - Args: - images: A 4-D image Tensor - labels: A 2-D labels Tensor. - mode: string indicating training mode ( e.g., 'train', 'valid', 'test'). - """ - is_training = 'train' in mode - if is_training: - self.global_step = tf.train.get_or_create_global_step() - - logits = build_model(images, self.num_classes, is_training, - self.hparams) - self.predictions, self.cost = helper_utils.setup_loss(logits, labels) - - self._calc_num_trainable_params() - - # Adds L2 weight decay to the cost - self.cost = helper_utils.decay_weights(self.cost, - self.hparams.weight_decay_rate) - - if is_training: - self._build_train_op() - - # Setup checkpointing for this child model - # Keep 2 or more checkpoints around during training. 
- with tf.device('/cpu:0'): - self.saver = tf.train.Saver(max_to_keep=10) - - self.init = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - - def _calc_num_trainable_params(self): - self.num_trainable_params = np.sum([ - np.prod(var.get_shape().as_list()) - for var in tf.trainable_variables() - ]) - tf.logging.info('number of trainable params: {}'.format( - self.num_trainable_params)) - - def _build_train_op(self): - """Builds the train op for the model.""" - hparams = self.hparams - tvars = tf.trainable_variables() - grads = tf.gradients(self.cost, tvars) - if hparams.gradient_clipping_by_global_norm > 0.0: - grads, norm = tf.clip_by_global_norm( - grads, hparams.gradient_clipping_by_global_norm) - tf.summary.scalar('grad_norm', norm) - - # Setup the initial learning rate - initial_lr = self.lr_rate_ph - optimizer = tf.train.MomentumOptimizer( - initial_lr, 0.9, use_nesterov=True) - - self.optimizer = optimizer - apply_op = optimizer.apply_gradients( - zip(grads, tvars), global_step=self.global_step, name='train_step') - train_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - with tf.control_dependencies([apply_op]): - self.train_op = tf.group(*train_ops) - - -class ModelTrainer(object): - """Trains an instance of the Model class.""" - - def __init__(self, hparams): - self._session = None - self.hparams = hparams - - # Set the random seed to be sure the same validation set - # is used for each model - np.random.seed(0) - self.data_loader = data_utils.DataSet(hparams) - np.random.seed() # Put the random seed back to random - self.data_loader.reset() - - # extra stuff for ray - self._build_models() - self._new_session() - self._session.__enter__() - - def save_model(self, checkpoint_dir, step=None): - """Dumps model into the backup_dir. - - Args: - step: If provided, creates a checkpoint with the given step - number, instead of overwriting the existing checkpoints. 
- """ - model_save_name = os.path.join(checkpoint_dir, - 'model.ckpt') + '-' + str(step) - save_path = self.saver.save(self.session, model_save_name) - tf.logging.info('Saved child model') - return model_save_name - - def extract_model_spec(self, checkpoint_path): - """Loads a checkpoint with the architecture structure stored in the name.""" - self.saver.restore(self.session, checkpoint_path) - tf.logging.warning( - 'Loaded child model checkpoint from {}'.format(checkpoint_path)) - - def eval_child_model(self, model, data_loader, mode): - """Evaluate the child model. - - Args: - model: image model that will be evaluated. - data_loader: dataset object to extract eval data from. - mode: will the model be evalled on train, val or test. - - Returns: - Accuracy of the model on the specified dataset. - """ - tf.logging.info('Evaluating child model in mode {}'.format(mode)) - while True: - try: - accuracy = helper_utils.eval_child_model( - self.session, model, data_loader, mode) - tf.logging.info( - 'Eval child model accuracy: {}'.format(accuracy)) - # If epoch trained without raising the below errors, break - # from loop. - break - except (tf.errors.AbortedError, tf.errors.UnavailableError) as e: - tf.logging.info( - 'Retryable error caught: {}. Retrying.'.format(e)) - - return accuracy - - @contextlib.contextmanager - def _new_session(self): - """Creates a new session for model m.""" - # Create a new session for this model, initialize - # variables, and save / restore from checkpoint. - sess_cfg = tf.ConfigProto( - allow_soft_placement=True, log_device_placement=False) - sess_cfg.gpu_options.allow_growth = True - self._session = tf.Session('', config=sess_cfg) - self._session.run([self.m.init, self.meval.init]) - return self._session - - def _build_models(self): - """Builds the image models for train and eval.""" - # Determine if we should build the train and eval model. When using - # distributed training we only want to build one or the other and not both. 
- with tf.variable_scope('model', use_resource=False): - m = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size) - m.build('train') - self._num_trainable_params = m.num_trainable_params - self._saver = m.saver - with tf.variable_scope('model', reuse=True, use_resource=False): - meval = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size) - meval.build('eval') - self.m = m - self.meval = meval - - def _run_training_loop(self, curr_epoch): - """Trains the model `m` for one epoch.""" - start_time = time.time() - while True: - try: - train_accuracy = helper_utils.run_epoch_training( - self.session, self.m, self.data_loader, curr_epoch) - break - except (tf.errors.AbortedError, tf.errors.UnavailableError) as e: - tf.logging.info( - 'Retryable error caught: {}. Retrying.'.format(e)) - tf.logging.info('Finished epoch: {}'.format(curr_epoch)) - tf.logging.info('Epoch time(min): {}'.format( - (time.time() - start_time) / 60.0)) - return train_accuracy - - def _compute_final_accuracies(self, iteration): - """Run once training is finished to compute final test accuracy.""" - if (iteration >= self.hparams.num_epochs - 1): - test_accuracy = self.eval_child_model(self.meval, self.data_loader, - 'test') - else: - test_accuracy = 0 - tf.logging.info('Test Accuracy: {}'.format(test_accuracy)) - return test_accuracy - - def run_model(self, epoch): - """Trains and evalutes the image model.""" - valid_accuracy = 0. 
- training_accuracy = self._run_training_loop(epoch) - if self.hparams.validation_size > 0: - valid_accuracy = self.eval_child_model(self.meval, - self.data_loader, 'val') - tf.logging.info('Train Acc: {}, Valid Acc: {}'.format( - training_accuracy, valid_accuracy)) - return training_accuracy, valid_accuracy - - def reset_config(self, new_hparams): - self.hparams = new_hparams - self.data_loader.reset_policy(new_hparams) - return - - @property - def saver(self): - return self._saver - - @property - def session(self): - return self._session - - @property - def num_trainable_params(self): - return self._num_trainable_params diff --git a/Old/PBA/search.sh b/Old/PBA/search.sh deleted file mode 100755 index 08fa9c2..0000000 --- a/Old/PBA/search.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -export PYTHONPATH="$(pwd)" - -cifar10_LeNet_search() { - local_dir="$PWD/results/" - data_path="$PWD/datasets/cifar-10-batches-py" - - python pba/search.py \ - --local_dir "$local_dir" \ - --model_name LeNet \ - --data_path "$data_path" --dataset cifar10 \ - --train_size 4000 --val_size 46000 \ - --checkpoint_freq 0 \ - --name "cifar10_search" --gpu 0.15 --cpu 2 \ - --num_samples 16 --perturbation_interval 3 --epochs 150 \ - --explore cifar10 --aug_policy cifar10 \ - --lr 0.1 --wd 0.0005 -} - -cifar10_search() { - local_dir="$PWD/results/" - data_path="$PWD/datasets/cifar-10-batches-py" - - python pba/search.py \ - --local_dir "$local_dir" \ - --model_name wrn_40_2 \ - --data_path "$data_path" --dataset cifar10 \ - --train_size 4000 --val_size 46000 \ - --checkpoint_freq 0 \ - --name "cifar10_search" --gpu 0.15 --cpu 2 \ - --num_samples 16 --perturbation_interval 3 --epochs 200 \ - --explore cifar10 --aug_policy cifar10 \ - --lr 0.1 --wd 0.0005 -} - -svhn_search() { - local_dir="$PWD/results/" - data_path="$PWD/datasets/" - - python pba/search.py \ - --local_dir "$local_dir" --data_path "$data_path" \ - --model_name wrn_40_2 --dataset svhn \ - --train_size 1000 --val_size 7325 \ - 
--checkpoint_freq 0 \ - --name "svhn_search" --gpu 0.19 --cpu 2 \ - --num_samples 16 --perturbation_interval 3 --epochs 160 \ - --explore cifar10 --aug_policy cifar10 --no_cutout \ - --lr 0.1 --wd 0.005 -} - -if [ "$1" = "rcifar10" ]; then - cifar10_search -elif [ "$1" = "rsvhn" ]; then - svhn_search -elif [ "$1" = "LeNet" ]; then - cifar10_LeNet_search -else - echo "invalid args" -fi diff --git a/Old/PBA/setup.py b/Old/PBA/setup.py deleted file mode 100755 index cc9b38b..0000000 --- a/Old/PBA/setup.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Parse flags and set up hyperparameters.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import random -import tensorflow as tf - -from pba.augmentation_transforms_hp import NUM_HP_TRANSFORM - - -def create_parser(state): - """Create arg parser for flags.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--model_name', - default='wrn', - choices=('wrn_28_10', 'wrn_40_2', 'shake_shake_32', 'shake_shake_96', - 'shake_shake_112', 'pyramid_net', 'resnet', 'LeNet')) - parser.add_argument( - '--data_path', - default='/tmp/datasets/', - help='Directory where dataset is located.') - parser.add_argument( - '--dataset', - default='cifar10', - choices=('cifar10', 'cifar100', 'svhn', 'svhn-full', 'test')) - parser.add_argument( - '--recompute_dset_stats', - action='store_true', - help='Instead of using hardcoded mean/std, recompute from dataset.') - parser.add_argument('--local_dir', type=str, default='/tmp/ray_results/', help='Ray directory.') - parser.add_argument('--restore', type=str, default=None, help='If specified, tries to restore from given path.') - parser.add_argument('--train_size', type=int, default=5000, help='Number of training examples.') - parser.add_argument('--val_size', type=int, default=45000, help='Number of validation examples.') - parser.add_argument('--checkpoint_freq', type=int, default=50, help='Checkpoint 
frequency.') - parser.add_argument( - '--cpu', type=float, default=4, help='Allocated by Ray') - parser.add_argument( - '--gpu', type=float, default=1, help='Allocated by Ray') - parser.add_argument( - '--aug_policy', - type=str, - default='cifar10', - help= - 'which augmentation policy to use (in augmentation_transforms_hp.py)') - # search-use only - parser.add_argument( - '--explore', - type=str, - default='cifar10', - help='which explore function to use') - parser.add_argument( - '--epochs', - type=int, - default=0, - help='Number of epochs, or <=0 for default') - parser.add_argument( - '--no_cutout', action='store_true', help='turn off cutout') - parser.add_argument('--lr', type=float, default=0.1, help='learning rate') - parser.add_argument('--wd', type=float, default=0.0005, help='weight decay') - parser.add_argument('--bs', type=int, default=128, help='batch size') - parser.add_argument('--test_bs', type=int, default=25, help='test batch size') - parser.add_argument('--num_samples', type=int, default=1, help='Number of Ray samples') - - if state == 'train': - parser.add_argument( - '--use_hp_policy', - action='store_true', - help='otherwise use autoaug policy') - parser.add_argument( - '--hp_policy', - type=str, - default=None, - help='either a comma separated list of values or a file') - parser.add_argument( - '--hp_policy_epochs', - type=int, - default=200, - help='number of epochs/iterations policy trained for') - parser.add_argument( - '--no_aug', - action='store_true', - help= - 'no additional augmentation at all (besides cutout if not toggled)' - ) - parser.add_argument( - '--flatten', - action='store_true', - help='randomly select aug policy from schedule') - parser.add_argument('--name', type=str, default='autoaug') - - elif state == 'search': - parser.add_argument('--perturbation_interval', type=int, default=10) - parser.add_argument('--name', type=str, default='autoaug_pbt') - else: - raise ValueError('unknown state') - args = parser.parse_args() - 
tf.logging.info(str(args)) - return args - - -def create_hparams(state, FLAGS): # pylint: disable=invalid-name - """Creates hyperparameters to pass into Ray config. - - Different options depending on search or eval mode. - - Args: - state: a string, 'train' or 'search'. - FLAGS: parsed command line flags. - - Returns: - tf.hparams object. - """ - epochs = 0 - tf.logging.info('data path: {}'.format(FLAGS.data_path)) - hparams = tf.contrib.training.HParams( - train_size=FLAGS.train_size, - validation_size=FLAGS.val_size, - dataset=FLAGS.dataset, - data_path=FLAGS.data_path, - batch_size=FLAGS.bs, - gradient_clipping_by_global_norm=5.0, - explore=FLAGS.explore, - aug_policy=FLAGS.aug_policy, - no_cutout=FLAGS.no_cutout, - recompute_dset_stats=FLAGS.recompute_dset_stats, - lr=FLAGS.lr, - weight_decay_rate=FLAGS.wd, - test_batch_size=FLAGS.test_bs) - - if state == 'train': - hparams.add_hparam('no_aug', FLAGS.no_aug) - hparams.add_hparam('use_hp_policy', FLAGS.use_hp_policy) - if FLAGS.use_hp_policy: - if FLAGS.hp_policy == 'random': - tf.logging.info('RANDOM SEARCH') - parsed_policy = [] - for i in range(NUM_HP_TRANSFORM * 4): - if i % 2 == 0: - parsed_policy.append(random.randint(0, 10)) - else: - parsed_policy.append(random.randint(0, 9)) - elif FLAGS.hp_policy.endswith('.txt') or FLAGS.hp_policy.endswith( - '.p'): - # will be loaded in in data_utils - parsed_policy = FLAGS.hp_policy - else: - # parse input into a fixed augmentation policy - parsed_policy = FLAGS.hp_policy.split(', ') - parsed_policy = [int(p) for p in parsed_policy] - hparams.add_hparam('hp_policy', parsed_policy) - hparams.add_hparam('hp_policy_epochs', FLAGS.hp_policy_epochs) - hparams.add_hparam('flatten', FLAGS.flatten) - elif state == 'search': - hparams.add_hparam('no_aug', False) - hparams.add_hparam('use_hp_policy', True) - # default start value of 0 - hparams.add_hparam('hp_policy', - [0 for _ in range(4 * NUM_HP_TRANSFORM)]) - else: - raise ValueError('unknown state') - - if 
FLAGS.model_name == 'wrn_40_2': - hparams.add_hparam('model_name', 'wrn') - epochs = 200 - hparams.add_hparam('wrn_size', 32) - hparams.add_hparam('wrn_depth', 40) - elif FLAGS.model_name == 'wrn_28_10': - hparams.add_hparam('model_name', 'wrn') - epochs = 200 - hparams.add_hparam('wrn_size', 160) - hparams.add_hparam('wrn_depth', 28) - elif FLAGS.model_name == 'resnet': - hparams.add_hparam('model_name', 'resnet') - epochs = 200 - hparams.add_hparam('resnet_size', 20) - hparams.add_hparam('num_filters', 32) - elif FLAGS.model_name == 'shake_shake_32': - hparams.add_hparam('model_name', 'shake_shake') - epochs = 1800 - hparams.add_hparam('shake_shake_widen_factor', 2) - elif FLAGS.model_name == 'shake_shake_96': - hparams.add_hparam('model_name', 'shake_shake') - epochs = 1800 - hparams.add_hparam('shake_shake_widen_factor', 6) - elif FLAGS.model_name == 'shake_shake_112': - hparams.add_hparam('model_name', 'shake_shake') - epochs = 1800 - hparams.add_hparam('shake_shake_widen_factor', 7) - elif FLAGS.model_name == 'pyramid_net': - hparams.add_hparam('model_name', 'pyramid_net') - epochs = 1800 - hparams.set_hparam('batch_size', 64) - - elif FLAGS.model_name == 'LeNet': - hparams.add_hparam('model_name', 'LeNet') - epochs = 200 - - else: - raise ValueError('Not Valid Model Name: %s' % FLAGS.model_name) - if FLAGS.epochs > 0: - tf.logging.info('overwriting with custom epochs') - epochs = FLAGS.epochs - hparams.add_hparam('num_epochs', epochs) - tf.logging.info('epochs: {}, lr: {}, wd: {}'.format( - hparams.num_epochs, hparams.lr, hparams.weight_decay_rate)) - return hparams diff --git a/Old/PBA/table_1_cifar10.sh b/Old/PBA/table_1_cifar10.sh deleted file mode 100755 index 4d35bd6..0000000 --- a/Old/PBA/table_1_cifar10.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -export PYTHONPATH="$(pwd)" - -# args: [model name] [lr] [wd] #Learning rate / weight decay -eval_cifar10() { - hp_policy="$PWD/schedules/rcifar10_16_wrn.txt" - local_dir="$PWD/results/" - 
data_path="$PWD/datasets/cifar-10-batches-py" - - size=50000 - dataset="cifar10" - name="eval_cifar10_$1" # has 8 cutout size - - python pba/train.py \ - --local_dir "$local_dir" --data_path "$data_path" \ - --model_name "$1" --dataset "$dataset" \ - --train_size "$size" --val_size 0 \ - --checkpoint_freq 25 --gpu 1 --cpu 4 \ - --use_hp_policy --hp_policy "$hp_policy" \ - --hp_policy_epochs 200 \ - --aug_policy cifar10 --name "$name" \ - --lr "$2" --wd "$3" -} - -if [ "$@" = "wrn_28_10" ]; then - eval_cifar10 wrn_28_10 0.1 0.0005 -elif [ "$@" = "ss_32" ]; then - eval_cifar10 shake_shake_32 0.01 0.001 -elif [ "$@" = "ss_96" ]; then - eval_cifar10 shake_shake_96 0.01 0.001 -elif [ "$@" = "ss_112" ]; then - eval_cifar10 shake_shake_112 0.01 0.001 -elif [ "$@" = "pyramid_net" ]; then - eval_cifar10 pyramid_net 0.05 0.00005 - -elif [ "$@" = "LeNet" ]; then - eval_cifar10 LeNet 0.05 0.0 - -else - echo "invalid args" -fi diff --git a/Old/UDA/LeNet.py b/Old/UDA/LeNet.py deleted file mode 100755 index 7a411b6..0000000 --- a/Old/UDA/LeNet.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np -import tensorflow as tf - -## build the neural network class -# weight initialization -def weight_variable(shape, name = None): - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial, name = name) - -# bias initialization -def bias_variable(shape, name = None): - initial = tf.constant(0.1, shape=shape) # positive bias - return tf.Variable(initial, name = name) - -# 2D convolution -def conv2d(x, W, name = None): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name) - -# max pooling -def max_pool_2x2(x, name = None): - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding='SAME', name = name) - -def LeNet(images, num_classes): - # tunable hyperparameters for nn architecture - s_f_conv1 = 5; # filter size of first convolution layer (default = 3) - n_f_conv1 = 20; # number of features of first convolution layer 
(default = 36) - s_f_conv2 = 5; # filter size of second convolution layer (default = 3) - n_f_conv2 = 50; # number of features of second convolution layer (default = 36) - n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576) - n_n_fc2 = 500; # number of neurons of first fully connected layer (default = 576) - - #print(images.shape) - # 1.layer: convolution + max pooling - W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32) - b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32) - h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32) - h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32) - #print(h_conv1_tf.shape) - #print(h_pool1_tf.shape) - # 2.layer: convolution + max pooling - W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf') - b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf') - h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32) - h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32) - - #print(h_pool2_tf.shape) - - # 4.layer: fully connected - W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024) - b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024) - h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024) - h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf, - name = 'h_fc1_tf') # (.,1024) - - # add dropout - #keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf') - #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf') - print(h_fc1_tf.shape) - - # 5.layer: fully connected - W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf') - b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf') 
- z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10) - # predicted probabilities in one-hot encoding - #y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf') - - # tensor of correct predictions - #y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1), - # tf.argmax(y_data_tf, 1), - # name = 'y_pred_correct_tf') - logits = z_pred_tf - return logits #y_pred_proba_tf diff --git a/Old/UDA/main.py b/Old/UDA/main.py deleted file mode 100755 index 04f8950..0000000 --- a/Old/UDA/main.py +++ /dev/null @@ -1,620 +0,0 @@ -# coding=utf-8 -# Copyright 2019 The Google UDA Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""UDA on CIFAR-10 and SVHN. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import time -import json - -import numpy as np - -from absl import flags -import absl.logging as _logging # pylint: disable=unused-import - -import tensorflow as tf - -from randaugment import custom_ops as ops -import data -import utils - -from randaugment.wrn import build_wrn_model -from randaugment.shake_drop import build_shake_drop_model -from randaugment.shake_shake import build_shake_shake_model - -from randaugment.LeNet import LeNet - - -# TPU related -flags.DEFINE_string( - "master", default=None, - help="the TPU address. 
This should be set when using Cloud TPU") -flags.DEFINE_string( - "tpu", default=None, - help="The Cloud TPU to use for training. This should be either the name " - "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.") -flags.DEFINE_string( - "gcp_project", default=None, - help="Project name for the Cloud TPU-enabled project. If not specified, " - "we will attempt to automatically detect the GCE project from metadata.") -flags.DEFINE_string( - "tpu_zone", default=None, - help="GCE zone where the Cloud TPU is located in. If not specified, we " - "will attempt to automatically detect the GCE project from metadata.") -flags.DEFINE_bool( - "use_tpu", default=False, - help="Use TPUs rather than GPU/CPU.") -flags.DEFINE_enum( - "task_name", "cifar10", - enum_values=["cifar10", "svhn"], - help="The task to use") - -# UDA config: -flags.DEFINE_integer( - "sup_size", default=4000, - help="Number of supervised pairs to use. " - "-1: all training samples. 4000: 4000 supervised examples.") -flags.DEFINE_integer( - "aug_copy", default=0, - help="Number of different augmented data generated.") -flags.DEFINE_integer( - "unsup_ratio", default=0, - help="The ratio between batch size of unlabeled data and labeled data, " - "i.e., unsup_ratio * train_batch_size is the batch_size for unlabeled data." - "Do not use the unsupervised objective if set to 0.") -flags.DEFINE_enum( - "tsa", "", - enum_values=["", "linear_schedule", "log_schedule", "exp_schedule"], - help="anneal schedule of training signal annealing. " - "tsa='' means not using TSA. See the paper for other schedules.") -flags.DEFINE_float( - "uda_confidence_thresh", default=-1, - help="The threshold on predicted probability on unsupervised data. 
If set," - "UDA loss will only be calculated on unlabeled examples whose largest" - "probability is larger than the threshold") -flags.DEFINE_float( - "uda_softmax_temp", -1, - help="The temperature of the Softmax when making prediction on unlabeled" - "examples. -1 means to use normal Softmax") -flags.DEFINE_float( - "ent_min_coeff", default=0, - help="") -flags.DEFINE_integer( - "unsup_coeff", default=1, - help="The coefficient on the UDA loss. " - "setting unsup_coeff to 1 works for most settings. " - "When you have extermely few samples, consider increasing unsup_coeff") - -# Experiment (data/checkpoint/directory) config -flags.DEFINE_string( - "data_dir", default=None, - help="Path to data directory containing `*.tfrecords`.") -flags.DEFINE_string( - "model_dir", default=None, - help="model dir of the saved checkpoints.") -flags.DEFINE_bool( - "do_train", default=True, - help="Whether to run training.") -flags.DEFINE_bool( - "do_eval", default=False, - help="Whether to run eval on the test set.") -flags.DEFINE_integer( - "dev_size", default=-1, - help="dev set size.") -flags.DEFINE_bool( - "verbose", default=False, - help="Whether to print additional information.") - -# Training config -flags.DEFINE_integer( - "train_batch_size", default=32, - help="Size of train batch.") -flags.DEFINE_integer( - "eval_batch_size", default=8, - help="Size of evalation batch.") -flags.DEFINE_integer( - "train_steps", default=100000, - help="Total number of training steps.") -flags.DEFINE_integer( - "iterations", default=10000, - help="Number of iterations per repeat loop.") -flags.DEFINE_integer( - "save_steps", default=10000, - help="number of steps for model checkpointing.") -flags.DEFINE_integer( - "max_save", default=10, - help="Maximum number of checkpoints to save.") - -# Model config -flags.DEFINE_enum( - "model_name", default="wrn", - enum_values=["wrn", "shake_shake_32", "shake_shake_96", "shake_shake_112", "pyramid_net", "LeNet"], - help="Name of the model") 
-flags.DEFINE_integer( - "num_classes", default=10, - help="Number of categories for classification.") -flags.DEFINE_integer( - "wrn_size", default=32, - help="The size of WideResNet. It should be set to 32 for WRN-28-2" - "and should be set to 160 for WRN-28-10") - -# Optimization config -flags.DEFINE_float( - "learning_rate", default=0.03, - help="Maximum learning rate.") -flags.DEFINE_float( - "weight_decay_rate", default=5e-4, - help="Weight decay rate.") -flags.DEFINE_float( - "min_lr_ratio", default=0.004, - help="Minimum ratio learning rate.") -flags.DEFINE_integer( - "warmup_steps", default=20000, - help="Number of steps for linear lr warmup.") - - - -FLAGS = tf.flags.FLAGS - -arg_scope = tf.contrib.framework.arg_scope - - -def get_tsa_threshold(schedule, global_step, num_train_steps, start, end): - step_ratio = tf.to_float(global_step) / tf.to_float(num_train_steps) - if schedule == "linear_schedule": - coeff = step_ratio - elif schedule == "exp_schedule": - scale = 5 - # [exp(-5), exp(0)] = [1e-2, 1] - coeff = tf.exp((step_ratio - 1) * scale) - elif schedule == "log_schedule": - scale = 5 - # [1 - exp(0), 1 - exp(-5)] = [0, 0.99] - coeff = 1 - tf.exp((-step_ratio) * scale) - return coeff * (end - start) + start - - -def setup_arg_scopes(is_training): - """Sets up the argscopes that will be used when building an image model. - - Args: - is_training: Is the model training or not. - - Returns: - Arg scopes to be put around the model being constructed. - """ - - batch_norm_decay = 0.9 - batch_norm_epsilon = 1e-5 - batch_norm_params = { - # Decay for the moving averages. - "decay": batch_norm_decay, - # epsilon to prevent 0s in variance. - "epsilon": batch_norm_epsilon, - "scale": True, - # collection containing the moving mean and moving variance. 
- "is_training": is_training, - } - - scopes = [] - - scopes.append(arg_scope([ops.batch_norm], **batch_norm_params)) - return scopes - - -def build_model(inputs, num_classes, is_training, update_bn, hparams): - """Constructs the vision model being trained/evaled. - - Args: - inputs: input features/images being fed to the image model build built. - num_classes: number of output classes being predicted. - is_training: is the model training or not. - hparams: additional hyperparameters associated with the image model. - - Returns: - The logits of the image model. - """ - scopes = setup_arg_scopes(is_training) - - try: - from contextlib import nested - except ImportError: - from contextlib import ExitStack, contextmanager - - @contextmanager - def nested(*contexts): - with ExitStack() as stack: - for ctx in contexts: - stack.enter_context(ctx) - yield contexts - - with nested(*scopes): - if hparams.model_name == "pyramid_net": - logits = build_shake_drop_model( - inputs, num_classes, is_training) - elif hparams.model_name == "wrn": - logits = build_wrn_model( - inputs, num_classes, hparams.wrn_size, update_bn) - elif hparams.model_name == "shake_shake": - logits = build_shake_shake_model( - inputs, num_classes, hparams, is_training) - - elif hparams.model_name == "LeNet": - logits = LeNet(inputs, num_classes) - - return logits - - -def _kl_divergence_with_logits(p_logits, q_logits): - p = tf.nn.softmax(p_logits) - log_p = tf.nn.log_softmax(p_logits) - log_q = tf.nn.log_softmax(q_logits) - - kl = tf.reduce_sum(p * (log_p - log_q), -1) - return kl - - -def anneal_sup_loss(sup_logits, sup_labels, sup_loss, global_step, metric_dict): - tsa_start = 1. 
/ FLAGS.num_classes - eff_train_prob_threshold = get_tsa_threshold( - FLAGS.tsa, global_step, FLAGS.train_steps, - tsa_start, end=1) - - one_hot_labels = tf.one_hot( - sup_labels, depth=FLAGS.num_classes, dtype=tf.float32) - sup_probs = tf.nn.softmax(sup_logits, axis=-1) - correct_label_probs = tf.reduce_sum( - one_hot_labels * sup_probs, axis=-1) - larger_than_threshold = tf.greater( - correct_label_probs, eff_train_prob_threshold) - loss_mask = 1 - tf.cast(larger_than_threshold, tf.float32) - loss_mask = tf.stop_gradient(loss_mask) - sup_loss = sup_loss * loss_mask - avg_sup_loss = (tf.reduce_sum(sup_loss) / - tf.maximum(tf.reduce_sum(loss_mask), 1)) - metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask) - metric_dict["sup/eff_train_prob_threshold"] = eff_train_prob_threshold - return sup_loss, avg_sup_loss - - -def get_ent(logits, return_mean=True): - log_prob = tf.nn.log_softmax(logits, axis=-1) - prob = tf.exp(log_prob) - ent = tf.reduce_sum(-prob * log_prob, axis=-1) - if return_mean: - ent = tf.reduce_mean(ent) - return ent - - -def get_model_fn(hparams): - def model_fn(features, labels, mode, params): - sup_labels = tf.reshape(features["label"], [-1]) - - #### Configuring the optimizer - global_step = tf.train.get_global_step() - metric_dict = {} - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - if FLAGS.unsup_ratio > 0 and is_training: - all_images = tf.concat([features["image"], - features["ori_image"], - features["aug_image"]], 0) - else: - all_images = features["image"] - - with tf.variable_scope("model", reuse=tf.AUTO_REUSE): - all_logits = build_model( - inputs=all_images, - num_classes=FLAGS.num_classes, - is_training=is_training, - update_bn=True and is_training, - hparams=hparams, - ) - - sup_bsz = tf.shape(features["image"])[0] - sup_logits = all_logits[:sup_bsz] - - sup_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=sup_labels, - logits=sup_logits) - sup_prob = tf.nn.softmax(sup_logits, axis=-1) - 
metric_dict["sup/pred_prob"] = tf.reduce_mean( - tf.reduce_max(sup_prob, axis=-1)) - if FLAGS.tsa: - sup_loss, avg_sup_loss = anneal_sup_loss(sup_logits, sup_labels, sup_loss, - global_step, metric_dict) - else: - avg_sup_loss = tf.reduce_mean(sup_loss) - total_loss = avg_sup_loss - - if FLAGS.unsup_ratio > 0 and is_training: - aug_bsz = tf.shape(features["ori_image"])[0] - - ori_logits = all_logits[sup_bsz : sup_bsz + aug_bsz] - aug_logits = all_logits[sup_bsz + aug_bsz:] - if FLAGS.uda_softmax_temp != -1: - ori_logits_tgt = ori_logits / FLAGS.uda_softmax_temp - else: - ori_logits_tgt = ori_logits - ori_prob = tf.nn.softmax(ori_logits, axis=-1) - aug_prob = tf.nn.softmax(aug_logits, axis=-1) - metric_dict["unsup/ori_prob"] = tf.reduce_mean( - tf.reduce_max(ori_prob, axis=-1)) - metric_dict["unsup/aug_prob"] = tf.reduce_mean( - tf.reduce_max(aug_prob, axis=-1)) - - aug_loss = _kl_divergence_with_logits( - p_logits=tf.stop_gradient(ori_logits_tgt), - q_logits=aug_logits) - - if FLAGS.uda_confidence_thresh != -1: - ori_prob = tf.nn.softmax(ori_logits, axis=-1) - largest_prob = tf.reduce_max(ori_prob, axis=-1) - loss_mask = tf.cast(tf.greater( - largest_prob, FLAGS.uda_confidence_thresh), tf.float32) - metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(loss_mask) - loss_mask = tf.stop_gradient(loss_mask) - aug_loss = aug_loss * loss_mask - metric_dict["unsup/high_prob_loss"] = tf.reduce_mean(aug_loss) - - if FLAGS.ent_min_coeff > 0: - ent_min_coeff = FLAGS.ent_min_coeff - metric_dict["unsup/ent_min_coeff"] = ent_min_coeff - per_example_ent = get_ent(ori_logits) - ent_min_loss = tf.reduce_mean(per_example_ent) - total_loss = total_loss + ent_min_coeff * ent_min_loss - - avg_unsup_loss = tf.reduce_mean(aug_loss) - total_loss += FLAGS.unsup_coeff * avg_unsup_loss - metric_dict["unsup/loss"] = avg_unsup_loss - - total_loss = utils.decay_weights( - total_loss, - FLAGS.weight_decay_rate) - - #### Check model parameters - num_params = sum([np.prod(v.shape) for v in 
tf.trainable_variables()]) - tf.logging.info("#params: {}".format(num_params)) - - if FLAGS.verbose: - format_str = "{{:<{0}s}}\t{{}}".format( - max([len(v.name) for v in tf.trainable_variables()])) - for v in tf.trainable_variables(): - tf.logging.info(format_str.format(v.name, v.get_shape())) - - #### Evaluation mode - if mode == tf.estimator.ModeKeys.EVAL: - #### Metric function for classification - def metric_fn(per_example_loss, label_ids, logits): - # classification loss & accuracy - loss = tf.metrics.mean(per_example_loss) - - predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) - accuracy = tf.metrics.accuracy(label_ids, predictions) - - ret_dict = { - "eval/classify_loss": loss, - "eval/classify_accuracy": accuracy - } - - return ret_dict - - eval_metrics = (metric_fn, [sup_loss, sup_labels, sup_logits]) - - #### Constucting evaluation TPUEstimatorSpec. - eval_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, - loss=total_loss, - eval_metrics=eval_metrics) - - return eval_spec - - # increase the learning rate linearly - if FLAGS.warmup_steps > 0: - warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \ - * FLAGS.learning_rate - else: - warmup_lr = 0.0 - - # decay the learning rate using the cosine schedule - decay_lr = tf.train.cosine_decay( - FLAGS.learning_rate, - global_step=global_step-FLAGS.warmup_steps, - decay_steps=FLAGS.train_steps-FLAGS.warmup_steps, - alpha=FLAGS.min_lr_ratio) - - learning_rate = tf.where(global_step < FLAGS.warmup_steps, - warmup_lr, decay_lr) - - optimizer = tf.train.MomentumOptimizer( - learning_rate=learning_rate, - momentum=0.9, - use_nesterov=True) - - if FLAGS.use_tpu: - optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) - - grads_and_vars = optimizer.compute_gradients(total_loss) - gradients, variables = zip(*grads_and_vars) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - with tf.control_dependencies(update_ops): - train_op = optimizer.apply_gradients( - zip(gradients, 
variables), global_step=tf.train.get_global_step()) - - #### Creating training logging hook - # compute accuracy - sup_pred = tf.argmax(sup_logits, axis=-1, output_type=sup_labels.dtype) - is_correct = tf.to_float(tf.equal(sup_pred, sup_labels)) - acc = tf.reduce_mean(is_correct) - metric_dict["sup/sup_loss"] = avg_sup_loss - metric_dict["training/loss"] = total_loss - metric_dict["sup/acc"] = acc - metric_dict["training/lr"] = learning_rate - metric_dict["training/step"] = global_step - - if not FLAGS.use_tpu: - log_info = ("step [{training/step}] lr {training/lr:.6f} " - "loss {training/loss:.4f} " - "sup/acc {sup/acc:.4f} sup/loss {sup/sup_loss:.6f} ") - if FLAGS.unsup_ratio > 0: - log_info += "unsup/loss {unsup/loss:.6f} " - formatter = lambda kwargs: log_info.format(**kwargs) - logging_hook = tf.train.LoggingTensorHook( - tensors=metric_dict, - every_n_iter=FLAGS.iterations, - formatter=formatter) - training_hooks = [logging_hook] - #### Constucting training TPUEstimatorSpec. - train_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, loss=total_loss, train_op=train_op, - training_hooks=training_hooks) - else: - #### Constucting training TPUEstimatorSpec. 
- host_call = utils.construct_scalar_host_call( - metric_dict=metric_dict, - model_dir=params["model_dir"], - prefix="", - reduce_fn=tf.reduce_mean) - train_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, loss=total_loss, train_op=train_op, - host_call=host_call) - - return train_spec - - return model_fn - - -def train(hparams): - ##### Create input function - if FLAGS.unsup_ratio == 0: - FLAGS.aug_copy = 0 - if FLAGS.dev_size != -1: - FLAGS.do_train = True - FLAGS.do_eval = True - if FLAGS.do_train: - train_input_fn = data.get_input_fn( - data_dir=FLAGS.data_dir, - split="train", - task_name=FLAGS.task_name, - sup_size=FLAGS.sup_size, - unsup_ratio=FLAGS.unsup_ratio, - aug_copy=FLAGS.aug_copy, - ) - - if FLAGS.do_eval: - if FLAGS.dev_size != -1: - eval_input_fn = data.get_input_fn( - data_dir=FLAGS.data_dir, - split="dev", - task_name=FLAGS.task_name, - sup_size=FLAGS.dev_size, - unsup_ratio=0, - aug_copy=0) - eval_size = FLAGS.dev_size - else: - eval_input_fn = data.get_input_fn( - data_dir=FLAGS.data_dir, - split="test", - task_name=FLAGS.task_name, - sup_size=-1, - unsup_ratio=0, - aug_copy=0) - if FLAGS.task_name == "cifar10": - eval_size = 10000 - elif FLAGS.task_name == "svhn": - eval_size = 26032 - else: - assert False, "You need to specify the size of your test set." 
- eval_steps = eval_size // FLAGS.eval_batch_size - - ##### Get model function - model_fn = get_model_fn(hparams) - estimator = utils.get_TPU_estimator(FLAGS, model_fn) - - #### Training - if FLAGS.dev_size != -1: - tf.logging.info("***** Running training and validation *****") - tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size) - tf.logging.info(" Unsupervised batch size = %d", - FLAGS.train_batch_size * FLAGS.unsup_ratio) - tf.logging.info(" Num train steps = %d", FLAGS.train_steps) - curr_step = 0 - while True: - if curr_step >= FLAGS.train_steps: - break - tf.logging.info("Current step {}".format(curr_step)) - train_step = min(FLAGS.save_steps, FLAGS.train_steps - curr_step) - estimator.train(input_fn=train_input_fn, steps=train_step) - estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) - curr_step += FLAGS.save_steps - else: - if FLAGS.do_train: - tf.logging.info("***** Running training *****") - tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size) - tf.logging.info(" Unsupervised batch size = %d", - FLAGS.train_batch_size * FLAGS.unsup_ratio) - estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps) - if FLAGS.do_eval: - tf.logging.info("***** Running evaluation *****") - results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) - tf.logging.info(">> Results:") - for key in results.keys(): - tf.logging.info(" %s = %s", key, str(results[key])) - results[key] = results[key].item() - acc = results["eval/classify_accuracy"] - with tf.gfile.Open("{}/results.txt".format(FLAGS.model_dir), "w") as ouf: - ouf.write(str(acc)) - - -def main(_): - - if FLAGS.do_train: - tf.gfile.MakeDirs(FLAGS.model_dir) - flags_dict = tf.app.flags.FLAGS.flag_values_dict() - with tf.gfile.Open(os.path.join(FLAGS.model_dir, "FLAGS.json"), "w") as ouf: - json.dump(flags_dict, ouf) - hparams = tf.contrib.training.HParams() - - if FLAGS.model_name == "wrn": - hparams.add_hparam("model_name", "wrn") - 
hparams.add_hparam("wrn_size", FLAGS.wrn_size) - elif FLAGS.model_name == "shake_shake_32": - hparams.add_hparam("model_name", "shake_shake") - hparams.add_hparam("shake_shake_widen_factor", 2) - elif FLAGS.model_name == "shake_shake_96": - hparams.add_hparam("model_name", "shake_shake") - hparams.add_hparam("shake_shake_widen_factor", 6) - elif FLAGS.model_name == "shake_shake_112": - hparams.add_hparam("model_name", "shake_shake") - hparams.add_hparam("shake_shake_widen_factor", 7) - elif FLAGS.model_name == "pyramid_net": - hparams.add_hparam("model_name", "pyramid_net") - - elif FLAGS.model_name == "LeNet": - hparams.add_hparam("model_name", "LeNet") - - else: - raise ValueError("Not Valid Model Name: %s" % FLAGS.model_name) - - train(hparams) - - -if __name__ == "__main__": - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/Old/UDA/run_cifar10_gpu.sh b/Old/UDA/run_cifar10_gpu.sh deleted file mode 100755 index 5ccebd7..0000000 --- a/Old/UDA/run_cifar10_gpu.sh +++ /dev/null @@ -1,31 +0,0 @@ -# coding=utf-8 -# Copyright 2019 The Google UDA Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#!/bin/bash - -task_name=cifar10 - -python main.py \ - --model_name="LeNet"\ - --use_tpu=False \ - --do_train=True \ - --do_eval=True \ - --task_name=${task_name} \ - --sup_size=4000 \ - --unsup_ratio=5 \ - --train_batch_size=32 \ - --data_dir=data/proc_data/${task_name} \ - --model_dir=ckpt/cifar10_gpu \ - --train_steps=400000 \ - $@ diff --git a/Old/augmentations_randaugment.py b/Old/augmentations_randaugment.py deleted file mode 100755 index b491942..0000000 --- a/Old/augmentations_randaugment.py +++ /dev/null @@ -1,271 +0,0 @@ -# code in this file is adpated from rpmcruz/autoaugment -# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py -import random - -import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw -import numpy as np -import torch -from PIL import Image - -def ShearX(img, v): # [-0.3, 0.3] - assert -0.3 <= v <= 0.3 - if random.random() > 0.5: - v = -v - return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0)) - - -def ShearY(img, v): # [-0.3, 0.3] - assert -0.3 <= v <= 0.3 - if random.random() > 0.5: - v = -v - return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0)) - - -def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45] - assert -0.45 <= v <= 0.45 - if random.random() > 0.5: - v = -v - v = v * img.size[0] - return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0)) - - -def TranslateXabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45] - assert 0 <= v - if random.random() > 0.5: - v = -v - return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0)) - - -def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45] - assert -0.45 <= v <= 0.45 - if random.random() > 0.5: - v = -v - v = v * img.size[1] - return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v)) - - -def TranslateYabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45] - assert 0 <= v - if random.random() > 0.5: - v = -v - return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, 
v)) - - -def Rotate(img, v): # [-30, 30] - assert -30 <= v <= 30 - if random.random() > 0.5: - v = -v - return img.rotate(v) - - -def AutoContrast(img, _): - return PIL.ImageOps.autocontrast(img) - - -def Invert(img, _): - return PIL.ImageOps.invert(img) - - -def Equalize(img, _): - return PIL.ImageOps.equalize(img) - - -def Flip(img, _): # not from the paper - return PIL.ImageOps.mirror(img) - -def FlipLR(img, v): - return img.transpose(Image.FLIP_LEFT_RIGHT) - -def FlipUD(img, v): - return img.transpose(Image.FLIP_TOP_BOTTOM) - -def Solarize(img, v): # [0, 256] - assert 0 <= v <= 256 - return PIL.ImageOps.solarize(img, v) - - -def SolarizeAdd(img, addition=0, threshold=128): - img_np = np.array(img).astype(np.int) - img_np = img_np + addition - img_np = np.clip(img_np, 0, 255) - img_np = img_np.astype(np.uint8) - img = Image.fromarray(img_np) - return PIL.ImageOps.solarize(img, threshold) - - -def Posterize(img, v): # [4, 8] - v = int(v) - v = max(1, v) - return PIL.ImageOps.posterize(img, v) - - -def Contrast(img, v): # [0.1,1.9] - assert 0.1 <= v <= 1.9 - return PIL.ImageEnhance.Contrast(img).enhance(v) - - -def Color(img, v): # [0.1,1.9] - assert 0.1 <= v <= 1.9 - return PIL.ImageEnhance.Color(img).enhance(v) - - -def Brightness(img, v): # [0.1,1.9] - assert 0.1 <= v <= 1.9 - return PIL.ImageEnhance.Brightness(img).enhance(v) - - -def Sharpness(img, v): # [0.1,1.9] - assert 0.1 <= v <= 1.9 - return PIL.ImageEnhance.Sharpness(img).enhance(v) - - -def Cutout(img, v): # [0, 60] => percentage: [0, 0.2] - assert 0.0 <= v <= 0.2 - if v <= 0.: - return img - - v = v * img.size[0] - return CutoutAbs(img, v) - - -def CutoutAbs(img, v): # [0, 60] => percentage: [0, 0.2] - # assert 0 <= v <= 20 - if v < 0: - return img - w, h = img.size - x0 = np.random.uniform(w) - y0 = np.random.uniform(h) - - x0 = int(max(0, x0 - v / 2.)) - y0 = int(max(0, y0 - v / 2.)) - x1 = min(w, x0 + v) - y1 = min(h, y0 + v) - - xy = (x0, y0, x1, y1) - color = (125, 123, 114) - # color = (0, 0, 
0) - img = img.copy() - PIL.ImageDraw.Draw(img).rectangle(xy, color) - return img - - -def SamplePairing(imgs): # [0, 0.4] - def f(img1, v): - i = np.random.choice(len(imgs)) - img2 = PIL.Image.fromarray(imgs[i]) - return PIL.Image.blend(img1, img2, v) - - return f - - -def Identity(img, v): - return img - - -def augment_list(): # 16 oeprations and their ranges - # https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57 - l = [ - (Identity, 0., 1.0), - (FlipUD, 0., 1.0), - (FlipLR, 0., 1.0), - (Rotate, 0, 30), # 4 - (TranslateX, 0., 0.33), # 2 - (TranslateY, 0., 0.33), # 3 - (ShearX, 0., 0.3), # 0 - (ShearY, 0., 0.3), # 1 - #(AutoContrast, 0, 1), # 5 - #(Invert, 0, 1), # 6 - #(Equalize, 0, 1), # 7 - (Contrast, 0.1, 1.9), # 10 - (Color, 0.1, 1.9), # 11 - (Brightness, 0.1, 1.9), # 12 - (Sharpness, 0.1, 1.9), # 13 - (Posterize, 4, 8), # 9 - (Solarize, 1, 256), # 8 - - # (Cutout, 0, 0.2), # 14 - # (SamplePairing(imgs), 0, 0.4), # 15 - ] - - # https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505 - #l = [ - # (AutoContrast, 0, 1), - # (Equalize, 0, 1), - # (Invert, 0, 1), - # (Rotate, 0, 30), - # (Posterize, 0, 4), - # (Solarize, 0, 256), - # (SolarizeAdd, 0, 110), - # (Color, 0.1, 1.9), - # (Contrast, 0.1, 1.9), - # (Brightness, 0.1, 1.9), - # (Sharpness, 0.1, 1.9), - # (ShearX, 0., 0.3), - # (ShearY, 0., 0.3), - # (CutoutAbs, 0, 40), - # (TranslateXabs, 0., 100), - # (TranslateYabs, 0., 100), - #] - - return l - - -class Lighting(object): - """Lighting noise(AlexNet - style PCA - based noise)""" - - def __init__(self, alphastd, eigval, eigvec): - self.alphastd = alphastd - self.eigval = torch.Tensor(eigval) - self.eigvec = torch.Tensor(eigvec) - - def __call__(self, img): - if self.alphastd == 0: - return img - - alpha = img.new().resize_(3).normal_(0, self.alphastd) - rgb = self.eigvec.type_as(img).clone() \ - .mul(alpha.view(1, 3).expand(3, 3)) \ - 
.mul(self.eigval.view(1, 3).expand(3, 3)) \ - .sum(1).squeeze() - - return img.add(rgb.view(3, 1, 1).expand_as(img)) - - -class CutoutDefault(object): - """ - Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py - """ - def __init__(self, length): - self.length = length - - def __call__(self, img): - h, w = img.size(1), img.size(2) - mask = np.ones((h, w), np.float32) - y = np.random.randint(h) - x = np.random.randint(w) - - y1 = np.clip(y - self.length // 2, 0, h) - y2 = np.clip(y + self.length // 2, 0, h) - x1 = np.clip(x - self.length // 2, 0, w) - x2 = np.clip(x + self.length // 2, 0, w) - - mask[y1: y2, x1: x2] = 0. - mask = torch.from_numpy(mask) - mask = mask.expand_as(img) - img *= mask - return img - -PARAMETER_MAX = 1 -class RandAugment: - def __init__(self, n, m): - self.n = n - self.m = m # [0, PARAMETER_MAX] - self.augment_list = augment_list() - - def __call__(self, img): - ops = random.choices(self.augment_list, k=self.n) - for op, minval, maxval in ops: - val = (float(self.m) / PARAMETER_MAX) * float(maxval - minval) + minval - img = op(img, val) - - return img diff --git a/Old/salvador/cams.py b/Old/salvador/cams.py deleted file mode 100755 index 3e615f1..0000000 --- a/Old/salvador/cams.py +++ /dev/null @@ -1,98 +0,0 @@ -import torch -import numpy as np -import torchvision -from PIL import Image -from torch import topk -import torch.nn.functional as F -from torch import topk -import cv2 -from torchvision import transforms -import os - -class SaveFeatures(): - features=None - def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn) - def hook_fn(self, module, input, output): self.features = ((output.cpu()).data).numpy() - def remove(self): self.hook.remove() - -def getCAM(feature_conv, weight_fc, class_idx): - _, nc, h, w = feature_conv.shape - cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h*w))) - cam = cam.reshape(h, w) - cam = cam - np.min(cam) - cam_img = cam / np.max(cam) - # cam_img = np.uint8(255 * 
cam_img) - return cam_img - -def main(cam): - device = 'cuda:0' - model_name = 'resnet50' - root = 'NEW_SS' - - os.makedirs(os.path.join(root + '_CAM', 'OK'), exist_ok=True) - os.makedirs(os.path.join(root + '_CAM', 'NOK'), exist_ok=True) - - train_transform = transforms.Compose([ - transforms.ToTensor(), - ]) - - dataset = torchvision.datasets.ImageFolder( - root=root, transform=train_transform, - ) - - loader = torch.utils.data.DataLoader(dataset, batch_size=1) - - model = torchvision.models.__dict__[model_name](pretrained=False) - model.fc = torch.nn.Linear(model.fc.in_features, 2) - - model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage)) - model = model.to(device) - model.eval() - - weight_softmax_params = list(model._modules.get('fc').parameters()) - weight_softmax = np.squeeze(weight_softmax_params[0].cpu().data.numpy()) - - final_layer = model._modules.get('layer4') - - activated_features = SaveFeatures(final_layer) - - for i, (img, target ) in enumerate(loader): - img = img.to(device) - prediction = model(img) - pred_probabilities = F.softmax(prediction, dim=1).data.squeeze() - class_idx = topk(pred_probabilities,1)[1].int() - # if target.item() != class_idx: - # print(dataset.imgs[i][0]) - - if cam: - overlay = getCAM(activated_features.features, weight_softmax, class_idx ) - - import ipdb; ipdb.set_trace() - import PIL - from torchvision.transforms import ToPILImage - - img = ToPILImage()(overlay).resize(size=(1280, 1024), resample=PIL.Image.BILINEAR) - img.save('heat-pil.jpg') - - - img = cv2.imread(dataset.imgs[i][0]) - height, width, _ = img.shape - overlay = cv2.resize(overlay, (width, height)) - heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET) - cv2.imwrite('heat-cv2.jpg', heatmap) - - img = cv2.imread(dataset.imgs[i][0]) - height, width, _ = img.shape - overlay = cv2.resize(overlay, (width, height)) - heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET) - result = heatmap * 0.3 + img * 0.5 - - clss = 
dataset.imgs[i][0].split(os.sep)[1] - name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0] - cv2.imwrite(os.path.join(root+"_CAM", clss, name + '.jpg'), result) - print(f'{os.path.join(root+"_CAM", clss, name + ".jpg")} saved') - - activated_features.remove() - -if __name__ == "__main__": - main(cam=True) diff --git a/Old/salvador/checkpoint.pt b/Old/salvador/checkpoint.pt deleted file mode 100755 index 6252c9e..0000000 Binary files a/Old/salvador/checkpoint.pt and /dev/null differ diff --git a/Old/salvador/dataug.py b/Old/salvador/dataug.py deleted file mode 100755 index 6f246df..0000000 --- a/Old/salvador/dataug.py +++ /dev/null @@ -1,1136 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.distributions import * - -#import kornia -#import random -import numpy as np -import copy - -import transformations as TF - -class Data_aug(nn.Module): #Rotation parametree - def __init__(self): - super(Data_aug, self).__init__() - self._data_augmentation = True - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.tensor(0.5)), - "mag": nn.Parameter(torch.tensor(1.0)) - }) - - #self.params["mag"].register_hook(print) - - def forward(self, x): - - if self._data_augmentation and random.random() < self._params["prob"]: - #print('Aug') - batch_size = x.shape[0] - # create transformation (rotation) - alpha = self._params["mag"]*180 # in degrees - angle = torch.ones(batch_size, device=x.device) * alpha - - # define the rotation center - center = torch.ones(batch_size, 2, device=x.device) - center[..., 0] = x.shape[3] / 2 # x - center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=x.device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - return x - - def eval(self): - 
self.augment(mode=False) - nn.Module.eval(self) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - return "Data_aug(Mag-1 TF)" - -class Data_augV2(nn.Module): #Methode exacte - def __init__(self): - super(Data_augV2, self).__init__() - self._data_augmentation = True - - self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation - #self._fixed_transf=[0.0] - self._nb_tf= len(self._fixed_transf) - - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme - #"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0)) - }) - - #print(self._params["prob"], self._params["prob2"]) - - self.transf_idx=0 - - def forward(self, x): - - if self._data_augmentation: - #print('Aug',self._fixed_transf[self.transf_idx]) - device = x.device - batch_size = x.shape[0] - - # create transformation (rotation) - #alpha = 180 # in degrees - alpha = self._fixed_transf[self.transf_idx] - angle = torch.ones(batch_size, device=device) * alpha - - x = self.rotate(x,angle) - - return x - - def rotate(self, x, angle): - - device = x.device - batch_size = x.shape[0] - # define the rotation center - center = torch.ones(batch_size, 2, device=device) - center[..., 0] = x.shape[3] / 2 # x - center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - - def adjust_param(self): #Detach from gradient ? 
- self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0) - #print('proba',self._params['prob']) - self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1 - #print('Sum p', sum(self._params['prob'])) - - def eval(self): - self.augment(mode=False) - nn.Module.eval(self) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - return "Data_augV2(Exact-%d TF)" % self._nb_tf - -class Data_augV3(nn.Module): #Echantillonage uniforme/Mixte - def __init__(self, mix_dist=0.0): - super(Data_augV3, self).__init__() - self._data_augmentation = True - - #self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation - self._fixed_transf=[0.0, 1.0, -1.0] #Flips (Identity,Horizontal,Vertical) - #self._fixed_transf=[0.0] - self._nb_tf= len(self._fixed_transf) - - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme - #"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0)) - }) - - #print(self._params["prob"], self._params["prob2"]) - self._sample = [] - - self._mix_dist = False - if mix_dist != 0.0: - self._mix_dist = True - self._mix_factor = max(min(mix_dist, 1.0), 0.0) - - def forward(self, x): - - if self._data_augmentation: - device = x.device - batch_size = x.shape[0] - - - #good_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1),self._params["prob"], device=device)) - #bad_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1), 1-self._params["prob"], device=device)) - - #transform_dist = Categorical(probs=torch.tensor([self._params["prob"], 1-self._params["prob"]], device=device)) - #self._sample = transform_dist._sample(sample_shape=torch.Size([batch_size,1])) - - uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=0) - - if not 
self._mix_dist: - distrib = uniforme_dist - else: - distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=0) #Mix distrib reel / uniforme avec mix_factor - - cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*distrib) - self._sample = cat_distrib.sample() - - TF_param = torch.tensor([self._fixed_transf[x] for x in self._sample], device=device) #Approche de marco peut-etre plus rapide - - #x = self.rotate(x,angle=TF_param) - x = self.flip(x,flip_mat=TF_param) - - return x - - def rotate(self, x, angle): - - device = x.device - batch_size = x.shape[0] - # define the rotation center - center = torch.ones(batch_size, 2, device=device) - center[..., 0] = x.shape[3] / 2 # x - center[..., 1] = x.shape[2] / 2 # y - - #print(x.shape, center) - # define the scale factor - scale = torch.ones(batch_size, device=device) - - # compute the transformation matrix - M = kornia.get_rotation_matrix2d(center, angle, scale) - - # apply the transformation to original image - return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w) - - def flip(self, x, flip_mat): - - #print(flip_mat) - device = x.device - batch_size = x.shape[0] - - h, w = x.shape[2], x.shape[3] # destination size - #points_src = torch.ones(batch_size, 4, 2, device=device) - #points_dst = torch.ones(batch_size, 4, 2, device=device) - - #Identity - iM=torch.tensor(np.eye(3)) - - #Horizontal flip - # the source points are the region to crop corners - #points_src = torch.FloatTensor([[ - # [w - 1, 0], [0, 0], [0, h - 1], [w - 1, h - 1], - #]]) - # the destination points are the image vertexes - #points_dst = torch.FloatTensor([[ - # [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1], - #]]) - # compute perspective transform - #hM = kornia.get_perspective_transform(points_src, points_dst) - hM =torch.tensor( [[[-1., 0., w-1], - [ 0., 1., 0.], - [ 0., 0., 1.]]]) - - #Vertical flip - # the source points are the region to crop corners - 
#points_src = torch.FloatTensor([[ - # [0, h - 1], [w - 1, h - 1], [w - 1, 0], [0, 0], - #]]) - # the destination points are the image vertexes - #points_dst = torch.FloatTensor([[ - # [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1], - #]]) - # compute perspective transform - #vM = kornia.get_perspective_transform(points_src, points_dst) - vM =torch.tensor( [[[ 1., 0., 0.], - [ 0., -1., h-1], - [ 0., 0., 1.]]]) - #print(vM) - - M=torch.ones(batch_size, 3, 3, device=device) - - for i in range(batch_size): # A optimiser - if flip_mat[i]==0.0: - M[i,]=iM - elif flip_mat[i]==1.0: - M[i,]=hM - elif flip_mat[i]==-1.0: - M[i,]=vM - - # warp the original image by the found transform - return kornia.warp_perspective(x, M, dsize=(h, w)) - - def adjust_param(self, soft=False): #Detach from gradient ? - - if soft : - self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible - else: - #self._params['prob'].clamp(min=0.0,max=1.0) - self._params['prob'].data = F.relu(self._params['prob'].data) - #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0) - #print('proba',self._params['prob']) - self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1 - #print('Sum p', sum(self._params['prob'])) - - def loss_weight(self): - #w_loss = [self._params["prob"][x] for x in self._sample] - #print(self._sample.view(-1,1).shape) - #print(self._sample[:10]) - - w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device) - w_loss.scatter_(1, self._sample.view(-1,1), 1) - #print(w_loss.shape) - #print(w_loss[:10,:]) - w_loss = w_loss * self._params["prob"] - #print(w_loss.shape) - #print(w_loss[:10,:]) - w_loss = torch.sum(w_loss,dim=1) - #print(w_loss.shape) - #print(w_loss[:10]) - return w_loss - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(Data_augV3, 
self).train(mode) - - def eval(self): - self.train(mode=False) - #super(Augmented_model, self).eval() - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - if not self._mix_dist: - return "Data_augV3(Uniform-%d TF)" % self._nb_tf - else: - return "Data_augV3(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf) - -class Data_augV4(nn.Module): #Transformations avec mask - def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0): - super(Data_augV4, self).__init__() - assert len(TF_dict)>0 - - self._data_augmentation = True - - #self._TF_matrix={} - #self._input_info={'h':0, 'w':0, 'device':None} #Input associe a TF_matrix - #self._mag_fct = TF_dict - self._TF_dict = TF_dict - self._TF= list(self._TF_dict.keys()) - self._nb_tf= len(self._TF) - - self._N_seqTF = N_TF - - self._fixed_mag=5 #[0, PARAMETER_MAX] - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme - }) - - self._samples = [] - - self._mix_dist = False - if mix_dist != 0.0: - self._mix_dist = True - self._mix_factor = max(min(mix_dist, 1.0), 0.0) - - def forward(self, x): - if self._data_augmentation: - device = x.device - batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] - - x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles) - self._samples = [] - - for _ in range(self._N_seqTF): - ## Echantillonage ## - uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1) - - if not self._mix_dist: - self._distrib = uniforme_dist - else: - self._distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor - - cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib) - sample = cat_distrib.sample() - self._samples.append(sample) - - ## 
Transformations ## - x = self.apply_TF(x, sample) - return x - ''' - def compute_TF_matrix(self, magnitude=None, sample_info= None): - print('Computing TF_matrix...') - if not magnitude : - magnitude=self._fixed_mag - - if sample_info: - self._input_info['h']= sample_info['h'] - self._input_info['w']= sample_info['w'] - self._input_info['device'] = sample_info['device'] - h, w, device= self._input_info['h'], self._input_info['w'], self._input_info['device'] - - self._TF_matrix={} - for tf in self._TF : - if tf=='Id': - self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.], - [ 0., 1., 0.], - [ 0., 0., 1.]]], device=device) - elif tf=='Rot': - center = torch.ones(1, 2, device=device) - center[0, 0] = w / 2 # x - center[0, 1] = h / 2 # y - scale = torch.ones(1, device=device) - angle = self._mag_fct[tf](magnitude) * torch.ones(1, device=device) - R = kornia.get_rotation_matrix2d(center, angle, scale) #Rotation matrix (1,2,3) - self._TF_matrix[tf]=torch.cat((R,torch.tensor([[[ 0., 0., 1.]]], device=device)), dim=1) #TF matrix (1,3,3) - elif tf=='FlipLR': - self._TF_matrix[tf]=torch.tensor([[[-1., 0., w-1], - [ 0., 1., 0.], - [ 0., 0., 1.]]], device=device) - elif tf=='FlipUD': - self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.], - [ 0., -1., h-1], - [ 0., 0., 1.]]], device=device) - else: - raise Exception("Invalid TF requested") - ''' - def apply_TF(self, x, sampled_TF): - device = x.device - smps_x=[] - masks=[] - for tf_idx in range(self._nb_tf): - mask = sampled_TF==tf_idx #Create selection mask - smp_x = x[mask] #torch.masked_select() ? 
- - if smp_x.shape[0]!=0: #if there's data to TF - magnitude=self._fixed_mag - tf=self._TF[tf_idx] - - ''' - ## Geometric TF ## - if tf=='Identity': - pass - elif tf=='FlipLR': - smp_x = TF.flipLR(smp_x) - elif tf=='FlipUD': - smp_x = TF.flipUD(smp_x) - elif tf=='Rotate': - smp_x = TF.rotate(smp_x, angle=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='TranslateX' or tf=='TranslateY': - smp_x = TF.translate(smp_x, translation=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='ShearX' or tf=='ShearY' : - smp_x = TF.shear(smp_x, shear=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - - ## Color TF (Expect image in the range of [0, 1]) ## - elif tf=='Contrast': - smp_x = TF.contrast(smp_x, contrast_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='Color': - smp_x = TF.color(smp_x, color_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='Brightness': - smp_x = TF.brightness(smp_x, brightness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='Sharpness': - smp_x = TF.sharpeness(smp_x, sharpness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='Posterize': - smp_x = TF.posterize(smp_x, bits=torch.tensor([1 for _ in smp_x], device=device)) - elif tf=='Solarize': - smp_x = TF.solarize(smp_x, thresholds=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device)) - elif tf=='Equalize': - smp_x = TF.equalize(smp_x) - elif tf=='Auto_Contrast': - smp_x = TF.auto_contrast(smp_x) - else: - raise Exception("Invalid TF requested : ", tf) - - x[mask]=smp_x # Refusionner eviter x[mask] : in place - ''' - x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude) # Refusionner eviter x[mask] : in place - - #idx= mask.nonzero() - #print('-'*8) - #print(idx[0], tf_idx) - #print(smp_x[0,]) - 
#x=x.view(-1,3*32*32) - #x=x.scatter(dim=0, index=idx, src=smp_x.view(-1,3*32*32)) #Changement des Tensor mais pas visible sur la visualisation... - #x=x.view(-1,3,32,32) - #print(x[0,]) - - ''' - if len(self._TF_matrix)==0 or self._input_info['h']!=h or self._input_info['w']!=w or self._input_info['device']!=device: #Device different:Pas necessaire de tout recalculer - self.compute_TF_matrix(sample_info={'h': x.shape[2], - 'w': x.shape[3], - 'device': x.device}) - - TF_matrix = torch.zeros(batch_size, 3, 3, device=device) #All geom TF - - for tf_idx in range(self._nb_tf): - mask = self._sample==tf_idx #Create selection mask - TF_matrix[mask,]=self._TF_matrix[self._TF[tf_idx]] - - x=kornia.warp_perspective(x, TF_matrix, dsize=(h, w)) - ''' - return x - - def adjust_param(self, soft=False): #Detach from gradient ? - - if soft : - self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible - else: - #self._params['prob'].clamp(min=0.0,max=1.0) - self._params['prob'].data = F.relu(self._params['prob'].data) - #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0) - - self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1 - - def loss_weight(self): - # 1 seule TF - #self._sample = self._samples[-1] - #w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device) - #w_loss.scatter_(dim=1, index=self._sample.view(-1,1), value=1) - #w_loss = w_loss * self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss) - #w_loss = torch.sum(w_loss,dim=1) - - #Plusieurs TF sequentielles - w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device) - for sample in self._samples: - tmp_w = torch.zeros(w_loss.size(),device=w_loss.device) - tmp_w.scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF) - w_loss += tmp_w - - w_loss = w_loss * 
self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss) - w_loss = torch.sum(w_loss,dim=1) - return w_loss - - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(Data_augV4, self).train(mode) - - def eval(self): - self.train(mode=False) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - if not self._mix_dist: - return "Data_augV4(Uniform-%d TF x %d)" % (self._nb_tf, self._N_seqTF) - else: - return "Data_augV4(Mix %.1f-%d TF x %d)" % (self._mix_factor, self._nb_tf, self._N_seqTF) - -class Data_augV5(nn.Module): #Optimisation jointe (mag, proba) - def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True): - super(Data_augV5, self).__init__() - assert len(TF_dict)>0 - - self._data_augmentation = True - - self._TF_dict = TF_dict - self._TF= list(self._TF_dict.keys()) - self._nb_tf= len(self._TF) - - self._N_seqTF = N_TF - self._shared_mag = shared_mag - self._fixed_mag = fixed_mag - - #self._fixed_mag=5 #[0, PARAMETER_MAX] - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme - "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX)) if self._shared_mag - else torch.tensor(float(TF.PARAMETER_MAX)).expand(self._nb_tf)), #[0, PARAMETER_MAX] - }) - - #for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag - - #Distribution - self._fixed_prob=fixed_prob - self._samples = [] - self._mix_dist = False - if mix_dist != 0.0: - self._mix_dist = True - self._mix_factor = max(min(mix_dist, 1.0), 0.0) - - #Mag regularisation - if not self._fixed_mag: - if self._shared_mag : - self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, 
dtype=torch.float) #Encourage amplitude max - else: - self._reg_mask=[self._TF.index(t) for t in self._TF if t not in TF.TF_ignore_mag] - self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max - - def forward(self, x): - self._samples = [] - if self._data_augmentation:# and TF.random.random() < 0.5: - device = x.device - batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] - - x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles) - - for _ in range(self._N_seqTF): - ## Echantillonage ## - uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1) - - if not self._mix_dist: - self._distrib = uniforme_dist - else: - prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"] - self._distrib = (self._mix_factor*prob+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor - - cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib) - sample = cat_distrib.sample() - self._samples.append(sample) - - ## Transformations ## - x = self.apply_TF(x, sample) - return x - - def apply_TF(self, x, sampled_TF): - device = x.device - batch_size, channels, h, w = x.shape - smps_x=[] - - for tf_idx in range(self._nb_tf): - mask = sampled_TF==tf_idx #Create selection mask - smp_x = x[mask] #torch.masked_select() ? 
(NEcessite d'expand le mask au meme dim) - - if smp_x.shape[0]!=0: #if there's data to TF - magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx] - if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param - - tf=self._TF[tf_idx] - #print(magnitude) - - #In place - #x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude) - - #Out of place - smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude) - idx= mask.nonzero() - idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ... - x=x.scatter(dim=0, index=idx, src=smp_x) - - return x - - def adjust_param(self, soft=False): #Detach from gradient ? - if not self._fixed_prob: - if soft : - self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible - else: - self._params['prob'].data = F.relu(self._params['prob'].data) - #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0) - self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1 - - if not self._fixed_mag: - #self._params['mag'].data = self._params['mag'].data.clamp(min=0.0,max=TF.PARAMETER_MAX) #Bloque une fois au extreme - self._params['mag'].data = F.relu(self._params['mag'].data) - F.relu(self._params['mag'].data - TF.PARAMETER_MAX) - - def loss_weight(self): - if len(self._samples)==0 : return 1 #Pas d'echantillon = pas de ponderation - - prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"] - # 1 seule TF - #self._sample = self._samples[-1] - #w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device) - #w_loss.scatter_(dim=1, index=self._sample.view(-1,1), value=1) - #w_loss = w_loss * self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss) - #w_loss = 
torch.sum(w_loss,dim=1) - - #Plusieurs TF sequentielles (Attention ne prend pas en compte ordre !) - w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device) - for sample in self._samples: - tmp_w = torch.zeros(w_loss.size(),device=w_loss.device) - tmp_w.scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF) - w_loss += tmp_w - - w_loss = w_loss * prob/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss) - w_loss = torch.sum(w_loss,dim=1) - return w_loss - - def reg_loss(self, reg_factor=0.005): - if self._fixed_mag: # or self._fixed_prob: #Pas de regularisation si trop peu de DOF - return torch.tensor(0) - else: - #return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean') - params = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask] - return reg_factor * F.mse_loss(params, target=self._reg_tgt.to(params.device), reduction='mean') - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(Data_augV5, self).train(mode) - - def eval(self): - self.train(mode=False) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - dist_param='' - if self._fixed_prob: dist_param+='Fx' - mag_param='Mag' - if self._fixed_mag: mag_param+= 'Fx' - if self._shared_mag: mag_param+= 'Sh' - if not self._mix_dist: - return "Data_augV5(Uniform%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param) - else: - return "Data_augV5(Mix%.1f%s-%dTFx%d-%s)" % (self._mix_factor,dist_param, self._nb_tf, self._N_seqTF, mag_param) - -class Data_augV6(nn.Module): #Optimisation sequentielle - def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, prob_set_size=None, fixed_mag=True, shared_mag=True): - 
super(Data_augV6, self).__init__() - assert len(TF_dict)>0 - - self._data_augmentation = True - - self._TF_dict = TF_dict - self._TF= list(self._TF_dict.keys()) - self._nb_tf= len(self._TF) - - self._N_seqTF = N_TF - self._shared_mag = shared_mag - self._fixed_mag = fixed_mag - - self._TF_set_size = prob_set_size if prob_set_size else self._nb_tf - - self._fixed_TF=[0] #Identite - assert self._TF_set_size>=len(self._fixed_TF) - - if self._TF_set_size>self._nb_tf: - print("Warning : TF sets size higher than number of TF. Reducing set size to %d"%self._nb_tf) - self._TF_set_size=self._nb_tf - - ## Genenerate TF sets ## - if self._TF_set_size==len(self._fixed_TF): - print("Warning : using only fixed set of TF : ", self._fixed_TF) - self._TF_sets=torch.tensor([self._fixed_TF]) - else: - def generate_TF_sets(n_TF, set_size, idx_prefix=[]): - TF_sets=[] - if len(idx_prefix)!=0: - if set_size>2: - for i in range(idx_prefix[-1]+1, n_TF): - TF_sets += generate_TF_sets(n_TF=n_TF, set_size=set_size-1, idx_prefix=idx_prefix+[i]) - else: - #if i not in idx_prefix: - TF_sets+=[torch.tensor(idx_prefix+[i]) for i in range(idx_prefix[-1]+1, n_TF)] - elif set_size>1: - for i in range(0, n_TF): - TF_sets += generate_TF_sets(n_TF=n_TF, set_size=set_size, idx_prefix=[i]) - else: - TF_sets+=[torch.tensor([i]) for i in range(0, n_TF)] - return TF_sets - - self._TF_sets=generate_TF_sets(self._nb_tf, self._TF_set_size, self._fixed_TF) - - ## Plan TF learning schedule ## - self._TF_schedule = [list(range(len(self._TF_sets))) for _ in range(self._N_seqTF)] - for n_tf in range(self._N_seqTF) : - TF.random.shuffle(self._TF_schedule[n_tf]) - - self._current_TF_idx=0 #random.randint - self._start_prob = 1/self._TF_set_size - - - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.tensor(self._start_prob).expand(self._nb_tf)), #Proba independantes - "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX)) if self._shared_mag - else 
torch.tensor(float(TF.PARAMETER_MAX)).expand(self._nb_tf)), #[0, PARAMETER_MAX] - }) - - #for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag - - #Distribution - self._fixed_prob=fixed_prob - self._samples = [] - self._mix_dist = False - if mix_dist != 0.0: - self._mix_dist = True - self._mix_factor = max(min(mix_dist, 1.0), 0.0) - - #Mag regularisation - if not self._fixed_mag: - if self._shared_mag : - self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage amplitude max - else: - self._reg_mask=[self._TF.index(t) for t in self._TF if t not in TF.TF_ignore_mag] - self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max - - def forward(self, x): - self._samples = [] - if self._data_augmentation:# and TF.random.random() < 0.5: - device = x.device - batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] - - x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles) - - for n_tf in range(self._N_seqTF): - - tf_set = self._TF_sets[self._TF_schedule[n_tf][self._current_TF_idx]].to(device) - #print(n_tf, tf_set) - ## Echantillonage ## - uniforme_dist = torch.ones(1,len(tf_set),device=device).softmax(dim=1) - - if not self._mix_dist: - self._distrib = uniforme_dist - else: - prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"] - curr_prob = torch.index_select(prob, 0, tf_set) - curr_prob = curr_prob /sum(curr_prob) #Contrainte sum(p)=1 - self._distrib = (self._mix_factor*curr_prob+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor - - cat_distrib= Categorical(probs=torch.ones((batch_size, len(tf_set)), device=device)*self._distrib) - sample = cat_distrib.sample() - self._samples.append(sample) - - ## Transformations ## - x = self.apply_TF(x, sample) - return x - - def 
apply_TF(self, x, sampled_TF): - device = x.device - batch_size, channels, h, w = x.shape - smps_x=[] - - for sel_idx, tf_idx in enumerate(self._TF_sets[self._current_TF_idx]): - mask = sampled_TF==sel_idx #Create selection mask - smp_x = x[mask] #torch.masked_select() ? (NEcessite d'expand le mask au meme dim) - - if smp_x.shape[0]!=0: #if there's data to TF - magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx] - if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param - - tf=self._TF[tf_idx] - #print(magnitude) - - #In place - #x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude) - - #Out of place - smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude) - idx= mask.nonzero() - idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ... - x=x.scatter(dim=0, index=idx, src=smp_x) - - return x - - def adjust_param(self, soft=False): #Detach from gradient ? 
- if not self._fixed_prob: - if soft : - self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible - else: - self._params['prob'].data = F.relu(self._params['prob'].data) - #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0) - #self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1 - - self._params['prob'].data[0]=self._start_prob #Fixe p identite - - if not self._fixed_mag: - #self._params['mag'].data = self._params['mag'].data.clamp(min=0.0,max=TF.PARAMETER_MAX) #Bloque une fois au extreme - self._params['mag'].data = F.relu(self._params['mag'].data) - F.relu(self._params['mag'].data - TF.PARAMETER_MAX) - - def loss_weight(self): #A verifier - if len(self._samples)==0 : return 1 #Pas d'echantillon = pas de ponderation - - prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"] - - #Plusieurs TF sequentielles (Attention ne prend pas en compte ordre !) 
- w_loss = torch.zeros((self._samples[0].shape[0],self._TF_set_size), device=self._samples[0].device) - for n_tf in range(self._N_seqTF): - tmp_w = torch.zeros(w_loss.size(),device=w_loss.device) - tmp_w.scatter_(dim=1, index=self._samples[n_tf].view(-1,1), value=1/self._N_seqTF) - - tf_set = self._TF_sets[self._TF_schedule[n_tf][self._current_TF_idx]].to(prob.device) - curr_prob = torch.index_select(prob, 0, tf_set) - curr_prob = curr_prob /sum(curr_prob) #Contrainte sum(p)=1 - - #ATTENTION DISTRIB DIFFERENTE AVEC MIX - assert not self._mix_dist - w_loss += tmp_w * curr_prob /self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss) - - w_loss = torch.sum(w_loss,dim=1) - return w_loss - - def reg_loss(self, reg_factor=0.005): - if self._fixed_mag: # or self._fixed_prob: #Pas de regularisation si trop peu de DOF - return torch.tensor(0) - else: - #return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean') - params = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask] - return reg_factor * F.mse_loss(params, target=self._reg_tgt.to(params.device), reduction='mean') - - def next_TF_set(self, idx=None): - if idx: - self._current_TF_idx=idx - else: - self._current_TF_idx+=1 - - if self._current_TF_idx>=len(self._TF_schedule[0]): - self._current_TF_idx=0 - for n_tf in range(self._N_seqTF) : - TF.random.shuffle(self._TF_schedule[n_tf]) - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(Data_augV6, self).train(mode) - - def eval(self): - self.train(mode=False) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - dist_param='' - if self._fixed_prob: dist_param+='Fx' - mag_param='Mag' - if self._fixed_mag: mag_param+= 'Fx' - if self._shared_mag: mag_param+= 
'Sh' - if not self._mix_dist: - return "Data_augV6(Uniform%s-%dTF(%d)x%d-%s)" % (dist_param, self._nb_tf, self._TF_set_size, self._N_seqTF, mag_param) - else: - return "Data_augV6(Mix%.1f%s-%dTF(%d)x%d-%s)" % (self._mix_factor,dist_param, self._nb_tf, self._TF_set_size, self._N_seqTF, mag_param) - - -class RandAug(nn.Module): #RandAugment = UniformFx-MagFxSh + rapide - def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX): - super(RandAug, self).__init__() - - self._data_augmentation = True - - self._TF_dict = TF_dict - self._TF= list(self._TF_dict.keys()) - self._nb_tf= len(self._TF) - self._N_seqTF = N_TF - - self.mag=nn.Parameter(torch.tensor(float(mag))) - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #pas utilise - "mag" : nn.Parameter(torch.tensor(float(mag))), - }) - self._shared_mag = True - self._fixed_mag = True - - def forward(self, x): - if self._data_augmentation:# and TF.random.random() < 0.5: - device = x.device - batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] - - x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles) - - for _ in range(self._N_seqTF): - ## Echantillonage ## == sampled_ops = np.random.choice(transforms, N) - uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1) - cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*uniforme_dist) - sample = cat_distrib.sample() - - ## Transformations ## - x = self.apply_TF(x, sample) - return x - - def apply_TF(self, x, sampled_TF): - smps_x=[] - - for tf_idx in range(self._nb_tf): - mask = sampled_TF==tf_idx #Create selection mask - smp_x = x[mask] #torch.masked_select() ? 
(NEcessite d'expand le mask au meme dim) - - if smp_x.shape[0]!=0: #if there's data to TF - magnitude=self._params["mag"].detach() - - tf=self._TF[tf_idx] - #print(magnitude) - - #In place - x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude) - - return x - - def adjust_param(self, soft=False): - pass #Pas de parametre a opti - - def loss_weight(self): - return 1 #Pas d'echantillon = pas de ponderation - - def reg_loss(self, reg_factor=0.005): - return torch.tensor(0) #Pas de regularisation - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(RandAug, self).train(mode) - - def eval(self): - self.train(mode=False) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - return "RandAug(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag) - -class RandAugUDA(nn.Module): #RandAugment from UDA (for DA during training) - def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX): - super(RandAugUDA, self).__init__() - - self._data_augmentation = True - - self._TF_dict = TF_dict - self._TF= list(self._TF_dict.keys()) - self._nb_tf= len(self._TF) - self._N_seqTF = N_TF - - self.mag=nn.Parameter(torch.tensor(float(mag))) - self._params = nn.ParameterDict({ - "prob": nn.Parameter(torch.tensor(0.5).unsqueeze(dim=0)), - "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX))), - }) - self._shared_mag = True - self._fixed_mag = True - - self._op_list =[] - for tf in self._TF: - for mag in range(1, int(self._params['mag']*10), 1): - self._op_list+=[(tf, self._params['prob'].item(), mag/10)] - self._nb_op = len(self._op_list) - - def forward(self, x): - if self._data_augmentation:# and TF.random.random() < 0.5: - device = x.device - batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] - - x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des 
utilisations paralleles) - - for _ in range(self._N_seqTF): - ## Echantillonage ## == sampled_ops = np.random.choice(transforms, N) - uniforme_dist = torch.ones(1, self._nb_op, device=device).softmax(dim=1) - cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_op), device=device)*uniforme_dist) - sample = cat_distrib.sample() - - ## Transformations ## - x = self.apply_TF(x, sample) - return x - - def apply_TF(self, x, sampled_TF): - smps_x=[] - - for op_idx in range(self._nb_op): - mask = sampled_TF==op_idx #Create selection mask - smp_x = x[mask] #torch.masked_select() ? (Necessite d'expand le mask au meme dim) - - if smp_x.shape[0]!=0: #if there's data to TF - if TF.random.random() < self._op_list[op_idx][1]: - magnitude=self._op_list[op_idx][2] - tf=self._op_list[op_idx][0] - - #In place - x[mask]=self._TF_dict[tf](x=smp_x, mag=torch.tensor(magnitude, device=x.device)) - - return x - - def adjust_param(self, soft=False): - pass #Pas de parametre a opti - - def loss_weight(self): - return 1 #Pas d'echantillon = pas de ponderation - - def reg_loss(self, reg_factor=0.005): - return torch.tensor(0) #Pas de regularisation - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self.augment(mode=mode) #Inutile si mode=None - super(RandAugUDA, self).train(mode) - - def eval(self): - self.train(mode=False) - - def augment(self, mode=True): - self._data_augmentation=mode - - def __getitem__(self, key): - return self._params[key] - - def __str__(self): - return "RandAugUDA(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag) - -class Augmented_model(nn.Module): - def __init__(self, data_augmenter, model): - super(Augmented_model, self).__init__() - - self._mods = nn.ModuleDict({ - 'data_aug': data_augmenter, - 'model': model - }) - - self.augment(mode=True) - - def initialize(self): - self._mods['model'].initialize() - - def forward(self, x): - return self._mods['model'](self._mods['data_aug'](x)) - - def augment(self, 
mode=True): - self._data_augmentation=mode - self._mods['data_aug'].augment(mode) - - def train(self, mode=None): - if mode is None : - mode=self._data_augmentation - self._mods['data_aug'].augment(mode) - super(Augmented_model, self).train(mode) - return self - - def eval(self): - return self.train(mode=False) - #super(Augmented_model, self).eval() - - def items(self): - """Return an iterable of the ModuleDict key/value pairs. - """ - return self._mods.items() - - def update(self, modules): - self._mods.update(modules) - - def is_augmenting(self): - return self._data_augmentation - - def TF_names(self): - try: - return self._mods['data_aug']._TF - except: - return None - - def __getitem__(self, key): - return self._mods[key] - - def __str__(self): - return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")" \ No newline at end of file diff --git a/Old/salvador/dataug_utils.py b/Old/salvador/dataug_utils.py deleted file mode 100755 index ea81ea3..0000000 --- a/Old/salvador/dataug_utils.py +++ /dev/null @@ -1,314 +0,0 @@ -import numpy as np -import json, math, time, os -import matplotlib.pyplot as plt -import copy -import gc - -from torchviz import make_dot - -import torch -import torch.nn.functional as F - -import time - -class timer(): - def __init__(self): - self._start_time=time.time() - def exec_time(self): - end = time.time() - res = end-self._start_time - self._start_time=end - return res - -def print_graph(PyTorch_obj, fig_name='graph'): - graph=make_dot(PyTorch_obj) #Loss give the whole graph - graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats - graph.render(fig_name) - -def plot_res(log, fig_name='res', param_names=None): - - epochs = [x["epoch"] for x in log] - - fig, ax = plt.subplots(ncols=3, figsize=(15, 3)) - - ax[0].set_title('Loss') - ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train') - ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val') - ax[0].legend() - - 
ax[1].set_title('Acc') - ax[1].plot(epochs,[x["acc"] for x in log]) - - if log[0]["param"]!= None: - if isinstance(log[0]["param"],float): - ax[2].set_title('Mag') - ax[2].plot(epochs,[x["param"] for x in log], label='Mag') - ax[2].legend() - else : - ax[2].set_title('Prob') - #for idx, _ in enumerate(log[0]["param"]): - #ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx)) - if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])] - proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])] - ax[2].stackplot(epochs, proba, labels=param_names) - ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5)) - - - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name) - plt.close() - -def plot_resV2(log, fig_name='res', param_names=None): - - epochs = [x["epoch"] for x in log] - - fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15)) - - ax[0, 0].set_title('Loss') - ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train') - ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val') - ax[0, 0].legend() - - ax[1, 0].set_title('Acc') - ax[1, 0].plot(epochs,[x["acc"] for x in log]) - - if log[0]["param"]!= None: - if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])] - #proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])] - proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])] - mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])] - - ax[0, 1].set_title('Prob =f(epoch)') - ax[0, 1].stackplot(epochs, proba, labels=param_names) - #ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5)) - - ax[1, 1].set_title('Prob =f(TF)') - mean = np.mean(proba, axis=1) - std = np.std(proba, axis=1) - ax[1, 1].bar(param_names, mean, yerr=std) - plt.sca(ax[1, 1]), plt.xticks(rotation=90) - - ax[0, 2].set_title('Mag =f(epoch)') - ax[0, 
2].stackplot(epochs, mag, labels=param_names) - ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5)) - - ax[1, 2].set_title('Mag =f(TF)') - mean = np.mean(mag, axis=1) - std = np.std(mag, axis=1) - ax[1, 2].bar(param_names, mean, yerr=std) - plt.sca(ax[1, 2]), plt.xticks(rotation=90) - - - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name, bbox_inches='tight') - plt.close() - -def plot_compare(filenames, fig_name='res'): - - all_data=[] - legend="" - for idx, file in enumerate(filenames): - legend+=str(idx)+'-'+file+'\n' - with open(file) as json_file: - data = json.load(json_file) - all_data.append(data) - - fig, ax = plt.subplots(ncols=3, figsize=(30, 8)) - - for data_idx, log in enumerate(all_data): - log=log['Log'] - epochs = [x["epoch"] for x in log] - - ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train') - ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val') - - ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx)) - #ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes) - - if log[0]["param"]!= None: - if isinstance(log[0]["param"],float): - ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag') - - else : - for idx, _ in enumerate(log[0]["param"]): - ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx)) - - fig.suptitle(legend) - ax[0].set_title('Loss') - ax[1].set_title('Acc') - ax[2].set_title('Param') - for a in ax: a.legend() - - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name, bbox_inches='tight') - plt.close() - -def plot_res_compare(filenames, fig_name='res'): - - all_data=[] - #legend="" - for idx, file in enumerate(filenames): - #legend+=str(idx)+'-'+file+'\n' - with open(file) as json_file: - data = json.load(json_file) - all_data.append(data) - - n_tf = [len(x["Param_names"]) for x in all_data] - acc = [x["Accuracy"] for x in all_data] - time = [x["Time"][0] for 
x in all_data] - - fig, ax = plt.subplots(ncols=3, figsize=(30, 8)) - - ax[0].plot(n_tf, acc) - ax[1].plot(n_tf, time) - - ax[0].set_title('Acc') - ax[1].set_title('Time') - #for a in ax: a.legend() - - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name, bbox_inches='tight') - plt.close() - -def plot_TF_res(log, tf_names, fig_name='res'): - - mean = np.mean([x["param"] for x in log], axis=0) - std = np.std([x["param"] for x in log], axis=0) - - fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True) - ax.bar(tf_names, mean, yerr=std) - #ax.bar(tf_names, log[-1]["param"]) - - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name, bbox_inches='tight') - plt.close() - -def viz_sample_data(imgs, labels, fig_name='data_sample'): - - sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu() - - plt.figure(figsize=(10,10)) - for i in range(25): - plt.subplot(5,5,i+1) - plt.xticks([]) - plt.yticks([]) - plt.grid(False) - plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary) - plt.xlabel(labels[i].item()) - - plt.savefig(fig_name) - print("Sample saved :", fig_name) - plt.close() - -def model_copy(src,dst, patch_copy=True, copy_grad=True): - #model=copy.deepcopy(fmodel) #Pas approprie, on ne souhaite que les poids/grad (pas tout fmodel et ses etats) - - dst.load_state_dict(src.state_dict()) #Do not copy gradient ! - - if patch_copy: - dst['model'].load_state_dict(src['model'].state_dict()) #Copie donnee manquante ? 
- dst['data_aug'].load_state_dict(src['data_aug'].state_dict()) - - #Copie des gradients - if copy_grad: - for paramName, paramValue, in src.named_parameters(): - for netCopyName, netCopyValue, in dst.named_parameters(): - if paramName == netCopyName: - netCopyValue.grad = paramValue.grad - #netCopyValue=copy.deepcopy(paramValue) - - try: #Data_augV4 - dst['data_aug']._input_info = src['data_aug']._input_info - dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix - except: - pass - -def optim_copy(dopt, opt): - - #inner_opt.load_state_dict(diffopt.state_dict()) #Besoin sauver etat otpim (momentum, etc.) => Ne copie pas le state... - #opt_param=higher.optim.get_trainable_opt_params(diffopt) - - for group_idx, group in enumerate(opt.param_groups): - # print('gp idx',group_idx) - for p_idx, p in enumerate(group['params']): - opt.state[p]=dopt.state[group_idx][p_idx] - -def print_torch_mem(add_info=''): - - nb=0 - max_size=0 - for obj in gc.get_objects(): - #print(type(obj)) - try: - if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1: - #print(i, type(obj), obj.size()) - size = np.sum(obj.size()) - if(size>max_size): max_size=size - nb+=1 - except: - pass - print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size) - - #print(add_info, "-Garbage size :",len(gc.garbage)) - - """Simple GPU memory report.""" - - mega_bytes = 1024.0 * 1024.0 - string = add_info + ' memory (MB)' - string += ' | allocated: {}'.format( - torch.cuda.memory_allocated() / mega_bytes) - string += ' | max allocated: {}'.format( - torch.cuda.max_memory_allocated() / mega_bytes) - string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes) - string += ' | max cached: {}'.format( - torch.cuda.max_memory_cached()/ mega_bytes) - print(string) - -def plot_TF_influence(log, fig_name='TF_influence', param_names=None): - proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])] - 
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])] - - plt.figure() - - mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) #Pourrait etre interessant de multiplier avant le mean - std = np.std(proba, axis=1)*np.std(mag, axis=1) - plt.bar(param_names, mean, yerr=std) - - plt.xticks(rotation=90) - fig_name = fig_name.replace('.',',') - plt.savefig(fig_name, bbox_inches='tight') - plt.close() - -class loss_monitor(): #Voir https://github.com/pytorch/ignite - def __init__(self, patience, end_train=1): - self.patience = patience - self.end_train = end_train - self.counter = 0 - self.best_score = None - self.reached_limit = 0 - - def register(self, loss): - if self.best_score is None: - self.best_score = loss - elif loss > self.best_score: - self.counter += 1 - #if not self.reached_limit: - print("loss no improve counter", self.counter, self.reached_limit) - else: - self.best_score = loss - self.counter = 0 - def limit_reached(self): - if self.counter >= self.patience: - self.counter = 0 - self.reached_limit +=1 - self.best_score = None - return self.reached_limit - - def end_training(self): - if self.limit_reached() >= self.end_train: - return True - else: - return False - - def reset(self): - self.__init__(self.patience, self.end_train) \ No newline at end of file diff --git a/Old/salvador/grad_cam.py b/Old/salvador/grad_cam.py deleted file mode 100755 index 2aeada5..0000000 --- a/Old/salvador/grad_cam.py +++ /dev/null @@ -1,102 +0,0 @@ -import torch -import numpy as np -import torchvision -from PIL import Image -from torch import topk -from torch import nn -import torch.nn.functional as F -from torch import topk -import cv2 -from torchvision import transforms -import os - -class Lambda(nn.Module): - "Create a layer that simply calls `func` with `x`" - def __init__(self, func): - super().__init__() - self.func=func - def forward(self, x): return self.func(x) - -class SaveFeatures(): - activations, gradients = None, None - def 
__init__(self, m): - self.forward = m.register_forward_hook(self.forward_hook_fn) - self.backward = m.register_backward_hook(self.backward_hook_fn) - - def forward_hook_fn(self, module, input, output): - self.activations = output.cpu().detach() - - def backward_hook_fn(self, module, grad_input, grad_output): - self.gradients = grad_output[0].cpu().detach() - - def remove(self): - self.forward.remove() - self.backward.remove() - -def main(cam): - device = 'cuda:0' - model_name = 'resnet50' - root = '/mnt/md0/data/cifar10/tmp/cifar/train' - _root = 'cifar' - - os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True) - os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True) - - train_transform = transforms.Compose([ - transforms.ToTensor(), - ]) - - dataset = torchvision.datasets.ImageFolder( - root=root, transform=train_transform, - ) - - loader = torch.utils.data.DataLoader(dataset, batch_size=1) - model = torchvision.models.__dict__[model_name](pretrained=True) - flat = list(model.children()) - body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(loader.dataset.classes))) - model = nn.Sequential(body, head) - - model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage)) - model = model.to(device) - model.eval() - - activated_features = SaveFeatures(model[0]) - - for i, (img, target ) in enumerate(loader): - img = img.to(device) - pred = model(img) - import ipdb; ipdb.set_trace() - # get the gradient of the output with respect to the parameters of the model - pred[:, target.item()].backward() - - # import ipdb; ipdb.set_trace() - # pull the gradients out of the model - gradients = activated_features.gradients[0] - - pooled_gradients = gradients.mean(1).mean(1) - - # get the activations of the last convolutional layer - activations = activated_features.activations[0] - - heatmap = F.relu(((activations*pooled_gradients[...,None,None])).sum(0)) 
- heatmap /= torch.max(heatmap) - - heatmap = heatmap.numpy() - - - image = cv2.imread(dataset.imgs[i][0]) - heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0])) - heatmap = np.uint8(255 * heatmap) - heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) - # superimposed_img = heatmap * 0.3 + image * 0.5 - superimposed_img = heatmap - - clss = dataset.imgs[i][0].split(os.sep)[1] - name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0] - cv2.imwrite(os.path.join(_root+"_CAM", name + '.jpg'), superimposed_img) - print(f'{os.path.join(_root+"_CAM", name + ".jpg")} saved') - - activated_features.remove() - -if __name__ == "__main__": - main(cam=True) diff --git a/Old/salvador/train.py b/Old/salvador/train.py deleted file mode 100755 index f481d3f..0000000 --- a/Old/salvador/train.py +++ /dev/null @@ -1,382 +0,0 @@ -import datetime -import os -import time -import sys - -import torch -import torch.utils.data -from torch import nn -import torchvision -from torchvision import transforms -from PIL import ImageEnhance -import random - -import utils -from fastprogress import master_bar, progress_bar -import numpy as np - -## DATA AUG ## -import higher -from dataug import * -from dataug_utils import * -tf_names = [ - ## Geometric TF ## - 'Identity', - 'FlipUD', - 'FlipLR', - 'Rotate', - 'TranslateX', - 'TranslateY', - 'ShearX', - 'ShearY', - - ## Color TF (Expect image in the range of [0, 1]) ## - #'Contrast', - #'Color', - #'Brightness', - #'Sharpness', - #'Posterize', - #'Solarize', #=>Image entre [0,1] #Pas opti pour des batch -] - -class Lambda(nn.Module): - "Create a layer that simply calls `func` with `x`" - def __init__(self, func): - super().__init__() - self.func=func - def forward(self, x): return self.func(x) - -class SubsetSampler(torch.utils.data.SubsetRandomSampler): - def __init__(self, indices): - super().__init__(indices) - - def __iter__(self): - return (self.indices[i] for i in range(len(self.indices))) - - def __len__(self): - return 
len(self.indices) - -def sharpness(img, factor): - sharpness_factor = random.uniform(1, factor) - sharp = ImageEnhance.Sharpness(img) - sharped = sharp.enhance(sharpness_factor) - return sharped - -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar, Kldiv=False): - model.train() - metric_logger = utils.MetricLogger(delimiter=" ") - confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes)) - header = 'Epoch: {}'.format(epoch) - for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar): - - image, target = image.to(device), target.to(device) - - if not Kldiv : - output = model(image) - #output = F.log_softmax(output, dim=1) - loss = criterion(output, target) #Pas de softmax ? - - else : #Consume x2 memory - model.augment(mode=False) - output = model(image) - model.augment(mode=True) - log_sup=F.log_softmax(output, dim=1) - sup_loss = F.cross_entropy(log_sup, target) - - aug_output = model(image) - log_aug=F.log_softmax(aug_output, dim=1) - aug_loss=F.cross_entropy(log_aug, target) - - #KL div w/ logits - Similarite predictions (distributions) - KL_loss = F.softmax(output, dim=1)*(log_sup-log_aug) - KL_loss = KL_loss.sum(dim=-1) - #KL_loss = F.kl_div(aug_logits, sup_logits, reduction='none') - KL_loss = KL_loss.mean() - - unsupp_coeff = 1 - loss = sup_loss + (aug_loss + KL_loss) * unsupp_coeff - #print(sup_loss.item(), (aug_loss + KL_loss).item()) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - acc1 = utils.accuracy(output, target)[0] - batch_size = image.shape[0] - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.update(loss=loss.item()) - - confmat.update(target.flatten(), output.argmax(1).flatten()) - - - return metric_logger.loss.global_avg, confmat - - -def evaluate(model, criterion, data_loader, device): - model.eval() - metric_logger = utils.MetricLogger(delimiter=" ") - confmat = 
utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes)) - header = 'Test:' - missed = [] - with torch.no_grad(): - for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None): - image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) - if target.item() != output.topk(1)[1].item(): - missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]]) - - confmat.update(target.flatten(), output.argmax(1).flatten()) - - acc1 = utils.accuracy(output, target)[0] - batch_size = image.shape[0] - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.update(loss=loss.item()) - - - return metric_logger.loss.global_avg, missed, confmat - -def get_train_valid_loader(args, augment, random_seed, valid_size=0.1, shuffle=True, num_workers=4, pin_memory=True): - """ - Utility function for loading and returning train and valid - multi-process iterators over the CIFAR-10 dataset. A sample - 9x9 grid of the images can be optionally displayed. - If using CUDA, num_workers should be set to 1 and pin_memory to True. - Params - ------ - - data_dir: path directory to the dataset. - - batch_size: how many samples per batch to load. - - augment: whether to apply the data augmentation scheme - mentioned in the paper. Only applied on the train split. - - random_seed: fix seed for reproducibility. - - valid_size: percentage split of the training set used for - the validation set. Should be a float in the range [0, 1]. - - shuffle: whether to shuffle the train/validation indices. - - show_sample: plot 9x9 sample grid of the dataset. - - num_workers: number of subprocesses to use when loading the dataset. - - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to - True if using GPU. - Returns - ------- - - train_loader: training set iterator. - - valid_loader: validation set iterator. - """ - error_msg = "[!] 
valid_size should be in the range [0, 1]." - assert ((valid_size >= 0) and (valid_size <= 1)), error_msg - - # normalize = transforms.Normalize( - # mean=[0.4914, 0.4822, 0.4465], - # std=[0.2023, 0.1994, 0.2010], - # ) - - # define transforms - if augment: - train_transform = transforms.Compose([ - # transforms.ColorJitter(brightness=0.3), - # transforms.Lambda(lambda img: sharpness(img, 5)), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - # normalize, - ]) - - valid_transform = transforms.Compose([ - # transforms.ColorJitter(brightness=0.3), - # transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - # normalize, - ]) - else: - train_transform = transforms.Compose([ - transforms.ToTensor(), - # normalize, - ]) - - valid_transform = transforms.Compose([ - transforms.ToTensor(), - # normalize, - ]) - - - # load the dataset - train_dataset = torchvision.datasets.ImageFolder( - root=args.data_path, transform=train_transform - ) - - valid_dataset = torchvision.datasets.ImageFolder( - root=args.data_path, transform=valid_transform - ) - - num_train = len(train_dataset) - indices = list(range(num_train)) - split = int(np.floor(valid_size * num_train)) - - if shuffle: - np.random.seed(random_seed) - np.random.shuffle(indices) - - train_idx, valid_idx = indices[split:], indices[:split] - train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx) - valid_sampler = SubsetSampler(valid_idx) - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler, - num_workers=num_workers, pin_memory=pin_memory, - ) - valid_loader = torch.utils.data.DataLoader( - valid_dataset, batch_size=1, sampler=valid_sampler, - num_workers=num_workers, pin_memory=pin_memory, - ) - - imgs = np.asarray(train_dataset.imgs) - - # print('Train') - # print(imgs[train_idx]) - #print('Valid') - #print(imgs[valid_idx]) - - tgt = [0,0] - for _, 
targets in train_loader: - for target in targets: - tgt[target]+=1 - print("Train targets :", tgt) - - tgt = [0,0] - for _, targets in valid_loader: - for target in targets: - tgt[target]+=1 - print("Valid targets :", tgt) - - return (train_loader, valid_loader) - -def main(args): - print(args) - - device = torch.device(args.device) - - torch.backends.cudnn.benchmark = True - - - #augment = True if not args.test_only else False - - if not args.test_only and args.augment=='flip' : augment = True - else : augment = False - - print("Augment", augment) - data_loader, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment, - num_workers=args.workers, valid_size=0.3, random_seed=999) - - print("Creating model") - model = torchvision.models.__dict__[args.model](pretrained=True) - flat = list(model.children()) - - body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes))) - model = nn.Sequential(body, head) - - Kldiv=False - if not args.test_only and (args.augment=='Rand' or args.augment=='RandKL'): - tf_dict = {k: TF.TF_dict[k] for k in tf_names} - model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device) - - if args.augment=='RandKL': Kldiv=True - - model['data_aug']['mag'].data = model['data_aug']['mag'].data * args.magnitude - print("Augmodel") - - # model.fc = nn.Linear(model.fc.in_features, 2) - # import ipdb; ipdb.set_trace() - - criterion = nn.CrossEntropyLoss().to(device) - - # optimizer = torch.optim.SGD( - # model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) - - optimizer = torch.optim.Adam( - model.parameters(), lr=args.lr, weight_decay=args.weight_decay) - - lr_scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, - lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9) - - es = utils.EarlyStopping() if not (args.augment=='Rand' or 
args.augment=='RandKL') else utils.EarlyStopping(augmented_model=True) - - if args.test_only: - model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage)) - model = model.to(device) - print('TEST') - _, missed, _ = evaluate(model, criterion, data_loader_test, device=device) - print(missed) - print('TRAIN') - _, missed, _ = evaluate(model, criterion, data_loader, device=device) - print(missed) - return - - model = model.to(device) - - print("Start training") - start_time = time.time() - mb = master_bar(range(args.epochs)) - - for epoch in mb: - _, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb, Kldiv) - lr_scheduler.step( (epoch+1)*len(data_loader) ) - val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device) - es(val_loss, model) - - # print('Valid Missed') - # print(valid_missed) - - # print('Train') - # print(train_confmat) - #print('Valid') - #print(valid_confmat) - - # if es.early_stop: - # break - - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=int(total_time))) - print('Training time {}'.format(total_time_str)) - - -def parse_args(): - import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') - - parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset') - parser.add_argument('--model', default='resnet18', help='model') #'resnet18' - parser.add_argument('--device', default='cuda:0', help='device') - parser.add_argument('-b', '--batch-size', default=8, type=int) - parser.add_argument('--epochs', default=3, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=0, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate') - parser.add_argument('--momentum', 
default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - - parser.add_argument( - "--test-only", - dest="test_only", - help="Only test the model", - action="store_true", - ) - - parser.add_argument('-a', '--augment', default='None', type=str, - metavar='N', help='Data augment', - dest='augment') - parser.add_argument('-m', '--magnitude', default=1.0, type=float, - metavar='N', help='Augmentation magnitude', - dest='magnitude') - - - args = parser.parse_args() - - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/Old/salvador/train_dataug.py b/Old/salvador/train_dataug.py deleted file mode 100755 index a867167..0000000 --- a/Old/salvador/train_dataug.py +++ /dev/null @@ -1,585 +0,0 @@ -import datetime -import os -import time -import sys - -import torch -import torch.utils.data -from torch import nn -import torchvision -from torchvision import transforms -from PIL import ImageEnhance -import random - -import utils -from fastprogress import master_bar, progress_bar -import numpy as np - - -## DATA AUG ## -import higher -from dataug import * -from dataug_utils import * -tf_names = [ - ## Geometric TF ## - 'Identity', - 'FlipUD', - 'FlipLR', - 'Rotate', - 'TranslateX', - 'TranslateY', - 'ShearX', - 'ShearY', - - ## Color TF (Expect image in the range of [0, 1]) ## - 'Contrast', - 'Color', - 'Brightness', - 'Sharpness', - 'Posterize', - 'Solarize', #=>Image entre [0,1] #Pas opti pour des batch -] - -def compute_vaLoss(model, dl_it, dl): - device = next(model.parameters()).device - try: - xs, ys = next(dl_it) - except StopIteration: #Fin epoch val - dl_it = iter(dl) - xs, ys = next(dl_it) - xs, ys = xs.to(device), ys.to(device) - - model.eval() #Validation sans transfornations ! 
- - return F.cross_entropy(model(xs), ys) - -def model_copy(src,dst, patch_copy=True, copy_grad=True): - #model=copy.deepcopy(fmodel) #Pas approprie, on ne souhaite que les poids/grad (pas tout fmodel et ses etats) - - dst.load_state_dict(src.state_dict()) #Do not copy gradient ! - - if patch_copy: - dst['model'].load_state_dict(src['model'].state_dict()) #Copie donnee manquante ? - dst['data_aug'].load_state_dict(src['data_aug'].state_dict()) - - #Copie des gradients - if copy_grad: - for paramName, paramValue, in src.named_parameters(): - for netCopyName, netCopyValue, in dst.named_parameters(): - if paramName == netCopyName: - netCopyValue.grad = paramValue.grad - #netCopyValue=copy.deepcopy(paramValue) - - try: #Data_augV4 - dst['data_aug']._input_info = src['data_aug']._input_info - dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix - except: - pass - -def optim_copy(dopt, opt): - - #inner_opt.load_state_dict(diffopt.state_dict()) #Besoin sauver etat otpim (momentum, etc.) => Ne copie pas le state... 
- #opt_param=higher.optim.get_trainable_opt_params(diffopt) - - for group_idx, group in enumerate(opt.param_groups): - # print('gp idx',group_idx) - for p_idx, p in enumerate(group['params']): - opt.state[p]=dopt.state[group_idx][p_idx] - - -############# - -class Lambda(nn.Module): - "Create a layer that simply calls `func` with `x`" - def __init__(self, func): - super().__init__() - self.func=func - def forward(self, x): return self.func(x) - -class SubsetSampler(torch.utils.data.SubsetRandomSampler): - def __init__(self, indices): - super().__init__(indices) - - def __iter__(self): - return (self.indices[i] for i in range(len(self.indices))) - - def __len__(self): - return len(self.indices) - -def sharpness(img, factor): - sharpness_factor = random.uniform(1, factor) - sharp = ImageEnhance.Sharpness(img) - sharped = sharp.enhance(sharpness_factor) - return sharped - -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar): - model.train() - metric_logger = utils.MetricLogger(delimiter=" ") - confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes)) - header = 'Epoch: {}'.format(epoch) - for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar): - - image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - acc1 = utils.accuracy(output, target)[0] - batch_size = image.shape[0] - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.update(loss=loss.item()) - - confmat.update(target.flatten(), output.argmax(1).flatten()) - - - return metric_logger.loss.global_avg, confmat - - -def evaluate(model, criterion, data_loader, device): - model.eval() - metric_logger = utils.MetricLogger(delimiter=" ") - confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes)) - header = 'Test:' - missed = [] - with torch.no_grad(): - 
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None): - image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) - if target.item() != output.topk(1)[1].item(): - missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]]) - - confmat.update(target.flatten(), output.argmax(1).flatten()) - - acc1 = utils.accuracy(output, target)[0] - batch_size = image.shape[0] - metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) - metric_logger.update(loss=loss.item()) - - - return metric_logger.loss.global_avg, missed, confmat - -def get_train_valid_loader(args, augment, random_seed, train_size=0.5, test_size=0.1, shuffle=True, num_workers=4, pin_memory=True): - """ - Utility function for loading and returning train and valid - multi-process iterators over the CIFAR-10 dataset. A sample - 9x9 grid of the images can be optionally displayed. - If using CUDA, num_workers should be set to 1 and pin_memory to True. - Params - ------ - - data_dir: path directory to the dataset. - - batch_size: how many samples per batch to load. - - augment: whether to apply the data augmentation scheme - mentioned in the paper. Only applied on the train split. - - random_seed: fix seed for reproducibility. - - valid_size: percentage split of the training set used for - the validation set. Should be a float in the range [0, 1]. - - shuffle: whether to shuffle the train/validation indices. - - show_sample: plot 9x9 sample grid of the dataset. - - num_workers: number of subprocesses to use when loading the dataset. - - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to - True if using GPU. - Returns - ------- - - train_loader: training set iterator. - - valid_loader: validation set iterator. - """ - error_msg = "[!] test_size should be in the range [0, 1]." 
- assert ((test_size >= 0) and (test_size <= 1)), error_msg - - # normalize = transforms.Normalize( - # mean=[0.4914, 0.4822, 0.4465], - # std=[0.2023, 0.1994, 0.2010], - # ) - - # define transforms - if augment: - train_transform = transforms.Compose([ - # transforms.ColorJitter(brightness=0.3), - # transforms.Lambda(lambda img: sharpness(img, 5)), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - # normalize, - ]) - - valid_transform = transforms.Compose([ - # transforms.ColorJitter(brightness=0.3), - # transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - # normalize, - ]) - else: - train_transform = transforms.Compose([ - transforms.ToTensor(), - # normalize, - ]) - - valid_transform = transforms.Compose([ - transforms.ToTensor(), - # normalize, - ]) - - - # load the dataset - train_dataset = torchvision.datasets.ImageFolder( - root=args.data_path, transform=train_transform - ) - - test_dataset = torchvision.datasets.ImageFolder( - root=args.data_path, transform=valid_transform - ) - - num_train = len(train_dataset) - indices = list(range(num_train)) - split = int(np.floor(test_size * num_train)) - - if shuffle: - np.random.seed(random_seed) - np.random.shuffle(indices) - - train_idx, test_idx = indices[split:], indices[:split] - train_idx, valid_idx = train_idx[:int(len(train_idx)*train_size)], train_idx[int(len(train_idx)*train_size):] - print("\nTrain", len(train_idx), "\nValid", len(valid_idx), "\nTest", len(test_idx)) - train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx) - valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) if not args.test_only else SubsetSampler(valid_idx) - test_sampler = SubsetSampler(test_idx) - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler, - num_workers=num_workers, pin_memory=pin_memory, - ) - valid_loader = torch.utils.data.DataLoader( - 
def main(args):
    """Train a classifier with a learned augmentation policy, or only test it.

    With ``args.test_only`` the network weights are read from
    ``checkpoint.pt`` and the samples missed on the test and train loaders
    are printed. Otherwise the network is wrapped together with a
    ``Data_augV5`` module in an ``Augmented_model`` and trained through
    ``higher``: ``diffopt`` performs the inner steps on a patched copy
    (``fmodel``) and, when ``args.inner_it`` is non-zero, every
    ``inner_it`` iterations a validation loss is back-propagated and
    ``meta_opt`` updates the augmentation parameters.

    Args:
        args: namespace as produced by ``parse_args``.
    """
    print(args)

    device = torch.device(args.device)

    torch.backends.cudnn.benchmark = True

    # torchvision-side augmentation stays off here.
    augment = False

    data_loader, dl_val, data_loader_test = get_train_valid_loader(
        args=args, pin_memory=True, augment=augment,
        num_workers=args.workers, train_size=0.99, test_size=0.2, random_seed=999)

    print("Creating model")
    model = torchvision.models.__dict__[args.model](pretrained=True)
    flat = list(model.children())

    # Split the pretrained network: everything but the last two children is
    # the body; the head keeps the second-to-last child and gets a fresh
    # linear layer sized to the number of dataset classes.
    body = nn.Sequential(*flat[:-2])
    head = nn.Sequential(
        flat[-2],
        Lambda(func=lambda x: torch.flatten(x, 1)),
        nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
    model = nn.Sequential(body, head)

    criterion = nn.CrossEntropyLoss().to(device)

    # The training loop hands the Augmented_model to the checkpoint helper,
    # while --test-only loads the checkpoint into the bare network above.
    # Saving only model['model'] keeps the two state dicts compatible.
    es = utils.EarlyStopping(augmented_model=True)

    if args.test_only:
        model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
        model = model.to(device)
        print('TEST')
        _, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
        print(missed)
        print('TRAIN')
        _, missed, _ = evaluate(model, criterion, data_loader, device=device)
        print(missed)
        return

    model = model.to(device)

    print("Start training")
    start_time = time.time()
    mb = master_bar(range(args.epochs))

    inner_it = args.inner_it
    dataug_epoch_start = 0
    print_freq = 1
    KLdiv = False

    tf_dict = {k: TF.TF_dict[k] for k in tf_names}
    model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)

    val_loss = torch.tensor(0)  # Needed when no meta step happens during an epoch
    dl_val_it = iter(dl_val)
    countcopy = 0

    meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=args.lr)
    inner_opt = torch.optim.Adam(model['model'].parameters(), lr=args.lr, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        inner_opt,
        lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)

    # Higher-order gradients are only tracked when meta steps are requested.
    high_grad_track = inner_it != 0

    model.train()
    model.augment(mode=False)

    fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
    diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

    i = 0

    for epoch in mb:

        metric_logger = utils.MetricLogger(delimiter=" ")
        confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
        header = 'Epoch: {}'.format(epoch)

        t0 = time.process_time()
        for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=mb):
            i += 1
            image, target = image.to(device), target.to(device)

            if not KLdiv:
                # Uniform method
                logits = fmodel(image)
                output = F.log_softmax(logits, dim=1)
                loss = F.cross_entropy(output, target, reduction='none')

                if fmodel._data_augmentation:  # Weight the per-sample loss
                    w_loss = fmodel['data_aug'].loss_weight()
                    loss = loss * w_loss
                loss = loss.mean()

            else:
                # KL-divergence method: supervised loss on the clean batch
                # plus a consistency term on the augmented batch.
                fmodel.augment(mode=False)
                sup_logits = fmodel(image)
                log_sup = F.log_softmax(sup_logits, dim=1)
                fmodel.augment(mode=True)
                loss = F.cross_entropy(log_sup, target)
                # The metrics below need predictions in this branch too.
                output = log_sup

                if fmodel._data_augmentation:
                    aug_logits = fmodel(image)
                    log_aug = F.log_softmax(aug_logits, dim=1)
                    aug_loss = 0
                    if epoch > 50:  # delayed start
                        # KL divergence between clean and augmented predictions
                        aug_loss = F.softmax(sup_logits, dim=1) * (log_sup - log_aug)
                        aug_loss = aug_loss.sum(dim=-1)
                    w_loss = fmodel['data_aug'].loss_weight()  # Weight loss
                    aug_loss = (w_loss * aug_loss).mean()

                    aug_loss += (F.cross_entropy(log_aug, target, reduction='none') * w_loss).mean()
                    unsupp_coeff = 1
                    loss += aug_loss * unsupp_coeff

            diffopt.step(loss)  # stands in for zero_grad / backward / step

            if high_grad_track and i % inner_it == 0:  # Perform meta step
                val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) + fmodel['data_aug'].reg_loss()

                val_loss.backward()

                countcopy += 1
                model_copy(src=fmodel, dst=model)
                optim_copy(dopt=diffopt, opt=inner_opt)

                # NOTE(review): no meta_opt.zero_grad() is visible anywhere in
                # this function - confirm model_copy handles gradient reset.
                meta_opt.step()
                model['data_aug'].adjust_param(soft=False)  # Constraint: sum(proba) = 1

                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
                diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

            acc1 = utils.accuracy(output, target)[0]
            batch_size = image.shape[0]
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.update(loss=loss.item())

            confmat.update(target.flatten(), output.argmax(1).flatten())

            if not high_grad_track and (torch.cuda.memory_cached() / 1024.0 ** 2) > 20000:
                countcopy += 1
                print_torch_mem("copy")
                model_copy(src=fmodel, dst=model)
                optim_copy(dopt=diffopt, opt=inner_opt)
                val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)

                # Needed to reset higher (it accumulates fast params even
                # with track_higher_grads = False)
                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
                diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)
                print_torch_mem("copy")

        if not high_grad_track:
            # End-of-epoch sync of the patched copy back into the real model.
            countcopy += 1
            print_torch_mem("end copy")
            model_copy(src=fmodel, dst=model)
            optim_copy(dopt=diffopt, opt=inner_opt)
            val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)

            # Needed to reset higher (it accumulates fast params even
            # with track_higher_grads = False)
            fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
            diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)
            print_torch_mem("end copy")

        tf = time.process_time()

        #### Print ####
        if print_freq and epoch % print_freq == 0:
            print('-'*9)
            print('Epoch : %d'%(epoch))
            print('Time : %.00f'%(tf - t0))
            print('Train loss :',loss.item(), '/ val loss', val_loss.item())
            print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
            print('TF Proba :', model['data_aug']['prob'].data)
            print('TF Mag :', model['data_aug']['mag'].data)

        # The scheduler's lambda is expressed in iterations, not epochs.
        lr_scheduler.step((epoch + 1) * len(data_loader))

        test_loss, _, test_confmat = evaluate(model, criterion, data_loader_test, device=device)
        es(test_loss, model)

        print('Test')
        print(test_confmat)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: optional list of argument strings. When ``None`` (the default)
            ``sys.argv[1:]`` is parsed, exactly as before.

    Returns:
        The ``argparse.Namespace`` holding ``data_path``, ``model``,
        ``device``, ``batch_size``, ``epochs``, ``workers``, ``lr``,
        ``momentum``, ``weight_decay``, ``test_only`` and ``inner_it``.
    """
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Classification Training')

    parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
    parser.add_argument('--model', default='resnet18', help='model')
    parser.add_argument('--device', default='cuda:0', help='device')
    parser.add_argument('-b', '--batch-size', default=8, type=int)
    parser.add_argument('--epochs', default=3, type=int, metavar='N',
                        help='number of total epochs to run')
    # Help texts below state the defaults that are actually configured.
    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
                        metavar='W', help='weight decay (default: 4e-5)',
                        dest='weight_decay')

    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    parser.add_argument('--in_it', '--inner_it', default=0, type=int,
                        metavar='N', help='higher inner_it',
                        dest='inner_it')

    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()
    main(args)
a/Old/salvador/transformations.py +++ /dev/null @@ -1,346 +0,0 @@ -import torch -import kornia -import random - -### Available TF for Dataug ### -''' -TF_dict={ #Dataugv4 - ## Geometric TF ## - 'Identity' : (lambda x, mag: x), - 'FlipUD' : (lambda x, mag: flipUD(x)), - 'FlipLR' : (lambda x, mag: flipLR(x)), - 'Rotate': (lambda x, mag: rotate(x, angle=torch.tensor([rand_int(mag, maxval=30)for _ in x], device=x.device))), - 'TranslateX': (lambda x, mag: translate(x, translation=torch.tensor([[rand_int(mag, maxval=20), 0] for _ in x], device=x.device))), - 'TranslateY': (lambda x, mag: translate(x, translation=torch.tensor([[0, rand_int(mag, maxval=20)] for _ in x], device=x.device))), - 'ShearX': (lambda x, mag: shear(x, shear=torch.tensor([[rand_float(mag, maxval=0.3), 0] for _ in x], device=x.device))), - 'ShearY': (lambda x, mag: shear(x, shear=torch.tensor([[0, rand_float(mag, maxval=0.3)] for _ in x], device=x.device))), - - ## Color TF (Expect image in the range of [0, 1]) ## - 'Contrast': (lambda x, mag: contrast(x, contrast_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))), - 'Color':(lambda x, mag: color(x, color_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))), - 'Brightness':(lambda x, mag: brightness(x, brightness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))), - 'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))), - 'Posterize': (lambda x, mag: posterize(x, bits=torch.tensor([rand_int(mag, minval=4, maxval=8) for _ in x], device=x.device))), - 'Solarize': (lambda x, mag: solarize(x, thresholds=torch.tensor([rand_int(mag,minval=1, maxval=256)/256. 
for _ in x], device=x.device))) , #=>Image entre [0,1] #Pas opti pour des batch - - #Non fonctionnel - #'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent) - #'Equalize': (lambda mag: None), -} -''' -''' -TF_dict={ #Dataugv5 #AutoAugment - ## Geometric TF ## - 'Identity' : (lambda x, mag: x), - 'FlipUD' : (lambda x, mag: flipUD(x)), - 'FlipLR' : (lambda x, mag: flipLR(x)), - 'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))), - 'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))), - 'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))), - 'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))), - 'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))), - - ## Color TF (Expect image in the range of [0, 1]) ## - 'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))), - 'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))), - 'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))), - 'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))), - 'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient - 'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1] - - #Non fonctionnel - #'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent) - #'Equalize': (lambda mag: None), -} -''' 
# Registry of the available transformations (Dataugv5).
# Every value is a callable taking (x, mag): x is the image batch (its first
# dimension is used as the number of random draws) and mag is the magnitude
# forwarded to rand_floats / invScale_rand_floats, which read ``mag.device``.
TF_dict={ #Dataugv5
    ## Geometric TF ##
    'Identity' : (lambda x, mag: x),
    'FlipUD' : (lambda x, mag: flipUD(x)),
    'FlipLR' : (lambda x, mag: flipLR(x)),
    'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
    'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
    'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
    'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
    'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))), # Gradient is lost
    'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), # Gradient is lost; image expected in [0, 1]

    # Color TF (common mag scale): the '+' variants draw factors with
    # rand_floats starting at 1.0, the '-' and '=' variants draw them with
    # invScale_rand_floats.
    '+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))), # Gradient is lost
    '=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), # Gradient is lost; image expected in [0, 1]


    # 'B*' variants: same geometric transforms with maxval multiplied by 3.
    'BRotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30*3))),
    'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=0))),
    'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=1))),
    'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=0))),
    'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=1))),

    # 'Bad*' variants: explicit minval / maxval bounds beyond those of the
    # regular entries above.
    'BadTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=0))),
    'BadTranslateX_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=0))),
    'BadTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=1))),
    'BadTranslateY_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=1))),

    'BadColor':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadSharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),

    # Not functional
    #'Auto_Contrast': (lambda mag: None), # Not optimised for batches (very slow)
    #'Equalize': (lambda mag: None),
}
# Names of the transformations that are called without any magnitude.
TF_no_mag = {'Identity', 'FlipUD', 'FlipLR'}
# The magnitude-free transformations plus 'Solarize' and 'Posterize'.
TF_ignore_mag = TF_no_mag.union({'Solarize', 'Posterize'})


def int_image(float_image):
    """Convert a float image tensor to ``torch.uint8`` after scaling by 255.

    WARNING: slight loss of information, because the cast to uint8 drops
    the fractional part of the scaled values.
    """
    scaled = float_image * 255.
    return scaled.to(torch.uint8)


def float_image(int_image):
    """Convert an integer image tensor to float and divide it by 255."""
    as_float = int_image.to(torch.float)
    return as_float / 255.
def rand_floats(size, mag, maxval, minval=None):
    """Draw uniform random values whose upper bound is scaled by ``mag``.

    Args:
        size: shape (int or tuple) forwarded to ``torch.rand``.
        mag: magnitude tensor; its ``device`` is used for the samples.
        maxval: upper bound reached when ``mag`` equals ``PARAMETER_MAX``.
        minval: lower bound. When omitted, the range is symmetric:
            ``[-real_mag, real_mag]``.

    Returns:
        Tensor of samples in ``[minval, real_mag)`` with
        ``real_mag = float_parameter(mag, maxval)``.
    """
    real_mag = float_parameter(mag, maxval=maxval)
    # Explicit None test: a lower bound of 0 is a legitimate value and must
    # not silently fall back to the symmetric range.
    if minval is None:
        minval = -real_mag
    return minval + (real_mag - minval) * torch.rand(size, device=mag.device)


def invScale_rand_floats(size, mag, maxval, minval):
    """Draw uniform random values whose lower bound moves inversely to ``mag``.

    The magnitude scale is reversed: ``mag = 0`` gives a lower bound of
    ``maxval`` and ``mag = PARAMETER_MAX`` gives a lower bound of ``minval``.

    Returns:
        Tensor of samples in ``[real_mag, maxval)``.
    """
    real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval - minval) + minval
    return real_mag + (maxval - real_mag) * torch.rand(size, device=mag.device)


def zero_stack(tensor, zero_pos):
    """Pair each value of a 1-D tensor with a zero, giving an (N, 2) tensor.

    Args:
        tensor: 1-D tensor of N values.
        zero_pos: 0 puts the values in column 0 and zeros in column 1;
            1 puts zeros in column 0 and the values in column 1.

    Raises:
        ValueError: if ``zero_pos`` is neither 0 nor 1.
    """
    zeros = torch.zeros((tensor.shape[0],), device=tensor.device)
    if zero_pos == 0:
        return torch.stack((tensor, zeros), dim=1)
    if zero_pos == 1:
        return torch.stack((zeros, tensor), dim=1)
    raise ValueError("Invalid zero_pos : {}".format(zero_pos))


#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
PARAMETER_MAX = 1  # What is the max 'level' a transform could be predicted


def float_parameter(level, maxval):
    """Scale ``maxval`` by ``level / PARAMETER_MAX``.

    Args:
        level: Level of the operation, between 0 and ``PARAMETER_MAX``.
        maxval: Maximum value that the operation can have.

    Returns:
        ``level * maxval / PARAMETER_MAX``, left un-cast so a tensor
        ``level`` yields a tensor result.
    """
    return (level * maxval / PARAMETER_MAX)
def flipLR(x):
    """Flip a (batch, channels, h, w) tensor left-to-right via a perspective warp."""
    (batch_size, channels, h, w) = x.shape

    mirror = torch.tensor([[[-1., 0., w-1],
                            [ 0., 1., 0.],
                            [ 0., 0., 1.]]], device=x.device)
    # One copy of the same matrix per image of the batch.
    warp = mirror.expand(batch_size, -1, -1)

    return kornia.warp_perspective(x, warp, dsize=(h, w))


def flipUD(x):
    """Flip a (batch, channels, h, w) tensor upside-down via a perspective warp."""
    (batch_size, channels, h, w) = x.shape

    mirror = torch.tensor([[[ 1., 0., 0.],
                            [ 0., -1., h-1],
                            [ 0., 0., 1.]]], device=x.device)
    # One copy of the same matrix per image of the batch.
    warp = mirror.expand(batch_size, -1, -1)

    return kornia.warp_perspective(x, warp, dsize=(h, w))


def rotate(x, angle):
    """Rotate ``x`` by ``angle`` with kornia."""
    float_angle = angle.to(torch.float)  # kornia does not support ints
    return kornia.rotate(x, angle=float_angle)


def translate(x, translation):
    """Translate ``x`` by ``translation`` with kornia."""
    float_translation = translation.to(torch.float)  # kornia does not support ints
    return kornia.translate(x, translation=float_translation)


def shear(x, shear):
    """Shear ``x`` with kornia."""
    return kornia.shear(x, shear=shear)


def contrast(x, contrast_factor):
    """Adjust the contrast of ``x``; expects an image in the range [0, 1]."""
    return kornia.adjust_contrast(x, contrast_factor=contrast_factor)


#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
    """Blend ``x`` with its grayscale version, clamped to [0, 1].

    Expects an image in the range [0, 1].
    """
    (batch_size, channels, h, w) = x.shape

    # Grayscale copy repeated along the channel dimension to match x.
    gray_x = kornia.rgb_to_grayscale(x).repeat_interleave(channels, dim=1)
    blended = blend(gray_x, x, color_factor)
    return blended.clamp(min=0.0, max=1.0)
def brightness(x, brightness_factor):
    """Blend ``x`` with an all-zero image, clamped to [0, 1].

    Expects an image in the range [0, 1].
    """
    black = torch.zeros(x.size(), device=x.device)
    blended = blend(black, x, brightness_factor)
    return blended.clamp(min=0.0, max=1.0)


def sharpeness(x, sharpness_factor):
    """Blend ``x`` with a smoothed copy of itself, clamped to [0, 1].

    Expects an image in the range [0, 1].
    """
    (batch_size, channels, h, w) = x.shape

    # Smooth filter: https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
    smooth_kernel = torch.tensor([[[ 1., 1., 1.],
                                   [ 1., 5., 1.],
                                   [ 1., 1., 1.]]], device=x.device)
    # An alpha channel may need to be handled differently.
    smooth_x = kornia.filter2D(x, kernel=smooth_kernel, border_type='reflect', normalized=True)

    blended = blend(smooth_x, x, sharpness_factor)
    return blended.clamp(min=0.0, max=1.0)


#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
    """Mask each image with a bit mask derived from its ``bits`` entry.

    ``bits`` holds one value per image of the batch. Expects an image in
    the range [0, 1]; the gradient is lost by the integer casts.
    """
    bits = bits.type(torch.uint8)  # Gradient is lost
    x = int_image(x)

    mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)

    (batch_size, channels, h, w) = x.shape
    # One mask value per image, spread over every channel and pixel.
    mask = mask.view(-1, 1, 1, 1).expand(-1, channels, h, w)

    return float_image(x & mask)
def auto_contrast(x):
    """Stretch each channel of each image so its values span 0..255.

    Not optimised for batches: every (image, channel) pair is processed
    separately. The integer conversion makes this non-differentiable.
    Expects an image in the range [0, 1].

    Args:
        x: tensor of shape (batch, channels, h, w).

    Returns:
        The remapped batch, converted back with ``float_image``.
    """
    print("Warning : Pas encore check !")
    (batch_size, channels, h, w) = x.shape
    x = int_image(x)
    for im_idx in range(batch_size):  # Per-image operation
        for chan_idx in range(channels):  # Per-channel operation
            chan = x[im_idx, chan_idx]
            if chan.numel() == 0:
                continue

            # Lowest / highest values present in the channel.
            lo = int(chan.min())
            hi = int(chan.max())
            if hi <= lo:
                # Constant channel: nothing to stretch.
                continue

            scale = 255.0 / (hi - lo)
            offset = -lo * scale
            # Build the whole lookup table first and apply it in one pass.
            # Remapping value by value in place would re-map pixels that an
            # earlier iteration had already moved onto a later value.
            lut = torch.arange(256, dtype=torch.float64, device=x.device) * scale + offset
            lut = lut.clamp(min=0, max=255).to(torch.uint8)
            x[im_idx, chan_idx] = lut[chan.long()]

    return float_image(x)


def equalize(x):
    """Histogram equalisation placeholder.

    Raises:
        NotImplementedError: always; the operation is not implemented.
    """
    raise NotImplementedError("equalize is not implemented")
class MetricLogger(object):
    """Collection of named meters with progress-bar driven logging."""

    def __init__(self, delimiter="\t"):
        # Unknown meter names get a fresh SmoothedValue on first access.
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one value per keyword into the meter of the same name."""
        for name, value in kwargs.items():
            if isinstance(value, torch.Tensor):
                value = value.item()
            assert isinstance(value, (float, int))
            self.meters[name].update(value)

    def __getattr__(self, attr):
        """Expose meters as attributes (only called when normal lookup fails)."""
        meters = self.meters
        if attr in meters:
            return meters[attr]
        own = self.__dict__
        if attr in own:
            return own[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Join every ``name: meter`` pair with the configured delimiter."""
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        )

    def add_meter(self, name, meter):
        """Register ``meter`` under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, parent, header=None, **kwargs):
        """Yield ``(index, item)`` pairs while showing a progress bar.

        After each item the bar comment is refreshed with the current
        meters; once the iterable is exhausted a summary line is printed.
        """
        header = header or ''
        log_msg = self.delimiter.join(['{meters}'])

        bar = progress_bar(iterable, parent=parent, **kwargs)

        for idx, obj in enumerate(bar):
            yield idx, obj
            bar.comment = log_msg.format(meters=str(self))

        print('{header} {meters}'.format(header=header, meters=str(self)))
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: (batch, classes) score tensor.
        target: (batch,) tensor of class indices.
        topk: iterable of k values to evaluate.

    Returns:
        List with one scalar tensor per k: the percentage of samples whose
        target is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        # Row r of `correct` flags the samples whose r-th best guess is right.
        correct = pred.eq(target[None])

        res = []
        for k in topk:
            correct_k = correct[:k].flatten().sum(dtype=torch.float32)
            res.append(correct_k * (100.0 / batch_size))
        return res


class EarlyStopping:
    """Tracks the best validation loss and checkpoints the model on improvement.

    NOTE: in this version the object only counts the calls without
    improvement (``counter``); ``early_stop`` is never switched to True.
    """
    def __init__(self, patience=7, verbose=False, delta=0, augmented_model=False, path='checkpoint.pt'):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Stored only; see the class note.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            augmented_model (bool): If True, only ``model['model']`` is saved
                            instead of the whole model.
                            Default: False
            path (str): File the checkpoint is written to.
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # Builtin infinity: the np.Inf alias no longer exists in NumPy 2.
        self.val_loss_min = float('inf')
        self.delta = delta

        self.augmented_model = augmented_model
        self.path = path

    def __call__(self, val_loss, model):
        """Record ``val_loss``; checkpoint ``model`` unless it got worse."""
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score - self.delta:
            self.counter += 1
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        if self.augmented_model:
            state = model['model'].state_dict()
        else:
            state = model.state_dict()
        torch.save(state, self.path)
        self.val_loss_min = val_loss