diff --git a/Old/FAR-HO/augmentation_transforms.py b/Old/FAR-HO/augmentation_transforms.py
deleted file mode 100755
index ef17188..0000000
--- a/Old/FAR-HO/augmentation_transforms.py
+++ /dev/null
@@ -1,456 +0,0 @@
-# Copyright 2018 The TensorFlow Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Transforms used in the Augmentation Policies."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import inspect
-import random
-import numpy as np
-# pylint:disable=g-multiple-import
-from PIL import ImageOps, ImageEnhance, ImageFilter, Image
-# pylint:enable=g-multiple-import
-
-
-IMAGE_SIZE = 28
-# What is the dataset mean and std of the images on the training set
-MEANS = [0.49139968, 0.48215841, 0.44653091]
-STDS = [0.24703223, 0.24348513, 0.26158784]
-PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted
-
-
-def random_flip(x):
- """Flip the input x horizontally with 50% probability."""
- if np.random.rand(1)[0] > 0.5:
- return np.fliplr(x)
- return x
-
-
-def zero_pad_and_crop(img, amount=4):
- """Zero pad by `amount` zero pixels on each side then take a random crop.
-
- Args:
- img: numpy image that will be zero padded and cropped.
- amount: amount of zeros to pad `img` with horizontally and verically.
-
- Returns:
- The cropped zero padded img. The returned numpy array will be of the same
- shape as `img`.
- """
- padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
- img.shape[2]))
- padded_img[amount:img.shape[0] + amount, amount:
- img.shape[1] + amount, :] = img
- top = np.random.randint(low=0, high=2 * amount)
- left = np.random.randint(low=0, high=2 * amount)
- new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
- return new_img
-
-
-def create_cutout_mask(img_height, img_width, num_channels, size):
- """Creates a zero mask used for cutout of shape `img_height` x `img_width`.
-
- Args:
- img_height: Height of image cutout mask will be applied to.
- img_width: Width of image cutout mask will be applied to.
- num_channels: Number of channels in the image.
- size: Size of the zeros mask.
-
- Returns:
- A mask of shape `img_height` x `img_width` with all ones except for a
- square of zeros of shape `size` x `size`. This mask is meant to be
- elementwise multiplied with the original image. Additionally returns
- the `upper_coord` and `lower_coord` which specify where the cutout mask
- will be applied.
- """
- assert img_height == img_width
-
- # Sample center where cutout mask will be applied
- height_loc = np.random.randint(low=0, high=img_height)
- width_loc = np.random.randint(low=0, high=img_width)
-
- # Determine upper right and lower left corners of patch
- upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
- lower_coord = (min(img_height, height_loc + size // 2),
- min(img_width, width_loc + size // 2))
- mask_height = lower_coord[0] - upper_coord[0]
- mask_width = lower_coord[1] - upper_coord[1]
- assert mask_height > 0
- assert mask_width > 0
-
- mask = np.ones((img_height, img_width, num_channels))
- zeros = np.zeros((mask_height, mask_width, num_channels))
- mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
- zeros)
- return mask, upper_coord, lower_coord
-
-
-def cutout_numpy(img, size=16):
- """Apply cutout with mask of shape `size` x `size` to `img`.
-
- The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
- This operation applies a `size`x`size` mask of zeros to a random location
- within `img`.
-
- Args:
- img: Numpy image that cutout will be applied to.
- size: Height/width of the cutout mask that will be
-
- Returns:
- A numpy tensor that is the result of applying the cutout mask to `img`.
- """
- img_height, img_width, num_channels = (img.shape[0], img.shape[1],
- img.shape[2])
- assert len(img.shape) == 3
- mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
- return img * mask
-
-
-def float_parameter(level, maxval):
- """Helper function to scale `val` between 0 and maxval .
-
- Args:
- level: Level of the operation that will be between [0, `PARAMETER_MAX`].
- maxval: Maximum value that the operation can have. This will be scaled
- to level/PARAMETER_MAX.
-
- Returns:
- A float that results from scaling `maxval` according to `level`.
- """
- return float(level) * maxval / PARAMETER_MAX
-
-
-def int_parameter(level, maxval):
- """Helper function to scale `val` between 0 and maxval .
-
- Args:
- level: Level of the operation that will be between [0, `PARAMETER_MAX`].
- maxval: Maximum value that the operation can have. This will be scaled
- to level/PARAMETER_MAX.
-
- Returns:
- An int that results from scaling `maxval` according to `level`.
- """
- return int(level * maxval / PARAMETER_MAX)
-
-
-def pil_wrap(img):
- """Convert the `img` numpy tensor to a PIL Image."""
- return Image.fromarray(
- np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')
-
-
-def pil_unwrap(pil_img):
- """Converts the PIL img to a numpy array."""
- pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
- i1, i2 = np.where(pic_array[:, :, 3] == 0)
- pic_array = (pic_array[:, :, :3] - MEANS) / STDS
- pic_array[i1, i2] = [0, 0, 0]
- return pic_array
-
-
-def apply_policy(policy, img):
- """Apply the `policy` to the numpy `img`.
-
- Args:
- policy: A list of tuples with the form (name, probability, level) where
- `name` is the name of the augmentation operation to apply, `probability`
- is the probability of applying the operation and `level` is what strength
- the operation to apply.
- img: Numpy image that will have `policy` applied to it.
-
- Returns:
- The result of applying `policy` to `img`.
- """
- #print('img shape :',img.shape)
- #print('Policy len :',len(policy))
- pil_img = pil_wrap(img)
- for xform in policy:
- #print('xform :', len(xform))
- assert len(xform) == 3
- name, probability, level = xform
- #xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
- xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
- pil_img = xform_fn(pil_img)
- return pil_unwrap(pil_img)
-
-
-class TransformFunction(object):
- """Wraps the Transform function for pretty printing options."""
-
- def __init__(self, func, name):
- self.f = func
- self.name = name
-
- def __repr__(self):
- return '<' + self.name + '>'
-
- def __call__(self, pil_img):
- return self.f(pil_img)
-
-
-class TransformT(object):
- """Each instance of this class represents a specific transform."""
-
- def __init__(self, name, xform_fn):
- self.name = name
- self.xform = xform_fn
-
- def pil_transformer(self, probability, level):
-
- def return_function(im):
- if random.random() < probability:
- im = self.xform(im, level)
- return im
-
- name = self.name + '({:.1f},{})'.format(probability, level)
- return TransformFunction(return_function, name)
-
- def do_transform(self, image, level):
- f = self.pil_transformer(PARAMETER_MAX, level)
- return pil_unwrap(f(pil_wrap(image)))
-
-
-################## Transform Functions ##################
-identity = TransformT('identity', lambda pil_img, level: pil_img)
-flip_lr = TransformT(
- 'FlipLR',
- lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
-flip_ud = TransformT(
- 'FlipUD',
- lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
-# pylint:disable=g-long-lambda
-auto_contrast = TransformT(
- 'AutoContrast',
- lambda pil_img, level: ImageOps.autocontrast(
- pil_img.convert('RGB')).convert('RGBA'))
-equalize = TransformT(
- 'Equalize',
- lambda pil_img, level: ImageOps.equalize(
- pil_img.convert('RGB')).convert('RGBA'))
-invert = TransformT(
- 'Invert',
- lambda pil_img, level: ImageOps.invert(
- pil_img.convert('RGB')).convert('RGBA'))
-# pylint:enable=g-long-lambda
-blur = TransformT(
- 'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
-smooth = TransformT(
- 'Smooth',
- lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))
-
-
-def _rotate_impl(pil_img, level):
- """Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
- degrees = int_parameter(level, 30)
- if random.random() > 0.5:
- degrees = -degrees
- return pil_img.rotate(degrees)
-
-
-rotate = TransformT('Rotate', _rotate_impl)
-
-
-def _posterize_impl(pil_img, level):
- """Applies PIL Posterize to `pil_img`."""
- level = int_parameter(level, 4)
- return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')
-
-
-posterize = TransformT('Posterize', _posterize_impl)
-
-
-def _shear_x_impl(pil_img, level):
- """Applies PIL ShearX to `pil_img`.
-
- The ShearX operation shears the image along the horizontal axis with `level`
- magnitude.
-
- Args:
- pil_img: Image in PIL object.
- level: Strength of the operation specified as an Integer from
- [0, `PARAMETER_MAX`].
-
- Returns:
- A PIL Image that has had ShearX applied to it.
- """
- level = float_parameter(level, 0.3)
- if random.random() > 0.5:
- level = -level
- return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))
-
-
-shear_x = TransformT('ShearX', _shear_x_impl)
-
-
-def _shear_y_impl(pil_img, level):
- """Applies PIL ShearY to `pil_img`.
-
- The ShearY operation shears the image along the vertical axis with `level`
- magnitude.
-
- Args:
- pil_img: Image in PIL object.
- level: Strength of the operation specified as an Integer from
- [0, `PARAMETER_MAX`].
-
- Returns:
- A PIL Image that has had ShearX applied to it.
- """
- level = float_parameter(level, 0.3)
- if random.random() > 0.5:
- level = -level
- return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))
-
-
-shear_y = TransformT('ShearY', _shear_y_impl)
-
-
-def _translate_x_impl(pil_img, level):
- """Applies PIL TranslateX to `pil_img`.
-
- Translate the image in the horizontal direction by `level`
- number of pixels.
-
- Args:
- pil_img: Image in PIL object.
- level: Strength of the operation specified as an Integer from
- [0, `PARAMETER_MAX`].
-
- Returns:
- A PIL Image that has had TranslateX applied to it.
- """
- level = int_parameter(level, 10)
- if random.random() > 0.5:
- level = -level
- return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))
-
-
-translate_x = TransformT('TranslateX', _translate_x_impl)
-
-
-def _translate_y_impl(pil_img, level):
- """Applies PIL TranslateY to `pil_img`.
-
- Translate the image in the vertical direction by `level`
- number of pixels.
-
- Args:
- pil_img: Image in PIL object.
- level: Strength of the operation specified as an Integer from
- [0, `PARAMETER_MAX`].
-
- Returns:
- A PIL Image that has had TranslateY applied to it.
- """
- level = int_parameter(level, 10)
- if random.random() > 0.5:
- level = -level
- return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))
-
-
-translate_y = TransformT('TranslateY', _translate_y_impl)
-
-
-def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
- """Applies a crop to `pil_img` with the size depending on the `level`."""
- cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
- resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
- return resized
-
-
-crop_bilinear = TransformT('CropBilinear', _crop_impl)
-
-
-def _solarize_impl(pil_img, level):
- """Applies PIL Solarize to `pil_img`.
-
- Translate the image in the vertical direction by `level`
- number of pixels.
-
- Args:
- pil_img: Image in PIL object.
- level: Strength of the operation specified as an Integer from
- [0, `PARAMETER_MAX`].
-
- Returns:
- A PIL Image that has had Solarize applied to it.
- """
- level = int_parameter(level, 256)
- return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')
-
-
-solarize = TransformT('Solarize', _solarize_impl)
-
-
-def _cutout_pil_impl(pil_img, level):
- """Apply cutout to pil_img at the specified level."""
- size = int_parameter(level, 20)
- if size <= 0:
- return pil_img
- img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
- _, upper_coord, lower_coord = (
- create_cutout_mask(img_height, img_width, num_channels, size))
- pixels = pil_img.load() # create the pixel map
- for i in range(upper_coord[0], lower_coord[0]): # for every col:
- for j in range(upper_coord[1], lower_coord[1]): # For every row
- pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly
- return pil_img
-
-cutout = TransformT('Cutout', _cutout_pil_impl)
-
-
-def _enhancer_impl(enhancer):
- """Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
- def impl(pil_img, level):
- v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it
- return enhancer(pil_img).enhance(v)
- return impl
-
-
-color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
-contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
-brightness = TransformT('Brightness', _enhancer_impl(
- ImageEnhance.Brightness))
-sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))
-
-ALL_TRANSFORMS = [
- flip_lr,
- flip_ud,
- auto_contrast,
- equalize,
- invert,
- rotate,
- posterize,
- crop_bilinear,
- solarize,
- color,
- contrast,
- brightness,
- sharpness,
- shear_x,
- shear_y,
- translate_x,
- translate_y,
- cutout,
- blur,
- smooth
-]
-
-NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
-TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
diff --git a/Old/FAR-HO/blue_utils.py b/Old/FAR-HO/blue_utils.py
deleted file mode 100755
index 59ee62e..0000000
--- a/Old/FAR-HO/blue_utils.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import matplotlib.pyplot as plt
-from far_ho.examples.datasets import Datasets, Dataset
-
-import os
-import numpy as np
-import tensorflow as tf
-
-import augmentation_transforms as augmentation_transforms ##### ATTENTION FICHIER EN DOUBLE => A REGLER MIEUX ####
-
-def viz_data(dataset, fig_name='data_sample',aug_policy=None):
-
- plt.figure(figsize=(10,10))
- for i in range(25):
- plt.subplot(5,5,i+1)
- plt.xticks([])
- plt.yticks([])
- plt.grid(False)
-
- img = dataset.data[i][:,:,0]
- if aug_policy :
- img = augment_img(img,aug_policy)
- #print('im shape',img.shape)
- plt.imshow(img, cmap=plt.cm.binary)
- plt.xlabel(np.nonzero(dataset.target[i])[0].item())
-
- plt.savefig(fig_name)
-
-def augment_img(data, policy):
-
- #print('Im shape',data.shape)
- data = np.stack((data,)*3, axis=-1) #BOF BOF juste pour forcer 3 channels
- #print('Im shape',data.shape)
- final_img = augmentation_transforms.apply_policy(policy, data)
- #final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
- # Apply cutout
- #final_img = augmentation_transforms.cutout_numpy(final_img)
-
- im_rgb = np.array(final_img, np.float32)
- im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140]) #Just pour retourner a 1 channel
-
- return im_gray
-
-
-### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
-## build the neural network class
-# weight initialization
-def weight_variable(shape, name = None):
- initial = tf.truncated_normal(shape, stddev=0.1)
- return tf.Variable(initial, name = name)
-
-# bias initialization
-def bias_variable(shape, name = None):
- initial = tf.constant(0.1, shape=shape) # positive bias
- return tf.Variable(initial, name = name)
-
-# 2D convolution
-def conv2d(x, W, name = None):
- return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name = name)
-
-# max pooling
-def max_pool_2x2(x, name = None):
- return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
- padding='SAME', name = name)
-
-def cnn(x_data_tf,y_data_tf, name='model'):
- # tunable hyperparameters for nn architecture
- s_f_conv1 = 3; # filter size of first convolution layer (default = 3)
- n_f_conv1 = 36; # number of features of first convolution layer (default = 36)
- s_f_conv2 = 3; # filter size of second convolution layer (default = 3)
- n_f_conv2 = 36; # number of features of second convolution layer (default = 36)
- s_f_conv3 = 3; # filter size of third convolution layer (default = 3)
- n_f_conv3 = 36; # number of features of third convolution layer (default = 36)
- n_n_fc1 = 576; # number of neurons of first fully connected layer (default = 576)
-
- # 1.layer: convolution + max pooling
- W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
- b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
- h_conv1_tf = tf.nn.relu(conv2d(x_data_tf,
- W_conv1_tf) + b_conv1_tf,
- name = 'h_conv1_tf') # (.,28,28,32)
- h_pool1_tf = max_pool_2x2(h_conv1_tf,
- name = 'h_pool1_tf') # (.,14,14,32)
-
- # 2.layer: convolution + max pooling
- W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2,
- n_f_conv1, n_f_conv2],
- name = 'W_conv2_tf')
- b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
- h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf,
- W_conv2_tf) + b_conv2_tf,
- name ='h_conv2_tf') #(.,14,14,32)
- h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
-
- # 3.layer: convolution + max pooling
- W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3,
- n_f_conv2, n_f_conv3],
- name = 'W_conv3_tf')
- b_conv3_tf = bias_variable([n_f_conv3], name = 'b_conv3_tf')
- h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf,
- W_conv3_tf) + b_conv3_tf,
- name = 'h_conv3_tf') #(.,7,7,32)
- h_pool3_tf = max_pool_2x2(h_conv3_tf,
- name = 'h_pool3_tf') # (.,4,4,32)
-
- # 4.layer: fully connected
- W_fc1_tf = weight_variable([4*4*n_f_conv3,n_n_fc1],
- name = 'W_fc1_tf') # (4*4*32, 1024)
- b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
- h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1,4*4*n_f_conv3],
- name = 'h_pool3_flat_tf') # (.,1024)
- h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf,
- W_fc1_tf) + b_fc1_tf,
- name = 'h_fc1_tf') # (.,1024)
-
- # add dropout
- #keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
- #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
-
- # 5.layer: fully connected
- W_fc2_tf = weight_variable([n_n_fc1, 10], name = 'W_fc2_tf')
- b_fc2_tf = bias_variable([10], name = 'b_fc2_tf')
- z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf),
- b_fc2_tf, name = 'z_pred_tf')# => (.,10)
- # predicted probabilities in one-hot encoding
- y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
-
- # tensor of correct predictions
- y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
- tf.argmax(y_data_tf, 1),
- name = 'y_pred_correct_tf')
- return y_pred_proba_tf
\ No newline at end of file
diff --git a/Old/FAR-HO/far_pba_cifar.py b/Old/FAR-HO/far_pba_cifar.py
deleted file mode 100755
index 60dc509..0000000
--- a/Old/FAR-HO/far_pba_cifar.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
-from __future__ import absolute_import, print_function, division
-
-import os
-import numpy as np
-import tensorflow as tf
-#import tensorflow.contrib.layers as layers
-import far_ho as far
-import far_ho.examples as far_ex
-#import pprint
-
-import autoaugment.augmentation_transforms as augmentation_transforms
-#import autoaugment.policies as found_policies
-from autoaugment.wrn import build_wrn_model
-
-
-def build_model(inputs, num_classes, is_training, hparams):
- """Constructs the vision model being trained/evaled.
- Args:
- inputs: input features/images being fed to the image model build built.
- num_classes: number of output classes being predicted.
- is_training: is the model training or not.
- hparams: additional hyperparameters associated with the image model.
- Returns:
- The logits of the image model.
- """
- scopes = setup_arg_scopes(is_training)
- with contextlib.nested(*scopes):
- if hparams.model_name == 'pyramid_net':
- logits = build_shake_drop_model(
- inputs, num_classes, is_training)
- elif hparams.model_name == 'wrn':
- logits = build_wrn_model(
- inputs, num_classes, hparams.wrn_size)
- elif hparams.model_name == 'shake_shake':
- logits = build_shake_shake_model(
- inputs, num_classes, hparams, is_training)
- return logits
-
-
-class CifarModel(object):
- """Builds an image model for Cifar10/Cifar100."""
-
- def __init__(self, hparams):
- self.hparams = hparams
-
- def build(self, mode):
- """Construct the cifar model."""
- assert mode in ['train', 'eval']
- self.mode = mode
- self._setup_misc(mode)
- self._setup_images_and_labels()
- self._build_graph(self.images, self.labels, mode)
-
- self.init = tf.group(tf.global_variables_initializer(),
- tf.local_variables_initializer())
-
- def _setup_misc(self, mode):
- """Sets up miscellaneous in the cifar model constructor."""
- self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
- self.reuse = None if (mode == 'train') else True
- self.batch_size = self.hparams.batch_size
- if mode == 'eval':
- self.batch_size = 25
-
- def _setup_images_and_labels(self):
- """Sets up image and label placeholders for the cifar model."""
- if FLAGS.dataset == 'cifar10':
- self.num_classes = 10
- else:
- self.num_classes = 100
- self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
- self.labels = tf.placeholder(tf.float32,
- [self.batch_size, self.num_classes])
-
- def assign_epoch(self, session, epoch_value):
- session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})
-
- def _build_graph(self, images, labels, mode):
- """Constructs the TF graph for the cifar model.
- Args:
- images: A 4-D image Tensor
- labels: A 2-D labels Tensor.
- mode: string indicating training mode ( e.g., 'train', 'valid', 'test').
- """
- is_training = 'train' in mode
- if is_training:
- self.global_step = tf.train.get_or_create_global_step()
-
- logits = build_model(
- images,
- self.num_classes,
- is_training,
- self.hparams)
- self.predictions, self.cost = helper_utils.setup_loss(
- logits, labels)
- self.accuracy, self.eval_op = tf.metrics.accuracy(
- tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
- self._calc_num_trainable_params()
-
- # Adds L2 weight decay to the cost
- self.cost = helper_utils.decay_weights(self.cost,
- self.hparams.weight_decay_rate)
- #### Attention: differe implem originale
-
- self.init = tf.group(tf.global_variables_initializer(),
- tf.local_variables_initializer())
-
-
-########################################################
-
-######## PBA ############
-
-#Parallele Cifar model trainer
-tf.flags.DEFINE_string('model_name', 'wrn',
- 'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
- 'pyramid_net')
-tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
-tf.flags.DEFINE_string('data_path', '/tmp/data',
- 'Directory where dataset is located.')
-tf.flags.DEFINE_string('dataset', 'cifar10',
- 'Dataset to train with. Either cifar10 or cifar100')
-tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
-## ???
-
-FLAGS = tf.flags.FLAGS
-FLAGS.dataset
-FLAGS.data_path
-FLAGS.model_name = 'wrn'
-
-hparams = tf.contrib.training.HParams(
- train_size=50000,
- validation_size=0,
- eval_test=1,
- dataset=FLAGS.dataset,
- data_path=FLAGS.data_path,
- batch_size=128,
- gradient_clipping_by_global_norm=5.0)
- if FLAGS.model_name == 'wrn':
- hparams.add_hparam('model_name', 'wrn')
- hparams.add_hparam('num_epochs', 200)
- hparams.add_hparam('wrn_size', 160)
- hparams.add_hparam('lr', 0.1)
- hparams.add_hparam('weight_decay_rate', 5e-4)
-
-data_loader = data_utils.DataSet(hparams)
-data_loader.reset()
-
-with tf.Graph().as_default(): #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
-"""Builds the image models for train and eval."""
- # Determine if we should build the train and eval model. When using
- # distributed training we only want to build one or the other and not both.
- with tf.variable_scope('model', use_resource=False):
- m = CifarModel(self.hparams)
- m.build('train')
- #self._num_trainable_params = m.num_trainable_params
- #self._saver = m.saver
- #with tf.variable_scope('model', reuse=True, use_resource=False):
- # meval = CifarModel(self.hparams)
- # meval.build('eval')
-
-
-##### FAR-HO ####
-for _ in range(n_hyper_iterations):
-
-
diff --git a/Old/FAR-HO/test.py b/Old/FAR-HO/test.py
deleted file mode 100755
index 3364c00..0000000
--- a/Old/FAR-HO/test.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import os
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-import far_ho as far
-import far_ho.examples as far_ex
-import matplotlib.pyplot as plt
-
-sess = tf.InteractiveSession()
-
-
-def get_data():
- # load a small portion of mnist data
- datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
- return datasets.train, datasets.validation
-
-
-def g_logits(x,y):
- with tf.variable_scope('model'):
- h1 = layers.fully_connected(x, 300)
- logits = layers.fully_connected(h1, int(y.shape[1]))
- return logits
-
-
-x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
-y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
-logits = g_logits(x,y)
-train_set, validation_set = get_data()
-
-lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
-lr = far.get_hyperparameter('lr', initializer=0.01)
-
-ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
-L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)
-E = tf.reduce_mean(ce)
-
-accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
-
-inner_optimizer = far.GradientDescentOptimizer(lr)
-outer_optimizer = tf.train.AdamOptimizer()
-rev_it =10
-hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
-hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
-
-T = 20 # Number of inner iterations
-train_set_supplier = train_set.create_supplier(x, y)
-validation_set_supplier = validation_set.create_supplier(x, y)
-tf.global_variables_initializer().run()
-
-print('inner:', L.eval(train_set_supplier()))
-print('outer:', E.eval(validation_set_supplier()))
-# print('-'*50)
-n_hyper_iterations = 200
-inner_losses = []
-outer_losses = []
-train_accs = []
-val_accs = []
-
-for _ in range(n_hyper_iterations):
- hyper_step(T,
- inner_objective_feed_dicts=train_set_supplier,
- outer_objective_feed_dicts=validation_set_supplier)
-
- inner_obj = L.eval(train_set_supplier())
- outer_obj = E.eval(validation_set_supplier())
- inner_losses.append(inner_obj)
- outer_losses.append(outer_obj)
- print('inner:', inner_obj)
- print('outer:', outer_obj)
-
- train_acc = accuracy.eval(train_set_supplier())
- val_acc = accuracy.eval(validation_set_supplier())
- train_accs.append(train_acc)
- val_accs.append(val_acc)
- print('training accuracy', train_acc)
- print('validation accuracy', val_acc)
-
- print('learning rate', lr.eval())
- print('norm of examples weight', tf.norm(lambdas).eval())
- print('-'*50)
-
-plt.subplot(211)
-plt.plot(inner_losses, label='training loss')
-plt.plot(outer_losses, label='validation loss')
-plt.legend(loc=0, frameon=True)
-#plt.xlim(0, 19)
-plt.subplot(212)
-plt.plot(train_accs, label='training accuracy')
-plt.plot(val_accs, label='validation accuracy')
-plt.legend(loc=0, frameon=True)
-
-plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))
diff --git a/Old/FAR-HO/test_cnn.py b/Old/FAR-HO/test_cnn.py
deleted file mode 100755
index ffbcb8d..0000000
--- a/Old/FAR-HO/test_cnn.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import warnings
-warnings.filterwarnings("ignore")
-
-import os
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-import far_ho as far
-import far_ho.examples as far_ex
-
-tf.logging.set_verbosity(tf.logging.ERROR)
-
-import matplotlib.pyplot as plt
-import blue_utils as butil
-
-#Reset
-try:
- sess.close()
-except: pass
-rnd = np.random.RandomState(1)
-tf.reset_default_graph()
-sess = tf.InteractiveSession()
-
-def get_data(data_split):
- # load a small portion of mnist data
- datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
- print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
- [print("Nb samples : ", d.num_examples) for d in datasets]
- return datasets.train, datasets.validation, datasets.test
-
-#Model
-# FC : reshape = True
-def g_logits(x,y, name='model'):
- with tf.variable_scope(name):
- h1 = layers.fully_connected(x, 300)
- logits = layers.fully_connected(h1, int(y.shape[1]))
- return logits
-
-#### Hyper-parametres ####
-n_hyper_iterations = 500
-T = 20 # Number of inner iterations
-rev_it =10
-hp_lr = 1.e-3
-##########################
-
-#MNIST
-#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
-#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
-#logits = g_logits(x, y)
-
-#CNN : reshape = False
-x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
-y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
-
-logits = butil.cnn(x,y)
-
-train_set, validation_set, test_set = get_data(data_split=(.05, .05,))
-
-butil.viz_data(train_set)
-print('Data sampled !')
-
-# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
-#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
-#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
-#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
-lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
-mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
-rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))
-
-ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
-L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) #Retirer la seconde partie de la loss quand HP inutiles
-E = tf.reduce_mean(ce)
-
-accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
-
-inner_optimizer = far.MomentumOptimizer(lr, mu)
-outer_optimizer = tf.train.AdamOptimizer(hp_lr)
-hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
-hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
-
-train_set_supplier = train_set.create_supplier(x, y, batch_size=256) # stochastic GD
-validation_set_supplier = validation_set.create_supplier(x, y)
-
-his_params = []
-
-tf.global_variables_initializer().run()
-
-for hyt in range(n_hyper_iterations):
- hyper_step(T,
- inner_objective_feed_dicts=train_set_supplier,
- outer_objective_feed_dicts=validation_set_supplier)
- res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
- E.eval(validation_set_supplier()),
- accuracy.eval(train_set_supplier()),
- accuracy.eval(validation_set_supplier())]
- his_params.append(res)
-
- print('Hyper-it :',hyt,'/',n_hyper_iterations)
- print('inner:', L.eval(train_set_supplier()))
- print('outer:', E.eval(validation_set_supplier()))
- print('training accuracy:', res[5])
- print('validation accuracy:', res[6])
- #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
- print('-'*50)
-
-test_set_supplier = test_set.create_supplier(x, y)
-print('Test accuracy:',accuracy.eval(test_set_supplier()))
-
-fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
-ax[0].set_title('Learning rate')
-ax[0].plot([e[0] for e in his_params])
-
-ax[1].set_title('Momentum factor')
-ax[1].plot([e[1] for e in his_params])
-
-#ax[2].set_title('L2 regulariz.')
-#ax[2].plot([e[2] for e in his_params])
-ax[2].set_title('Tr. and val. acc')
-ax[2].plot([e[5] for e in his_params])
-ax[2].plot([e[6] for e in his_params])
-
-ax[3].set_title('Tr. and val. errors')
-ax[3].plot([e[3] for e in his_params])
-ax[3].plot([e[4] for e in his_params])
-
-plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))
diff --git a/Old/FAR-HO/test_cnn_aug.py b/Old/FAR-HO/test_cnn_aug.py
deleted file mode 100755
index db48936..0000000
--- a/Old/FAR-HO/test_cnn_aug.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import warnings
-warnings.filterwarnings("ignore")
-
-import os
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-import far_ho as far
-import far_ho.examples as far_ex
-
-tf.logging.set_verbosity(tf.logging.ERROR)
-
-import matplotlib.pyplot as plt
-import blue_utils as butil
-
-# Reset: close the session left over from a previous interactive run (if any),
-# then start again from an empty default graph.
-try:
-    sess.close()
-except NameError:
-    # First run: `sess` does not exist yet, so there is nothing to close.
-    pass
-rnd = np.random.RandomState(1)
-tf.reset_default_graph()
-sess = tf.InteractiveSession()
-
-def get_data(data_split):
-    """Load a small portion of MNIST and return its three splits.
-
-    Args:
-        data_split: partition fractions, forwarded to `far_ex.mnist` as `partitions`.
-
-    Returns:
-        The tuple `(datasets.train, datasets.validation, datasets.test)`.
-    """
-    # reshape=False is the setting this script pairs with the CNN input placeholder.
-    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
-    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
-    # Plain loop: the prints are side effects, no list needs to be built.
-    for d in datasets:
-        print("Nb samples : ", d.num_examples)
-    return datasets.train, datasets.validation, datasets.test
-
-#Model
-# FC : reshape = True
-def g_logits(x,y, name='model'):
-    """Build a fully connected classifier with one hidden layer of 300 units.
-
-    Args:
-        x: input tensor fed to the first fully connected layer.
-        y: label tensor; only its second dimension (the class count) is read.
-        name: variable scope under which the layers are created.
-
-    Returns:
-        The output of the last fully connected layer (the logits).
-    """
-    n_classes = int(y.shape[1])
-    with tf.variable_scope(name):
-        hidden = layers.fully_connected(x, 300)
-        return layers.fully_connected(hidden, n_classes)
-
-#### Hyper-parameters ####
-n_hyper_iterations = 10  # number of outer (hyperparameter) updates
-T = 10  # Number of inner iterations
-rev_it = 10  # truncation length handed to ReverseHG
-hp_lr = 0.02  # learning rate of the outer Adam optimizer
-##########################
-
-#CNN : reshape = False
-x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
-y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
-
-logits = butil.cnn(x,y)
-
-train_set, validation_set, test_set = get_data(data_split=(.1, .1,))
-
-# Augmentation probabilities, learned as hyperparameters and clamped to
-# [0.1, 0.9]: take the min with the upper bound first, then the max with the
-# lower bound. With the bounds in the opposite order the constraint returns
-# 0.9 for every input and the hyperparameter can never move.
-probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1))
-probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1))
-
-#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
-#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
-
-policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
-print('Hyp :',far.utils.hyperparameters(scope=None))
-
-# The transformation probabilities are injected into the inner loss
-# artificially, only so that they take part in the dynamics of the graph.
-probX_loss = tf.sigmoid(probX)
-probY_loss = tf.sigmoid(probY)
-
-ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
-L = tf.reduce_mean(probX_loss*probY_loss*ce)  # inner (training) objective
-E = tf.reduce_mean(ce)  # outer (validation) objective
-
-accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
-
-inner_optimizer = far.AdamOptimizer()
-outer_optimizer = tf.train.AdamOptimizer(hp_lr)
-hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
-hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
-
-train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy) # stochastic GD
-validation_set_supplier = validation_set.create_supplier(x, y)
-
-#print(train_set.dim_data,validation_set.dim_data)
-
-his_params = []
-
-tf.global_variables_initializer().run()
-
-butil.viz_data(train_set, fig_name= 'Start_sample',aug_policy= policy)
-print('Data sampled !')
-
-for hyt in range(n_hyper_iterations):
-    hyper_step(T,
-               inner_objective_feed_dicts=train_set_supplier,
-               outer_objective_feed_dicts=validation_set_supplier,
-               _skip_hyper_ts=True)
-    # One record per hyper-iteration:
-    # [probX, probY, inner loss, outer loss, training accuracy, validation accuracy]
-    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
-                                             E.eval(validation_set_supplier()),
-                                             accuracy.eval(train_set_supplier()),
-                                             accuracy.eval(validation_set_supplier())]
-    his_params.append(res)
-
-    butil.viz_data(train_set, fig_name= 'Train_sample_{}'.format(hyt),aug_policy= policy)
-    print('Data sampled !')
-
-    print('Hyper-it :',hyt,'/',n_hyper_iterations)
-    # Print the losses stored in `res`, not a re-evaluation: the training
-    # supplier is stochastic, so a second eval would report a different batch
-    # than the one recorded (and later plotted) in `his_params`.
-    print('inner:', res[2])
-    print('outer:', res[3])
-    print('training accuracy:', res[4])
-    print('validation accuracy:', res[5])
-    print('Transformation : ProbX -',res[0],'/ProbY -',res[1])
-    print('-'*50)
-
-test_set_supplier = test_set.create_supplier(x, y)
-print('Test accuracy:',accuracy.eval(test_set_supplier()))
-
-# One panel per entry: (title, indices into each `his_params` record).
-fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
-panels = [
-    ('ProbX', (0,)),
-    ('ProbY', (1,)),
-    ('Tr. and val. errors', (2, 3)),
-    ('Tr. and val. acc', (4, 5)),
-]
-for axis, (title, columns) in zip(ax, panels):
-    axis.set_title(title)
-    for col in columns:
-        axis.plot([record[col] for record in his_params])
-
-plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T))
diff --git a/Old/FAR-HO/test_fc.py b/Old/FAR-HO/test_fc.py
deleted file mode 100755
index 24eb596..0000000
--- a/Old/FAR-HO/test_fc.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
-import warnings
-warnings.filterwarnings("ignore")
-
-import os
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-import far_ho as far
-import far_ho.examples as far_ex
-
-tf.logging.set_verbosity(tf.logging.ERROR)
-
-import matplotlib.pyplot as plt
-#import blue_utils as butil
-
-# Reset: close the session left over from a previous interactive run (if any),
-# then start again from an empty default graph.
-try:
-    sess.close()
-except NameError:
-    # First run: `sess` does not exist yet, so there is nothing to close.
-    pass
-rnd = np.random.RandomState(1)
-tf.reset_default_graph()
-sess = tf.InteractiveSession()
-
-def get_data(data_split):
-    """Load a small portion of MNIST and return its three splits.
-
-    Args:
-        data_split: partition fractions, forwarded to `far_ex.mnist` as `partitions`.
-
-    Returns:
-        The tuple `(datasets.train, datasets.validation, datasets.test)`.
-    """
-    # reshape=True is the setting this script pairs with the fully connected model.
-    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
-    print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
-    # Plain loop: the prints are side effects, no list needs to be built.
-    for d in datasets:
-        print("Nb samples : ", d.num_examples)
-    return datasets.train, datasets.validation, datasets.test
-
-#Model
-# FC : reshape = True
-def g_logits(x,y, name='model'):
-    """Build a fully connected classifier with one hidden layer of 300 units.
-
-    Args:
-        x: input tensor fed to the first fully connected layer.
-        y: label tensor; only its second dimension (the class count) is read.
-        name: variable scope under which the layers are created.
-
-    Returns:
-        The output of the last fully connected layer (the logits).
-    """
-    n_classes = int(y.shape[1])
-    with tf.variable_scope(name):
-        hidden = layers.fully_connected(x, 300)
-        return layers.fully_connected(hidden, n_classes)
-
-#### Hyper-parameters ####
-n_hyper_iterations = 90
-T = 20 # Number of inner iterations
-rev_it =10
-hp_lr = 0.1
-epochs =10
-batch_size = 256
-##########################
-
-# Graph construction for the fully connected MNIST experiment. Statement order
-# matters: placeholders, model, hyperparameters, losses, then optimizers.
-#MNIST
-x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
-y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
-logits = g_logits(x, y)
-
-#CNN : reshape = False
-#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
-#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
-
-#logits = butil.cnn(x,y)
-
-train_set, validation_set, test_set = get_data(data_split=(.6, .3,))
-
-#butil.viz_data(train_set)
-
-# Learning rate and momentum are the hyperparameters being optimized. Each
-# constraint clamps its value: min with the upper bound, then max with the lower.
-# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
-lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
-mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
-#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
-
-
-# L is the inner objective and E the outer one; here both are the mean cross-entropy.
-ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
-L = tf.reduce_mean(ce) #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) #Drop the second part of the loss when the hyperparameter is unused
-E = tf.reduce_mean(ce)
-
-accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
-
-inner_optimizer = far.MomentumOptimizer(lr, mu)
-#inner_optimizer = far.GradientDescentOptimizer(lr)
-outer_optimizer = tf.train.AdamOptimizer(hp_lr)
-hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
-hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step())
-
-train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD
-validation_set_supplier = validation_set.create_supplier(x, y)
-
-
-# Number of hyper-iterations needed to cover `epochs` passes over the training
-# set when each hyper-iteration runs T inner steps of `batch_size` samples.
-print('Hyper iterations par epochs',int(train_set.num_examples/batch_size*epochs/T))
-
-# One record is appended per hyper-iteration by the training loop.
-his_params = []
-
-tf.global_variables_initializer().run()
-
-for hyt in range(n_hyper_iterations):
-    hyper_step(T,
-               inner_objective_feed_dicts=train_set_supplier,
-               outer_objective_feed_dicts=validation_set_supplier,
-               _skip_hyper_ts=False)
-    # Evaluate in a fixed order: hyperparameters, inner loss, outer loss,
-    # training accuracy, validation accuracy.
-    hyper_values = sess.run(far.hyperparameters())
-    inner_loss = L.eval(train_set_supplier())
-    outer_loss = E.eval(validation_set_supplier())
-    train_acc = accuracy.eval(train_set_supplier())
-    val_acc = accuracy.eval(validation_set_supplier())
-    # The literal 0 pads one slot so that the metrics sit at indices 3-6
-    # (apparently the slot of the disabled L2 coefficient - TODO confirm).
-    res = hyper_values + [0, inner_loss, outer_loss, train_acc, val_acc]
-
-    his_params.append(res)
-
-    print('Hyper-it :',hyt,'/',n_hyper_iterations)
-    print('inner:', res[3])
-    print('outer:', res[4])
-    print('training accuracy:', res[5])
-    print('validation accuracy:', res[6])
-    print('-'*50)
-
-test_set_supplier = test_set.create_supplier(x, y)
-print('Test accuracy:',accuracy.eval(test_set_supplier()))
-
-# One panel per entry: (title, indices into each `his_params` record).
-fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
-panels = [
-    ('Learning rate', (0,)),
-    ('Momentum factor', (1,)),
-    ('Tr. and val. acc', (5, 6)),
-    ('Tr. and val. errors', (3, 4)),
-]
-for axis, (title, columns) in zip(ax, panels):
-    axis.set_title(title)
-    for col in columns:
-        axis.plot([record[col] for record in his_params])
-
-# savefig does not create missing directories, so make sure the target exists.
-os.makedirs('resultats', exist_ok=True)
-plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T))
-#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T))
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore b/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore
deleted file mode 100755
index 8c17325..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-venv/
-__pycache__
-data/
-log/
-.vscode/
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf b/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf
deleted file mode 100755
index 4f0b65a..0000000
Binary files a/Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf and /dev/null differ
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md b/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md
deleted file mode 100755
index cfa0e6f..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/README.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Gradient Descent: The Ultimate Optimizer
-
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
-
-| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
-| --- |
-
-This repository contains the paper [Gradient Descent: The Ultimate
-Optimizer](https://arxiv.org/abs/1909.13371) together with its code.
-
-I couldn't find the code (which is found in the appendix at the end of the
-paper) anywhere on the web. What I present here is the code of the paper with
-instructions on how to set it up.
-
-Getting the code in a runnable state required some fixes on my part so the
-code might be slightly different than that presented in the paper.
-
-## Set up
-
-```sh
-git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
-cd Gradient-Descent-The-Ultimate-Optimizer
-virtualenv -p python3 venv
-source venv/bin/activate
-pip install -r requirements.txt
-python main.py
-```
-
-When you are done you can exit the virtualenv with
-
-```shell
-deactivate
-```
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py
deleted file mode 100755
index a18ddf0..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py
+++ /dev/null
@@ -1,244 +0,0 @@
-from hyperopt import *
-#from hyperopt_v2 import *
-
-import torchvision.transforms.functional as TF
-import torchvision.transforms as T
-
-#from scipy import ndimage
-import kornia
-
-import random
-
-
-class MNIST_FullyConnected_Augmented(Optimizable):
-    """
-    Fully connected network for MNIST whose parameter dict also carries two
-    data-augmentation entries, "prob" and "mag". It is Optimizable but not
-    itself an optimizer.
-    """
-
-    def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
-        """Allocate all parameters on `device` and hand them to Optimizable."""
-        self.device = device
-
-        def zeros_t(*shape):
-            # Zero tensor on the model device, transposed with .t().
-            return torch.zeros(*shape, device=self.device).t()
-
-        parameters = {
-            "w1": zeros_t(num_inp, num_hid),
-            "b1": zeros_t(num_hid),
-            "w2": zeros_t(num_hid, num_out),
-            "b2": zeros_t(num_out),
-
-            # Data augmentation
-            "prob": torch.tensor(0.5, device=self.device),
-            "mag": torch.tensor(180.0, device=self.device),
-        }
-        super().__init__(parameters, optimizer)
-
-    def initialize(self):
-        """Kaiming-initialize both weight matrices, then initialize the optimizer."""
-        for weight_name in ("w1", "w2"):
-            nn.init.kaiming_uniform_(self.parameters[weight_name], a=math.sqrt(5))
-        self.optimizer.initialize()
-
-    def forward(self, x):
-        """Compute a prediction (log-probabilities over the classes)."""
-        p = self.parameters
-        # With probability "prob", shift every input value by "mag".
-        if random.random() < p["prob"]:
-            x = x + p["mag"]
-
-        hidden = torch.tanh(F.linear(x, p["w1"], p["b1"]))
-        scores = torch.tanh(F.linear(hidden, p["w2"], p["b2"]))
-        return F.log_softmax(scores, dim=1)
-
-    def adjust(self):
-        """Let the optimizer update this model's parameter dict."""
-        self.optimizer.adjust(self.parameters)
-
-    def __str__(self):
-        return "mnist_FC_augmented / " + str(self.optimizer)
-
-class LeNet(Optimizable, nn.Module):
-    """
-    LeNet-style CNN stored as a flat parameter dict so that it can be driven
-    by the Optimizable stack. The dict also holds the augmentation entries
-    "prob" (chance of rotating a batch) and "mag" (rotation angle in degrees).
-    """
-
-    def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
-        """
-        Args:
-            num_inp: number of input channels of the first convolution.
-            num_out: number of classes, i.e. the width of the last linear layer.
-            optimizer: optimizer object handed to Optimizable.
-            device: device on which every parameter tensor is allocated.
-        """
-        nn.Module.__init__(self)
-        self.device = device
-        parameters = {
-            "w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
-            "b1": torch.zeros(20, device=self.device),
-            "w2": torch.zeros(50, 20, 5, 5, device=self.device),
-            "b2": torch.zeros(50, device=self.device),
-            # 4*4*50 is the flattened size a 28x28 input reaches after two
-            # 5x5 convolutions, each followed by a 2x2 max-pool.
-            "w3": torch.zeros(500,4*4*50, device=self.device),
-            "b3": torch.zeros(500, device=self.device),
-            # The output width follows num_out instead of a hard-coded 10.
-            "w4": torch.zeros(num_out, 500, device=self.device),
-            "b4": torch.zeros(num_out, device=self.device),
-
-            # Data augmentation
-            "prob": torch.tensor(1.0, device=self.device),
-            "mag": torch.tensor(180.0, device=self.device),
-        }
-        super().__init__(parameters, optimizer)
-
-    def initialize(self):
-        """Kaiming-initialize the four weight tensors, then initialize the optimizer."""
-        for weight_name in ("w1", "w2", "w3", "w4"):
-            nn.init.kaiming_uniform_(self.parameters[weight_name], a=math.sqrt(5))
-        self.optimizer.initialize()
-
-    def _rotate(self, x, degrees):
-        """Rotate every image of batch `x` by `degrees` around the image centre."""
-        batch_size = x.shape[0]
-        angle = torch.ones(batch_size, device=self.device) * degrees
-
-        # rotation centre, one (x, y) pair per image
-        center = torch.ones(batch_size, 2, device=self.device)
-        center[..., 0] = x.shape[3] / 2 # x
-        center[..., 1] = x.shape[2] / 2 # y
-
-        # no rescaling
-        scale = torch.ones(batch_size, device=self.device)
-
-        M = kornia.get_rotation_matrix2d(center, angle, scale)
-        return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
-    def forward(self, x):
-        """Return log-probabilities; the batch is first rotated with probability "prob"."""
-        p = self.parameters
-        if random.random() < p["prob"]:
-            x = self._rotate(x, p["mag"])  # "mag" is in degrees
-
-        out = F.relu(F.conv2d(input=x, weight=p["w1"], bias=p["b1"]))
-        out = F.max_pool2d(out, 2)
-        out = F.relu(F.conv2d(input=out, weight=p["w2"], bias=p["b2"]))
-        out = F.max_pool2d(out, 2)
-        out = out.view(out.size(0), -1)  # flatten everything except the batch axis
-        out = F.relu(F.linear(out, p["w3"], p["b3"]))
-        out = F.linear(out, p["w4"], p["b4"])
-        return F.log_softmax(out, dim=1)
-
-    def adjust(self):
-        """Let the optimizer update this model's parameter dict."""
-        self.optimizer.adjust(self.parameters)
-
-    def __str__(self):
-        return "mnist_CNN_augmented / " + str(self.optimizer)
-
-class LeNet_v2(Optimizable, nn.Module):
-    """
-    LeNet-style CNN whose weights are created by torch.nn layers and then
-    exposed through the Optimizable parameter dict, together with the
-    augmentation entries "prob" and "mag" ("mag" is multiplied by 180 to get
-    the rotation angle in degrees).
-    """
-
-    def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
-        """
-        Args:
-            num_inp: number of input channels of the first convolution.
-            num_out: number of classes, i.e. the width of the last linear layer.
-            optimizer: optimizer object handed to Optimizable.
-            device: device used for the augmentation parameters and for the
-                tensors built while rotating a batch.
-        """
-        nn.Module.__init__(self)
-        self.device = device
-        self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
-        self.conv2 = nn.Conv2d(20, 50, 5, 1)
-        # 1250 input features (the 4*4*50 variant was disabled in the original).
-        self.fc1 = nn.Linear(1250, 500)
-        # The output width follows num_out instead of a hard-coded 10.
-        self.fc2 = nn.Linear(500, num_out)
-
-        parameters = {
-            "w1": self.conv1.weight,
-            "b1": self.conv1.bias,
-            "w2": self.conv2.weight,
-            "b2": self.conv2.bias,
-            "w3": self.fc1.weight,
-            "b3": self.fc1.bias,
-            "w4": self.fc2.weight,
-            "b4": self.fc2.bias,
-
-            # Data augmentation
-            "prob": torch.tensor(0.5, device=self.device),
-            "mag": torch.tensor(1.0, device=self.device),
-        }
-        Optimizable.__init__(self, parameters, optimizer)
-
-    # A forward pass going through self.conv1/conv2/fc1/fc2 directly was
-    # disabled by the original author ("saturates the memory ???"); the
-    # functional version below reads the weights from self.parameters instead.
-
-    def _rotate(self, x, degrees):
-        """Rotate every image of batch `x` by `degrees` around the image centre."""
-        batch_size = x.shape[0]
-        angle = torch.ones(batch_size, device=self.device) * degrees
-
-        # rotation centre, one (x, y) pair per image
-        center = torch.ones(batch_size, 2, device=self.device)
-        center[..., 0] = x.shape[3] / 2 # x
-        center[..., 1] = x.shape[2] / 2 # y
-
-        # no rescaling
-        scale = torch.ones(batch_size, device=self.device)
-
-        M = kornia.get_rotation_matrix2d(center, angle, scale)
-        return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
-    def forward(self, x):
-        """Return log-probabilities; the batch is first rotated with probability "prob"."""
-        p = self.parameters
-        if random.random() < p["prob"].item():
-            x = self._rotate(x, p["mag"] * 180)  # angle in degrees
-
-        out = F.relu(F.conv2d(input=x, weight=p["w1"], bias=p["b1"]))
-        out = F.max_pool2d(out, 2)
-        out = F.relu(F.conv2d(input=out, weight=p["w2"], bias=p["b2"]))
-        out = F.max_pool2d(out, 2)
-        out = out.view(out.size(0), -1)  # flatten everything except the batch axis
-        out = F.relu(F.linear(out, p["w3"], p["b3"]))
-        out = F.linear(out, p["w4"], p["b4"])
-        return F.log_softmax(out, dim=1)
-
-    def initialize(self):
-        """Initialize the optimizer; the layer weights keep their torch.nn defaults."""
-        self.optimizer.initialize()
-
-    def adjust(self):
-        """Let the optimizer update the parameter dict via its adjust()."""
-        self.optimizer.adjust(self.parameters)
-
-    def adjust_val(self):
-        """Let the optimizer update the parameter dict via its adjust_val()."""
-        self.optimizer.adjust_val(self.parameters)
-
-    def eval(self):
-        """Disable augmentation by replacing "prob" with 0.
-
-        NOTE(review): this overrides nn.Module.eval() without switching the
-        training flag or returning self - confirm that callers rely on that.
-        """
-        self.parameters['prob']=torch.tensor(0.0, device=self.device)
-
-    def __str__(self):
-        return "mnist_CNN_augmented / " + str(self.optimizer)
\ No newline at end of file
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py
deleted file mode 100755
index 160e97c..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-from torch.utils.data import Dataset, DataLoader
-from torchvision import transforms
-import torchvision.transforms.functional as TF
-
-class MNIST_aug(Dataset):
-    """Dataset of 10 placeholder images whose random-crop size depends on a stage."""
-
-    training_file = 'training.pt'
-    test_file = 'test.pt'
-    classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
-               '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
-
-    def __init__(self):
-        # The images come from an uninitialised 10x3x48x48 byte tensor.
-        raw = torch.ByteTensor(10, 3, 48, 48)
-        self.images = [TF.to_pil_image(x) for x in raw]
-        self.set_stage(0) # initial stage
-
-    def __getitem__(self, index):
-        """Return a random crop of image `index`, converted to a tensor."""
-        cropped = self.crop(self.images[index])
-        return TF.to_tensor(cropped)
-
-    def set_stage(self, stage):
-        """Select the crop size: stage 0 uses 32x32, stage 1 uses 28x28.
-
-        Any other stage leaves the current crop untouched.
-        """
-        stages = {
-            0: ('Using (32, 32) crops', (32, 32)),
-            1: ('Using (28, 28) crops', (28, 28)),
-        }
-        if stage not in stages:
-            return
-        message, size = stages[stage]
-        print(message)
-        self.crop = transforms.RandomCrop(size)
-
-    def __len__(self):
-        return len(self.images)
-
-
-# Demo: iterate once with the stage-0 crops, switch the dataset to stage 1,
-# then iterate again to show that the batch shape follows the stage.
-# The dataset class defined in this file is MNIST_aug (`MyData` does not exist).
-dataset = MNIST_aug()
-loader = DataLoader(dataset,
-                    batch_size=2,
-                    num_workers=2,
-                    shuffle=True)
-
-for batch_idx, data in enumerate(loader):
-    print('Batch idx {}, data shape {}'.format(
-        batch_idx, data.shape))
-
-loader.dataset.set_stage(1)
-
-for batch_idx, data in enumerate(loader):
-    print('Batch idx {}, data shape {}'.format(
-        batch_idx, data.shape))
-
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py
deleted file mode 100755
index d2a992b..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#from hyperopt import *
-from hyperopt_v2 import *
-
-import torchvision.transforms.functional as TF
-import torchvision.transforms as T
-
-#from scipy import ndimage
-import kornia
-
-import random
-
-
-class LeNet_v3(nn.Module):
-    """LeNet-style CNN whose weights live in a single nn.ParameterDict (`params`)."""
-
-    def __init__(self, num_inp, num_out):
-        """
-        Args:
-            num_inp: number of input channels of the first convolution.
-            num_out: number of classes, i.e. the width of the last linear layer.
-        """
-        super(LeNet_v3, self).__init__()
-        self.params = nn.ParameterDict({
-            'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
-            'b1': nn.Parameter(torch.zeros(20)),
-            'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
-            'b2': nn.Parameter(torch.zeros(50)),
-            # 4*4*50 is the flattened size a 28x28 input reaches after two
-            # 5x5 convolutions, each followed by a 2x2 max-pool.
-            'w3': nn.Parameter(torch.zeros(500,4*4*50)),
-            'b3': nn.Parameter(torch.zeros(500)),
-            # The output width follows num_out instead of a hard-coded 10.
-            'w4': nn.Parameter(torch.zeros(num_out, 500)),
-            'b4': nn.Parameter(torch.zeros(num_out))
-        })
-
-    def initialize(self):
-        """Kaiming-initialize the four weight tensors (biases stay at zero)."""
-        for weight_name in ("w1", "w2", "w3", "w4"):
-            nn.init.kaiming_uniform_(self.params[weight_name], a=math.sqrt(5))
-
-    def forward(self, x):
-        """Return the log-probabilities of each class for batch `x`."""
-        p = self.params
-        out = F.relu(F.conv2d(input=x, weight=p["w1"], bias=p["b1"]))
-        out = F.max_pool2d(out, 2)
-        out = F.relu(F.conv2d(input=out, weight=p["w2"], bias=p["b2"]))
-        out = F.max_pool2d(out, 2)
-        out = out.view(out.size(0), -1)  # flatten everything except the batch axis
-        out = F.relu(F.linear(out, p["w3"], p["b3"]))
-        out = F.linear(out, p["w4"], p["b4"])
-        return F.log_softmax(out, dim=1)
-
-    def print_grad_fn(self):
-        """Print the grad_fn of every parameter (debugging aid)."""
-        for n, p in self.params.items():
-            print(n, p.grad_fn)
-
-    def __str__(self):
-        return "mnist_CNN_augmented / "
-
-class Data_aug(nn.Module):
-    """
-    Learnable augmentation layer. While training, the whole batch is rotated
-    by "mag" degrees with probability "prob"; otherwise the input is returned
-    untouched.
-    """
-
-    def __init__(self):
-        super(Data_aug, self).__init__()
-        # Plain boolean switch. NOTE(review): this instance attribute shadows
-        # the data_augmentation() method below, so toggle it by assignment (as
-        # Augmented_model does) rather than by calling the method on an instance.
-        self.data_augmentation = True
-        self.params = nn.ParameterDict({
-            "prob": nn.Parameter(torch.tensor(0.5)),
-            "mag": nn.Parameter(torch.tensor(180.0))
-        })
-
-    def forward(self, x):
-        """Return `x`, rotated around the image centre when the augmentation fires."""
-        if self.data_augmentation and self.training and random.random() < self.params["prob"]:
-            batch_size = x.shape[0]
-            # create transformation (rotation)
-            alpha = self.params["mag"] # in degrees
-            angle = torch.ones(batch_size, device=x.device) * alpha
-
-            # define the rotation center
-            center = torch.ones(batch_size, 2, device=x.device)
-            center[..., 0] = x.shape[3] / 2 # x
-            center[..., 1] = x.shape[2] / 2 # y
-
-            # define the scale factor
-            scale = torch.ones(batch_size, device=x.device)
-
-            # compute the transformation matrix
-            M = kornia.get_rotation_matrix2d(center, angle, scale)
-
-            # apply the transformation to original image
-            x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
-        return x
-
-    def eval(self):
-        """Switch to evaluation mode and return self, like nn.Module.eval().
-
-        forward() already skips the augmentation whenever `self.training` is
-        False, so "prob" does not need to be overwritten. The previous
-        implementation tried to do so through `self.device`, an attribute this
-        class never defines, and raised AttributeError; it would also have
-        discarded the learned probability for later training.
-        """
-        return nn.Module.eval(self)
-
-    def data_augmentation(self, mode=True):
-        self.data_augmentation=mode
-
-    def print_grad_fn(self):
-        """Print the grad_fn of every parameter (debugging aid)."""
-        for n, p in self.params.items():
-            print(n, p.grad_fn)
-
-    def __str__(self):
-        return "Data_Augmenter / "
-
-class Augmented_model(nn.Module):
-    """Chain a data-augmentation module and a model behind one nn.Module."""
-
-    def __init__(self, model, data_augmenter):
-        """Store both sub-modules in an nn.ModuleDict under 'data_aug' and 'model'."""
-        super(Augmented_model, self).__init__()
-        self.mods = nn.ModuleDict({
-            'data_aug': data_augmenter,
-            'model': model
-        })
-
-    def initialize(self):
-        """Initialize the wrapped model (the augmenter is left as constructed)."""
-        self.mods['model'].initialize()
-
-    def forward(self, x):
-        """Augment `x`, then feed the result to the model."""
-        augmented = self.mods['data_aug'](x)
-        return self.mods['model'](augmented)
-
-    def data_augmentation(self, mode=True):
-        """Turn the augmenter's `data_augmentation` flag on or off."""
-        self.mods['data_aug'].data_augmentation=mode
-
-    def begin(self):
-        """Enable and retain gradients on every parameter of both sub-modules."""
-        for tensor in self.parameters():
-            tensor.requires_grad_() # keep gradient information
-            tensor.retain_grad() # even if not a leaf
-
-    def print_grad_fn(self):
-        """Ask each sub-module, in insertion order, to print its grad_fn values."""
-        for module in self.mods.values():
-            module.print_grad_fn()
-
-    def __str__(self):
-        return str(self.mods['data_aug'])+ str(self.mods['model'])
\ No newline at end of file
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph b/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph
deleted file mode 100755
index 96389f9..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph
+++ /dev/null
@@ -1,5 +0,0 @@
-digraph {
- graph [size="12,12"]
- node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
- 94296775052080 [label=NoneType fillcolor=darkolivegreen1]
-}
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg b/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg
deleted file mode 100755
index a682cbc..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py
deleted file mode 100755
index 1506f30..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py
+++ /dev/null
@@ -1,345 +0,0 @@
-import math
-import torch
-import torchvision
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-
-
-class Optimizable():#nn.Module):
-    """
-    Interface for anything that owns parameters to be optimized. It plays a
-    role similar to torch.nn.Module, but with the plumbing needed for
-    hyperoptimization (the nn Parameter interface does not give enough control
-    over detachments). At the lowest level an Optimizable is used like this:
-        o = MyOptimizable(...)
-        o.initialize()
-        loop {
-            o.begin()
-            o.zero_grad()
-            loss = -compute loss function from parameters-
-            loss.backward()
-            o.adjust()
-        }
-    Optimizables recursively forward these calls to their own optimizer.
-    """
-
-    def __init__(self, parameters, optimizer):
-        self.parameters = parameters # a dict mapping names to tensors
-        self.optimizer = optimizer # which must itself be Optimizable!
-        self.all_params_with_gradients = []
-
-    def initialize(self):
-        """Initialize parameters, e.g. with a Kaiming initializer (no-op here)."""
-        return None
-
-    def begin(self):
-        """Enable gradient tracking on the current parameters, then on the optimizer."""
-        # Start from a fresh list on every call so that it does not keep growing.
-        tracked = []
-        self.all_params_with_gradients = tracked
-        for tensor in self.parameters.values():
-            tensor.requires_grad_() # keep gradient information
-            tensor.retain_grad() # even if not a leaf
-            tracked.append(tensor)
-        self.optimizer.begin()
-
-    def zero_grad(self):
-        """Set the gradient of every tracked tensor to zero, then the optimizer's."""
-        for tensor in self.all_params_with_gradients:
-            tensor.grad = torch.zeros(tensor.shape, device=tensor.device)
-        self.optimizer.zero_grad()
-
-    # Note: at this point you would probably call .backward() on the loss
-    # function.
-
-    def adjust(self):
-        """Update parameters (no-op here; subclasses override it)."""
-        return None
-
-    def print_grad_fn(self):
-        """Print the optimizer's grad_fn values first, then this object's own."""
-        self.optimizer.print_grad_fn()
-        for n, p in self.parameters.items():
-            print(n," - ", p.grad_fn)
-
-    def param_grad(self):
-        """Return the list of tensors collected by the last begin() call."""
-        return self.all_params_with_gradients
-
-    def param(self, param_name):
-        """Return the named parameter as a Python scalar (via .item())."""
-        return self.parameters[param_name].item()
-
-
-class MNIST_FullyConnected(Optimizable):
-    """
-    Fully connected network with one hidden layer for the MNIST task. It is
-    Optimizable but not itself an optimizer.
-    """
-
-    def __init__(self, num_inp, num_hid, num_out, optimizer):
-        """Create zero-filled weights and biases and hand them to Optimizable."""
-        parameters = {
-            "w1": torch.zeros(num_inp, num_hid).t(),
-            "b1": torch.zeros(num_hid).t(),
-            "w2": torch.zeros(num_hid, num_out).t(),
-            "b2": torch.zeros(num_out).t(),
-        }
-        super().__init__(parameters, optimizer)
-
-    def initialize(self):
-        """Kaiming-initialize both weight matrices, then initialize the optimizer."""
-        for weight_name in ("w1", "w2"):
-            nn.init.kaiming_uniform_(self.parameters[weight_name], a=math.sqrt(5))
-        self.optimizer.initialize()
-
-    def forward(self, x):
-        """Compute a prediction (log-probabilities over the classes)."""
-        p = self.parameters
-        hidden = torch.tanh(F.linear(x, p["w1"], p["b1"]))
-        scores = torch.tanh(F.linear(hidden, p["w2"], p["b2"]))
-        return F.log_softmax(scores, dim=1)
-
-    def adjust(self):
-        """Let the optimizer update this model's parameter dict."""
-        self.optimizer.adjust(self.parameters)
-
-    def __str__(self):
-        return "mnist / " + str(self.optimizer)
-
-
-class NoOpOptimizer(Optimizable):#, nn.Module):
-    """
-    NoOpOptimizer sits on top of a stack and does not affect what lies below:
-    every hook is a no-op, which ends the recursion through the optimizers.
-    """
-
-    def __init__(self):
-        """Deliberately skip Optimizable.__init__: there is nothing to store."""
-        return None
-
-    def initialize(self):
-        """Nothing to initialize."""
-        return None
-
-    def begin(self):
-        """Nothing to track."""
-        return None
-
-    def zero_grad(self):
-        """No gradients to reset."""
-        return None
-
-    def adjust(self, params):
-        """Leave `params` untouched."""
-        return None
-
-    def adjust_val(self, params):
-        """Leave `params` untouched."""
-        return None
-
-    def print_grad_fn(self):
-        """Nothing to print."""
-        return None
-
-    def __str__(self):
-        return "static"
-
-class Adam(Optimizable):
- """
- A fully hyperoptimizable Adam optimizer
- """
-
- def clamp(x):
- return (x.tanh() + 1.0) / 2.0
-
- def unclamp(y):
- z = y * 2.0 - 1.0
- return ((1.0 + z) / (1.0 - z)).log() / 2.0
-
- def __init__(
- self,
- alpha=0.001,
- beta1=0.9,
- beta2=0.999,
- log_eps=-8.0,
- optimizer=NoOpOptimizer(),
- device = torch.device('cuda')
- ):
- self.device = device
- parameters = {
- "alpha": torch.tensor(alpha, device=self.device),
- "beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
- "beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
- "log_eps": torch.tensor(log_eps, device=self.device),
- }
- super().__init__(parameters, optimizer)
- self.num_adjustments = 0
- self.num_adjustments_val = 0
- self.cache = {}
-
- for name, param in parameters.items():
- param.requires_grad_() # keep gradient information…
- param.retain_grad() # even if not a leaf…
- #param.to(self.device)
- #if param.device == torch.device('cuda:0'):
- # print(name, param.device)
-
- def adjust(self, params): #Update param d'apprentissage
- self.num_adjustments += 1
- self.optimizer.adjust(self.parameters)
- #print('Adam update')
- t = self.num_adjustments
- beta1 = Adam.clamp(self.parameters["beta1"])
- beta2 = Adam.clamp(self.parameters["beta2"])
- for name, param in params.items():
- if name == "mag": continue
- if name not in self.cache:
- self.cache[name] = {
- "m": torch.zeros(param.shape, device=self.device),
- "v": torch.zeros(param.shape, device=self.device)
- + 10.0 ** self.parameters["log_eps"].data
- # NOTE that we add a little ‘fudge factor' here because sqrt is not
- # differentiable at exactly zero
- }
- #print(name, param.device)
- g = param.grad.detach()
- self.cache[name]["m"] = m = (
- beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
- )
- self.cache[name]["v"] = v = (
- beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
- )
- self.all_params_with_gradients.append(m)
- self.all_params_with_gradients.append(v)
- m_hat = m / (1.0 - beta1 ** float(t))
- v_hat = v / (1.0 - beta2 ** float(t))
- dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
- params[name] = param.detach() - self.parameters["alpha"] * dparam
- #print(name)
-
- def adjust_val(self, params): #Update param Transformations
- self.num_adjustments_val += 1
- self.optimizer.adjust_val(self.parameters)
- #print('Adam update')
- t = self.num_adjustments_val
- beta1 = Adam.clamp(self.parameters["beta1"])
- beta2 = Adam.clamp(self.parameters["beta2"])
- for name, param in params.items():
- if name != "mag": continue
- if name not in self.cache:
- self.cache[name] = {
- "m": torch.zeros(param.shape, device=self.device),
- "v": torch.zeros(param.shape, device=self.device)
- + 10.0 ** self.parameters["log_eps"].data
- # NOTE that we add a little ‘fudge factor' here because sqrt is not
- # differentiable at exactly zero
- }
- #print(name, param.device)
- g = param.grad.detach()
- self.cache[name]["m"] = m = (
- beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
- )
- self.cache[name]["v"] = v = (
- beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
- )
- self.all_params_with_gradients.append(m)
- self.all_params_with_gradients.append(v)
- m_hat = m / (1.0 - beta1 ** float(t))
- v_hat = v / (1.0 - beta2 ** float(t))
- dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
- params[name] = param.detach() - self.parameters["alpha"] * dparam
- #print(name)
-
- def __str__(self):
- return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
-'''
-class SGD(Optimizable):
- """
- A hyperoptimizable SGD
- """
-
- def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
- parameters = {"alpha": torch.tensor(alpha)}
- super().__init__(parameters, optimizer)
-
- def adjust(self, params):
- self.optimizer.adjust(self.parameters)
- for name, param in params.items():
- g = param.grad.detach()
- params[name] = param.detach() - g * self.parameters["alpha"]
-
- def __str__(self):
- return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
-
-class SGDPerParam(Optimizable):
- """
- Like above, but can be taught a separate step size for each parameter it
- tunes.
- """
-
- def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
- parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
- super().__init__(parameters, optimizer)
-
- def adjust(self, params):
- self.optimizer.adjust(self.parameters)
- for name, param in params.items():
- g = param.grad.detach()
- params[name] = param.detach() - g * self.parameters[name + "_alpha"]
-
- def __str__(self):
- return "sgd(%s) / " % str(
- {k: t.item() for k, t in self.parameters.items()}
- ) + str(self.optimizer)
-'''
-'''
-class AdamBaydin(Optimizable):
- """ Same as above, but only optimizes the learning rate, treating the
- remaining hyperparameters as constants. """
-
- def __init__(
- self,
- alpha=0.001,
- beta1=0.9,
- beta2=0.999,
- log_eps=-8.0,
- optimizer=NoOpOptimizer(),
- ):
- parameters = {"alpha": torch.tensor(alpha)}
- self.beta1 = beta1
- self.beta2 = beta2
- self.log_eps = log_eps
- super().__init__(parameters, optimizer)
- self.num_adjustments = 0
- self.cache = {}
-
- def adjust(self, params):
- self.num_adjustments += 1
- self.optimizer.adjust(self.parameters)
- t = self.num_adjustments
- beta1 = self.beta1
- beta2 = self.beta2
- for name, param in params.items():
- if name not in self.cache:
- self.cache[name] = {
- "m": torch.zeros(param.shape),
- "v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
- }
- g = param.grad.detach()
- self.cache[name]["m"] = m = (
- beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
- )
- self.cache[name]["v"] = v = (
- beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
- )
- self.all_params_with_gradients.append(m)
- self.all_params_with_gradients.append(v)
- m_hat = m / (1.0 - beta1 ** float(t))
- v_hat = v / (1.0 - beta2 ** float(t))
- dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
- params[name] = param.detach() - self.parameters["alpha"] * dparam
-
- def __str__(self):
- return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
-'''
\ No newline at end of file
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py
deleted file mode 100755
index c100085..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import math
-import torch
-import torchvision
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.optim.optimizer import Optimizer
-
-class Optimizable():
- """
- This is the interface for anything that has parameters that need to be
- optimized, somewhat like torch.nn.Model but with the right plumbing for
- hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
- interface which does not give us enough control about the detachments.)
- Nominal operation of an Optimizable at the lowest level is as follows:
- o = MyOptimizable(…)
- o.initialize()
- loop {
- o.begin()
- o.zero_grad()
- loss = –compute loss function from parameters–
- loss.backward()
- o.adjust()
- }
- Optimizables recursively handle updates to their optimiz*ers*.
- """
- #def __init__(self):
- # super(Optimizable, self).__init__()
- # self.parameters = nn.Parameter(torch.zeros(()))
-
- def __init__(self, parameters, optimizer):
- self.params = parameters # a dict mapping names to tensors
- self.optimizer = optimizer # which must itself be Optimizable!
- self.all_params_with_gradients = []
- #self.device = device
-
- def initialize(self):
- """Initialize parameters, e.g. with a Kaiming initializer."""
- pass
-
- def begin(self):
- """Enable gradient tracking on current parameters."""
- self.all_params_with_gradients = nn.ParameterList() #Reintialisation pour eviter surcharge de la memoire
- print("Opti param :", type(self.params))
- #for name, param in self.params:
- if isinstance(self.params,dict): #Dict
- for name, param in self.params:
- param.requires_grad_() # keep gradient information…
- param.retain_grad() # even if not a leaf…
- self.all_params_with_gradients.append(param)
- if isinstance(self.params,list): #List
- for param in self.params:
- param.requires_grad_() # keep gradient information…
- param.retain_grad() # even if not a leaf…
- self.all_params_with_gradients.append(param)
- self.optimizer.begin()
-
- def zero_grad(self):
- """ Set all gradients to zero. """
- for param in self.all_params_with_gradients:
- param.grad = torch.zeros(param.shape, device=param.device)
- self.optimizer.zero_grad()
-
- """ Note: at this point you would probably call .backwards() on the loss
- function. """
-
- def adjust(self):
- """ Update parameters """
- pass
-
-
-class NoOpOptimizer(Optimizable):#, nn.Module):
- """
- NoOpOptimizer sits on top of a stack, and does not affect what lies below.
- """
-
- def __init__(self):
- #super(Optimizable, self).__init__()
- pass
-
- def initialize(self):
- pass
-
- def begin(self):
- #print("NoOpt begin")
- pass
-
- def zero_grad(self):
- pass
-
- def adjust(self, params):
- pass
-
- def step(self):
- pass
-
- def print_grad_fn(self):
- pass
-
- def __str__(self):
- return "static"
-
-
-class SGD(Optimizer, nn.Module): #Eviter Optimizer
- """
- A hyperoptimizable SGD
- """
-
- def __init__(self, params, lr=0.01, height=0):
- self.height=height
- #params : a optimiser
- #reste (defaults) param de l'opti
- print('SGD - H', height)
- nn.Module.__init__(self)
-
- optim_keys = ('lr','') #A mettre dans Optimizable ? #'' pour eviter iteration dans la chaine de charactere...
- '''
- self_params = {"lr": torch.tensor(lr),
- "momentum": 0,
- "dampening":0,
- "weight_decay":0,
- "nesterov": False}
- '''
- #self_params = dict(lr=torch.tensor(lr),
- # momentum=0, dampening=0, weight_decay=0, nesterov=False)
-
- self_params = nn.ParameterDict({
- "lr": nn.Parameter(torch.tensor(lr)),
- "momentum": nn.Parameter(torch.tensor(0.0)),
- "dampening": nn.Parameter(torch.tensor(0.0)),
- "weight_decay": nn.Parameter(torch.tensor(0.0)),
- })
-
- for k in self_params.keys() & optim_keys:
- self_params[k].requires_grad_() # keep gradient information…
- self_params[k].retain_grad() # even if not a leaf…
- #self_params[k].register_hook(print)
-
- if height==0:
- optimizer = NoOpOptimizer()
- else:
- #def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
- #(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) #Devrait mar
- optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
- #optimizer.register_backward_hook(print)
-
- self.optimizer = optimizer
- #if(height==0):
- # for n,p in params.items():
- # print(n,p)
-
- #Optimizable.__init__(self, self_params, optimizer)
-
- #print(type(params))
- #for p in params:
- # print(type(p))
- Optimizer.__init__(self, params, self_params)
-
- for group in self.param_groups:
- for p in group['params']:
- print(type(p.data), p.size())
- print('End SGD-H', height)
-
- def begin(self):
- for group in self.param_groups:
- for p in group['params']:
- #print(type(p.data), p.size())
- p.requires_grad_() # keep gradient information…
- p.retain_grad() # even if not a leaf…
- #p.register_hook(lambda x: print(self.height, x.grad_fn))
-
- self.optimizer.begin()
-
- def print_grad_fn(self):
- self.optimizer.print_grad_fn()
- for group in self.param_groups:
- for i, p in enumerate(group['params']):
- print(self.height," - ", i, p.grad_fn)
-
- #def adjust(self, params):
- # self.optimizer.adjust(self.params)
- # for name, param in params.items():
- # g = param.grad.detach()
- # params[name] = param.detach() - g * self.params["lr"]
-
- def step(self):
- """Performs a single optimization step.
-
- Arguments:
- closure (callable, optional): A closure that reevaluates the model
- and returns the loss.
- """
- print('SGD start')
- self.optimizer.step()
-
- for group in self.param_groups:
- for i, p in enumerate(group['params']):
- if p.grad is None:
- continue
- #d_p = p.grad.data
- d_p = p.grad.detach()
-
- #print(group['lr'])
- p.data.add_(-group['lr'].item(), d_p)
- #group['params'][i] = p.detach() - d_p * group['lr']
- p.data-= group['lr']*d_p #Data ne pas utiliser perte info
-
- for p in group['params']:
- if p.grad is None:
- print(p, p.grad)
- continue
-
- print("SGD end")
- #return loss
-
- def __str__(self):
- return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
-
-
-class Adam(Optimizable, nn.Module):
- """
- A fully hyperoptimizable Adam optimizer
- """
-
- def clamp(x):
- return (x.tanh() + 1.0) / 2.0
-
- def unclamp(y):
- z = y * 2.0 - 1.0
- return ((1.0 + z) / (1.0 - z)).log() / 2.0
-
- def __init__(
- self,
- alpha=0.001,
- beta1=0.9,
- beta2=0.999,
- log_eps=-8.0,
- optimizer=NoOpOptimizer(),
- device = torch.device('cuda')
- ):
- #super(Adam, self).__init__()
- nn.Module.__init__(self)
- self.device = device
- params = nn.ParameterDict({
- "alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
- "beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
- "beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
- "log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
- })
- Optimizable.__init__(self, params, optimizer)
- self.num_adjustments = 0
- self.cache = {}
-
- for name, param in params.items():
- param.requires_grad_() # keep gradient information…
- param.retain_grad() # even if not a leaf…
-
- def adjust(self, params, pytorch_mod=False):
- self.num_adjustments += 1
- self.optimizer.adjust(self.params)
- t = self.num_adjustments
- beta1 = Adam.clamp(self.params["beta1"])
- beta2 = Adam.clamp(self.params["beta2"])
-
- updated_param = []
- if pytorch_mod:
- params = params.named_parameters(prefix='') #Changer nom d'input...
-
- for name, param in params:
- if name not in self.cache:
- self.cache[name] = {
- "m": torch.zeros(param.shape, device=self.device),
- "v": torch.zeros(param.shape, device=self.device)
- + 10.0 ** self.params["log_eps"].data
- # NOTE that we add a little ‘fudge factor' here because sqrt is not
- # differentiable at exactly zero
- }
- #print(name, param.device)
- g = param.grad.detach()
- self.cache[name]["m"] = m = (
- beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
- )
- self.cache[name]["v"] = v = (
- beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
- )
- self.all_params_with_gradients.append(nn.Parameter(m)) #Risque de surcharger la memoire => Dict mieux ?
- self.all_params_with_gradients.append(nn.Parameter(v))
- m_hat = m / (1.0 - beta1 ** float(t))
- v_hat = v / (1.0 - beta2 ** float(t))
- dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
- updated_param[name] = param.detach() - self.params["alpha"] * dparam
-
- if pytorch_mod: params.update(updated_param) #Changer nom d'input...
- else: params = updated_param
-
- def __str__(self):
- return "adam(" + str(self.params) + ") / " + str(self.optimizer)
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py
deleted file mode 100755
index 6ed0f6f..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/main.py
+++ /dev/null
@@ -1,182 +0,0 @@
-import numpy as np
-import json, math, time, os
-from hyperopt import *
-import gc
-
-BATCH_SIZE = 300
-
-mnist_train = torchvision.datasets.MNIST(
- "./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
-)
-
-mnist_test = torchvision.datasets.MNIST(
- "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
-)
-
-dl_train = torch.utils.data.DataLoader(
- mnist_train, batch_size=BATCH_SIZE, shuffle=False
-)
-dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
-
-
-def test(model):
- for i, (features_, labels_) in enumerate(dl_test):
- features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
- pred = model.forward(features)
- return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
-
-
-def train(model, epochs=3, height=1):
- stats = []
- for epoch in range(epochs):
- for i, (features_, labels_) in enumerate(dl_train):
- t0 = time.process_time()
- model.begin()
- features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
- pred = model.forward(
- features
- ) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
- loss = F.nll_loss(pred, labels)
- model.zero_grad()
- loss.backward(create_graph=True)
- model.adjust()
- tf = time.process_time()
- data = {
- "time": tf - t0,
- "iter": epoch * len(dl_train) + i,
- "loss": loss.item(),
- "params": {
- k: v.item()
- for k, v in model.optimizer.parameters.items()
- if "." not in k
- },
- }
- stats.append(data)
- return stats
-
-
-def run(opt, name="out", usr={}, epochs=3, height=1):
- torch.manual_seed(0x42)
- model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
- print("Running...", str(model))
- model.initialize()
- log = train(model, epochs, height)
- acc = test(model)
- out = {"acc": acc, "log": log, "usr": usr}
- with open("log/%s.json" % name, "w+") as f:
- json.dump(out, f, indent=True)
- times = [x["time"] for x in log]
- print("Times (ms):", np.mean(times), "+/-", np.std(times))
- print("Final accuracy:", acc)
- return out
-
-
-def sgd_experiments():
- run(SGD(0.01), "sgd", epochs=1)
- out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
- alpha = out["log"][-1]["params"]["alpha"]
- print(alpha)
- run(SGD(alpha), "sgd-final", epochs=1)
-
-
-def adam_experiments():
- run(Adam(), "adam", epochs=1)
- print()
- mo = SGDPerParam(
- 0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
- )
- out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
- p = out["log"][-1]["params"]
- alpha = p["alpha"]
- beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
- beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
- log_eps = p["log_eps"]
- print(alpha, beta1, beta2, log_eps)
- print(mo)
- run(
- Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
- "adam+sgd-final",
- epochs=1,
- )
- print()
- out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
- p = out["log"][-1]["params"]
- alpha = p["alpha"]
- beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
- beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
- log_eps = p["log_eps"]
- print(alpha, beta1, beta2, log_eps)
- run(
- Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
- "adam2-final",
- epochs=1,
- )
- print()
- mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
- out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
- p = out["log"][-1]["params"]
- alpha = p["alpha"]
- print(alpha)
- print(mo)
- run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
- print()
- out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
- p = out["log"][-1]["params"]
- alpha = p["alpha"]
- print(alpha)
- run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
-
-
-def surface():
- run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
- for log_alpha in np.linspace(-3, 2, 10):
- run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
-
-
-def make_sgd_stack(height, top):
- if height == 0:
- return SGD(alpha=top)
- return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
-
-
-def make_adam_stack(height, top=0.0000001):
- if height == 0:
- return Adam(alpha=top)
- return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
-
-
-def stack_test():
- for top in np.linspace(-7, 3, 20):
- for height in range(6):
- print("height =", height, "to p=", top)
- opt = make_sgd_stack(height, 10 ** top)
- run(
- opt,
- "metasgd3-%d@%+.2f" % (height, top),
- {"height": height, "top": top},
- epochs=1,
- height=height,
- )
- gc.collect()
-
-
-def perf_test():
- for h in range(51):
- print("height:", h)
- # opt = make_sgd_stack(h, 0.01)
- opt = make_adam_stack(h)
- run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
- gc.collect()
-
-
-if __name__ == "__main__":
- try:
- os.mkdir("log")
- except:
- print("log/ exists already")
-
- surface()
- sgd_experiments()
- adam_experiments()
- stack_test()
- perf_test()
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt b/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt
deleted file mode 100755
index 5aae77b..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-numpy==1.17.2
-Pillow==6.2.0
-six==1.12.0
-torch==1.2.0
-torchvision==0.4.0
diff --git a/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py b/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py
deleted file mode 100755
index 936894f..0000000
--- a/Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py
+++ /dev/null
@@ -1,344 +0,0 @@
-import numpy as np
-import json, math, time, os
-from data_aug import *
-#from data_aug_v2 import *
-import gc
-
-import matplotlib.pyplot as plt
-from torchviz import make_dot, make_dot_from_trace
-
-from torch.utils.data import SubsetRandomSampler
-
-BATCH_SIZE = 300
-#TEST_SIZE = 10000
-TEST_SIZE = 300
-DATA_LIMIT = 10
-
-'''
-data_train = torchvision.datasets.MNIST(
- "./data", train=True, download=True,
- transform=torchvision.transforms.Compose([
- #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
- torchvision.transforms.ToTensor()
- ])
-)
-data_test = torchvision.datasets.MNIST(
- "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
-)
-
-'''
-data_train = torchvision.datasets.CIFAR10(
- "./data", train=True, download=True,
- transform=torchvision.transforms.Compose([
- #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
- torchvision.transforms.ToTensor()
- ])
-)
-
-data_test = torchvision.datasets.CIFAR10(
- "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
-)
-
-train_subset_indices=range(int(len(data_train)/2))
-val_subset_indices=range(int(len(data_train)/2),len(data_train))
-
-dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
-dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
-dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
-
-def test(model, reshape_in=True, device = torch.device('cuda')):
- for i, (features_, labels_) in enumerate(dl_test):
- if reshape_in :
- features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
- else:
- features, labels =features_, labels_
-
- features, labels = features.to(device), labels.to(device)
-
- pred = model.forward(features)
- return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
-
-def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
- if train_data: dl = dl_train
- else: dl = dl_val
- for i, (features_, labels_) in enumerate(dl):
- if i > DATA_LIMIT : break
- #t0 = time.process_time()
-
- if reshape_in :
- features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
- else:
- features, labels =features_, labels_
-
- features, labels = features.to(device), labels.to(device)
-
- #optimizer.begin()
- #optimizer.zero_grad()
- model.begin()
- model.zero_grad()
- pred = model.forward(features)
-
- #loss = F.nll_loss(pred, labels)
- loss = F.cross_entropy(pred,labels)
-
- #model.print_grad_fn()
- #optimizer.print_grad_fn()
- #print('-'*50)
-
- loss.backward(create_graph=True)
-
- #optimizer.step()
- if train_data: model.adjust()
- else: model.adjust_val()
-
- #tf = time.process_time()
- #data = {
- # "time": tf - t0,
- # "iter": epoch * len(dl_train) + i,
- # "loss": loss.item(),
- # "params": {
- # k: v.item()
- # for k, v in model.optimizer.parameters.items()
- # if "." not in k
- # },
- #}
- #stats.append(data)
-
- #print_torch_mem(i)
- return loss.item()
-
-def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
- log = []
- for epoch in range(epochs):
-
- #dl_train.dataset.transform=torchvision.transforms.Compose([
- # torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
- # torchvision.transforms.ToTensor()
- #])
- viz_data(fig_name='res/data_sample')
- t0 = time.process_time()
- loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
- train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
-
- #acc = test(model=model, reshape_in=reshape_in, device=device)
- acc = 0
-
-
- tf = time.process_time()
- data = {
- "time": tf - t0,
- "epoch": epoch,
- "loss": loss,
- "acc": acc,
- "params": {
- k: v.item()
- for k, v in model.optimizer.parameters.items()
- #for k, v in model.mods.data_aug.params.named_parameters()
- if "." not in k
-
- },
- }
- log.append(data)
-
-
- print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
- param = [p for p in model.param_grad() if p.grad is not None]
- if(len(param)!=0):
- print(param[-2],' / ', param[-2].grad)
- print(param[-1],' / ', param[-1].grad)
- return log
-
-def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
- stats = []
- for epoch in range(epochs):
- for i, (features_, labels_) in enumerate(dl_train):
- t0 = time.process_time()
- model.begin()
- if reshape_in :
- features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
- else:
- features, labels =features_, labels_
-
- features, labels = features.to(device), labels.to(device)
-
- pred = model.forward(
- features
- ) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
- #loss = F.nll_loss(pred, labels)
- loss = F.cross_entropy(pred,labels)
-
- #print('-'*50)
- #param = [p for p in model.param_grad() if p.grad is not None]
- #if(len(param)!=0):
- # print(param[-2],' / ', param[-2].grad)
- # print(param[-1],' / ', param[-1].grad)
-
- model.zero_grad()
- loss.backward(create_graph=True)
- model.adjust()
- tf = time.process_time()
- data = {
- "time": tf - t0,
- "iter": epoch * len(dl_train) + i,
- "loss": loss.item(),
- "params": {
- k: v.item()
- for k, v in model.optimizer.parameters.items()
- if "." not in k
- },
- }
- stats.append(data)
-
- print('-'*50)
- i=0
- for obj in gc.get_objects():
- try:
- if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)) and len(obj.size())>1:
- print(i, type(obj), obj.size())
- i+=1
- except:
- pass
- print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
- param = [p for p in model.param_grad() if p.grad is not None]
- if(len(param)!=0):
- print(param[-2],' / ', param[-2].grad)
- print(param[-1],' / ', param[-1].grad)
- return stats
-
-def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
- torch.manual_seed(0x42)
- if not cnn:
- reshape_in = True
- #model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
- model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
-
- else:
- reshape_in = False
- #model = LeNet(1, 10,opt, device)
- #model = LeNet_v2(1, 10,opt, device).to(device=device)
- model = LeNet_v2(3, 10,opt, device).to(device=device)
- optimizer=None
- '''
- m = LeNet_v3(1, 10)
- a = Data_aug()
- model = Augmented_model(model=m,
- data_augmenter=a,
- optimizer=opt).to(device) #deux fois le meme optimizer ?...
- '''
- '''
- m = LeNet_v3(1, 10)
- a = Data_aug()
- model = Augmented_model(model=m, data_augmenter=a).to(device)
- #optimizer = SGD(model.parameters())
- optimizer = SGD(model.parameters(), lr=0.01, height=1)
- '''
-
-
- #for idx, m in enumerate(model.modules()):
- # print(idx, '->', m)
- print("Running...", str(model))
- model.initialize()
- #print_model(model)
- #model.data_augmentation(False)
- #model.eval()
-
- log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
- model.eval()
- acc = test(model, reshape_in, device=device)
-
-
- #param = [p for p in model.param_grad() if p.grad is not None]
- #if(len(param)!=0):
- # print(param[-2],' / ', param[-2].grad)
- # print(param[-1],' / ', param[-1].grad)
-
- out = {"acc": acc, "log": log, "usr": usr}
- with open("log/%s.json" % name, "w+") as f:
- json.dump(out, f, indent=True)
- times = [x["time"] for x in log]
- print("Times (ms):", np.mean(times), "+/-", np.std(times))
- print("Final accuracy:", acc)
-
- #plot_res(log, fig_name='res/'+name)
-
- return out
-
-def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
- #print(height,device)
- if height == 0:
- return Adam(alpha=top, device=device)
- return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
-
-def plot_res(log, fig_name='res'):
-
- fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
- ax[0].set_title('Loss')
- ax[0].plot([x["loss"] for x in log])
-
- ax[1].set_title('Acc')
- ax[1].plot([x["acc"] for x in log])
-
- ax[2].set_title('mag')
- ax[2].plot([x["data_aug"] for x in log])
-
- plt.savefig(fig_name)
-
-def print_torch_mem(add_info=''):
-
- nb=0
- max_size=0
- for obj in gc.get_objects():
- try:
- if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
- #print(i, type(obj), obj.size())
- size = np.sum(obj.size())
- if(size>max_size): max_size=size
- nb+=1
- except:
- pass
- print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
-
-def print_model(model, fig_name='graph/graph'): #Semble ne pas marcher pour les models en fonctionnel
- x = torch.randn(1,1,28,28, device=device)
- dot=make_dot(model(x), params=dict(model.named_parameters()))
- dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
- dot.render(fig_name)
- print("Model graph generated !")
-
-def viz_data(fig_name='data_sample'):
-
- features_, labels_ = next(iter(dl_train))
- plt.figure(figsize=(10,10))
- #for i, (features_, labels_) in enumerate(dl_train):
- for i in range(25):
- if i==25: break
- #print(features_.size(), labels_.size())
-
- plt.subplot(5,5,i+1)
- plt.xticks([])
- plt.yticks([])
- plt.grid(False)
-
- img = features_[i,0,:,:]
-
- #print('im shape',img.shape)
- plt.imshow(img, cmap=plt.cm.binary)
- plt.xlabel(labels_[i].item())
-
- plt.savefig(fig_name)
-
-##########################################
-if __name__ == "__main__":
- try:
- os.mkdir("log")
- except:
- print("log/ exists already")
-
- device = torch.device('cuda')
-
- run(make_adam_stack(height=1, top=0.001, device=device),
- "Augmented_MNIST",
- epochs=100,
- cnn=True,
- device = device)
- print()
\ No newline at end of file
diff --git a/Old/PBA/LeNet.py b/Old/PBA/LeNet.py
deleted file mode 100755
index 7a411b6..0000000
--- a/Old/PBA/LeNet.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import numpy as np
-import tensorflow as tf
-
-## build the neural network class
-# weight initialization
-def weight_variable(shape, name = None):
- initial = tf.truncated_normal(shape, stddev=0.1)
- return tf.Variable(initial, name = name)
-
-# bias initialization
-def bias_variable(shape, name = None):
- initial = tf.constant(0.1, shape=shape) # positive bias
- return tf.Variable(initial, name = name)
-
-# 2D convolution
-def conv2d(x, W, name = None):
- return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
-
-# max pooling
-def max_pool_2x2(x, name = None):
- return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
- padding='SAME', name = name)
-
-def LeNet(images, num_classes):
- # tunable hyperparameters for nn architecture
- s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
- n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
- s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
- n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
- n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
- n_n_fc2 = 500; # number of neurons of first fully connected layer (default = 576)
-
- #print(images.shape)
- # 1.layer: convolution + max pooling
- W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
- b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
- h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
- h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
- #print(h_conv1_tf.shape)
- #print(h_pool1_tf.shape)
- # 2.layer: convolution + max pooling
- W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
- b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
- h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
- h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
-
- #print(h_pool2_tf.shape)
-
- # 4.layer: fully connected
- W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
- b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
- h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
- h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
- name = 'h_fc1_tf') # (.,1024)
-
- # add dropout
- #keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
- #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
- print(h_fc1_tf.shape)
-
- # 5.layer: fully connected
- W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
- b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
- z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
- # predicted probabilities in one-hot encoding
- #y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
-
- # tensor of correct predictions
- #y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
- # tf.argmax(y_data_tf, 1),
- # name = 'y_pred_correct_tf')
- logits = z_pred_tf
- return logits #y_pred_proba_tf
diff --git a/Old/PBA/model.py b/Old/PBA/model.py
deleted file mode 100755
index 47a0aa9..0000000
--- a/Old/PBA/model.py
+++ /dev/null
@@ -1,353 +0,0 @@
-# Copyright 2018 The TensorFlow Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""PBA & AutoAugment Train/Eval module.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import os
-import time
-
-import numpy as np
-import tensorflow as tf
-
-import autoaugment.custom_ops as ops
-from autoaugment.shake_drop import build_shake_drop_model
-from autoaugment.shake_shake import build_shake_shake_model
-import pba.data_utils as data_utils
-import pba.helper_utils as helper_utils
-from pba.wrn import build_wrn_model
-from pba.resnet import build_resnet_model
-
-from pba.LeNet import LeNet
-
-arg_scope = tf.contrib.framework.arg_scope
-
-
-def setup_arg_scopes(is_training):
-  """Build the arg scopes that wrap construction of an image model.
-
-  Args:
-    is_training: whether the model is being built for training; passed to
-      `ops.batch_norm` as its `is_training` keyword.
-
-  Returns:
-    A one-element list containing the batch-norm arg scope.
-  """
-  bn_kwargs = {
-      'decay': 0.9,      # decay for the moving averages
-      'epsilon': 1e-5,   # keeps the variance away from zero
-      'scale': True,
-      'is_training': is_training,
-  }
-  return [arg_scope([ops.batch_norm], **bn_kwargs)]
-
-
-def build_model(inputs, num_classes, is_training, hparams):
-  """Build the image model selected by `hparams.model_name`.
-
-  Args:
-    inputs: input features/images fed to the model.
-    num_classes: number of output classes being predicted.
-    is_training: whether the model is built for training.
-    hparams: hyperparameters; `model_name` picks the architecture and is one
-      of 'pyramid_net', 'wrn', 'shake_shake', 'resnet' or 'LeNet'.
-
-  Returns:
-    The logits of the image model.
-
-  Raises:
-    ValueError: if `setup_arg_scopes` does not return exactly one scope, or
-      if the model name is not recognised.
-  """
-  scopes = setup_arg_scopes(is_training)
-  if len(scopes) != 1:
-    raise ValueError('Nested scopes depreciated in py3.')
-
-  model_name = hparams.model_name
-  with scopes[0]:
-    if model_name == 'pyramid_net':
-      return build_shake_drop_model(inputs, num_classes, is_training)
-    if model_name == 'wrn':
-      return build_wrn_model(inputs, num_classes, hparams.wrn_size)
-    if model_name == 'shake_shake':
-      return build_shake_shake_model(inputs, num_classes, hparams,
-                                     is_training)
-    if model_name == 'resnet':
-      return build_resnet_model(inputs, num_classes, hparams, is_training)
-    if model_name == 'LeNet':
-      return LeNet(inputs, num_classes)
-    raise ValueError("Unknown model name.")
-
-
-class Model(object):
- """Builds the TF graph (placeholders, loss, optional train op) for one mode.
-
- Call `build('train')` or `build('eval')` after construction; attributes such
- as `images`, `labels`, `cost`, `predictions`, `saver` and `init` are only
- assigned during `build`.
- """
-
- def __init__(self, hparams, num_classes, image_size):
- """Stores the configuration; no graph ops are created here."""
- self.hparams = hparams
- self.num_classes = num_classes
- self.image_size = image_size
-
- def build(self, mode):
- """Construct the model for `mode`, which must be 'train' or 'eval'."""
- assert mode in ['train', 'eval']
- self.mode = mode
- self._setup_misc(mode)
- self._setup_images_and_labels(self.hparams.dataset)
- self._build_graph(self.images, self.labels, mode)
-
- # NOTE(review): `_build_graph` already assigns `self.init` with the same
- # pair of initializers; this rebinds it to a second, equivalent group op.
- self.init = tf.group(tf.global_variables_initializer(),
- tf.local_variables_initializer())
-
- def _setup_misc(self, mode):
- """Sets up miscellaneous in the model constructor."""
- # Despite the `_ph` suffix this is a non-trainable variable, not a
- # placeholder; `_build_train_op` uses it as the optimizer learning rate.
- self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
- self.reuse = None if (mode == 'train') else True
- self.batch_size = self.hparams.batch_size
- # Evaluation uses its own batch size.
- if mode == 'eval':
- self.batch_size = self.hparams.test_batch_size
-
- def _setup_images_and_labels(self, dataset):
- """Sets up image and label placeholders for the model."""
- # CIFAR datasets and every training graph get a fixed batch dimension;
- # any other dataset in eval mode gets a variable (None) batch dimension.
- if dataset == 'cifar10' or dataset == 'cifar100' or self.mode == 'train':
- self.images = tf.placeholder(tf.float32,
- [self.batch_size, self.image_size, self.image_size, 3])
- self.labels = tf.placeholder(tf.float32,
- [self.batch_size, self.num_classes])
- else:
- self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3])
- self.labels = tf.placeholder(tf.float32, [None, self.num_classes])
-
- def assign_epoch(self, session, epoch_value):
- """Runs `self._epoch_update`, feeding `epoch_value` into `self._new_epoch`."""
- # NOTE(review): neither `_epoch_update` nor `_new_epoch` is assigned
- # anywhere in this class as shown, so this would raise AttributeError
- # unless they are set elsewhere -- confirm or remove.
- session.run(
- self._epoch_update, feed_dict={self._new_epoch: epoch_value})
-
- def _build_graph(self, images, labels, mode):
- """Constructs the TF graph for the model.
-
- Args:
- images: A 4-D image Tensor
- labels: A 2-D labels Tensor.
- mode: string indicating the mode; `build` only passes 'train' or 'eval'.
- Any string containing 'train' is treated as training.
- """
- is_training = 'train' in mode
- if is_training:
- self.global_step = tf.train.get_or_create_global_step()
-
- logits = build_model(images, self.num_classes, is_training,
- self.hparams)
- self.predictions, self.cost = helper_utils.setup_loss(logits, labels)
-
- self._calc_num_trainable_params()
-
- # Adds L2 weight decay to the cost
- self.cost = helper_utils.decay_weights(self.cost,
- self.hparams.weight_decay_rate)
-
- if is_training:
- self._build_train_op()
-
- # Setup checkpointing for this child model
- # Keep 2 or more checkpoints around during training.
- # (max_to_keep=10 is what is actually passed to the Saver below.)
- with tf.device('/cpu:0'):
- self.saver = tf.train.Saver(max_to_keep=10)
-
- self.init = tf.group(tf.global_variables_initializer(),
- tf.local_variables_initializer())
-
- def _calc_num_trainable_params(self):
- """Counts the elements of all trainable variables, stores and logs the total."""
- self.num_trainable_params = np.sum([
- np.prod(var.get_shape().as_list())
- for var in tf.trainable_variables()
- ])
- tf.logging.info('number of trainable params: {}'.format(
- self.num_trainable_params))
-
- def _build_train_op(self):
- """Builds the train op for the model."""
- hparams = self.hparams
- tvars = tf.trainable_variables()
- grads = tf.gradients(self.cost, tvars)
- # Clipping is skipped entirely when the configured norm is <= 0.
- if hparams.gradient_clipping_by_global_norm > 0.0:
- grads, norm = tf.clip_by_global_norm(
- grads, hparams.gradient_clipping_by_global_norm)
- tf.summary.scalar('grad_norm', norm)
-
- # Setup the initial learning rate
- initial_lr = self.lr_rate_ph
- optimizer = tf.train.MomentumOptimizer(
- initial_lr, 0.9, use_nesterov=True)
-
- self.optimizer = optimizer
- apply_op = optimizer.apply_gradients(
- zip(grads, tvars), global_step=self.global_step, name='train_step')
- # `train_op` groups the UPDATE_OPS collection under a control dependency
- # on the gradient step, so running it also applies the gradients.
- train_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- with tf.control_dependencies([apply_op]):
- self.train_op = tf.group(*train_ops)
-
-
-class ModelTrainer(object):
-  """Trains and evaluates an instance of the Model class.
-
-  Construction loads the dataset, builds a 'train' and an 'eval' `Model`
-  under the same 'model' variable scope, and opens a TF session that stays
-  entered for the lifetime of the trainer.
-  """
-
-  def __init__(self, hparams):
-    self._session = None
-    self.hparams = hparams
-
-    # Seed numpy while the dataset is built so that the same validation set
-    # is used for each model, then re-seed from entropy.
-    np.random.seed(0)
-    self.data_loader = data_utils.DataSet(hparams)
-    np.random.seed()
-    self.data_loader.reset()
-
-    # Extra setup for Ray: build both graphs and enter a live session.
-    self._build_models()
-    self._new_session()
-    self._session.__enter__()
-
-  def save_model(self, checkpoint_dir, step=None):
-    """Save the current variables as a checkpoint under `checkpoint_dir`.
-
-    Args:
-      checkpoint_dir: directory the checkpoint files are written to.
-      step: If provided, the checkpoint is named 'model.ckpt-<step>' so that
-        existing checkpoints are not overwritten. If None, the checkpoint is
-        written to 'model.ckpt'.
-
-    Returns:
-      The checkpoint path prefix handed to the saver.
-    """
-    model_save_name = os.path.join(checkpoint_dir, 'model.ckpt')
-    if step is not None:
-      model_save_name += '-' + str(step)
-    self.saver.save(self.session, model_save_name)
-    tf.logging.info('Saved child model')
-    return model_save_name
-
-  def extract_model_spec(self, checkpoint_path):
-    """Restore the session's variables from `checkpoint_path`."""
-    self.saver.restore(self.session, checkpoint_path)
-    tf.logging.warning(
-        'Loaded child model checkpoint from {}'.format(checkpoint_path))
-
-  def eval_child_model(self, model, data_loader, mode):
-    """Evaluate the child model.
-
-    Args:
-      model: image model that will be evaluated.
-      data_loader: dataset object to extract eval data from.
-      mode: will the model be evalled on train, val or test.
-
-    Returns:
-      Accuracy of the model on the specified dataset.
-    """
-    tf.logging.info('Evaluating child model in mode {}'.format(mode))
-    # Retry for as long as TF reports a retryable (aborted/unavailable)
-    # error; any other exception propagates to the caller.
-    while True:
-      try:
-        accuracy = helper_utils.eval_child_model(
-            self.session, model, data_loader, mode)
-        tf.logging.info(
-            'Eval child model accuracy: {}'.format(accuracy))
-        break
-      except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
-        tf.logging.info(
-            'Retryable error caught: {}. Retrying.'.format(e))
-
-    return accuracy
-
-  def _new_session(self):
-    """Create a session, run both models' initializers, and return it.
-
-    This is a plain method on purpose: it never yields, so it must not be
-    wrapped in `contextlib.contextmanager`. The session is also stored on
-    `self._session`.
-    """
-    sess_cfg = tf.ConfigProto(
-        allow_soft_placement=True, log_device_placement=False)
-    sess_cfg.gpu_options.allow_growth = True
-    self._session = tf.Session('', config=sess_cfg)
-    self._session.run([self.m.init, self.meval.init])
-    return self._session
-
-  def _build_models(self):
-    """Builds the image models for train and eval."""
-    # The eval model is built under the same scope name with reuse=True.
-    with tf.variable_scope('model', use_resource=False):
-      m = Model(self.hparams, self.data_loader.num_classes,
-                self.data_loader.image_size)
-      m.build('train')
-      self._num_trainable_params = m.num_trainable_params
-      self._saver = m.saver
-    with tf.variable_scope('model', reuse=True, use_resource=False):
-      meval = Model(self.hparams, self.data_loader.num_classes,
-                    self.data_loader.image_size)
-      meval.build('eval')
-    self.m = m
-    self.meval = meval
-
-  def _run_training_loop(self, curr_epoch):
-    """Trains the model `m` for one epoch and returns the train accuracy."""
-    start_time = time.time()
-    # Same retry policy as eval_child_model.
-    while True:
-      try:
-        train_accuracy = helper_utils.run_epoch_training(
-            self.session, self.m, self.data_loader, curr_epoch)
-        break
-      except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
-        tf.logging.info(
-            'Retryable error caught: {}. Retrying.'.format(e))
-    tf.logging.info('Finished epoch: {}'.format(curr_epoch))
-    tf.logging.info('Epoch time(min): {}'.format(
-        (time.time() - start_time) / 60.0))
-    return train_accuracy
-
-  def _compute_final_accuracies(self, iteration):
-    """Return the test accuracy on the last epoch, and 0 before that."""
-    if iteration >= self.hparams.num_epochs - 1:
-      test_accuracy = self.eval_child_model(self.meval, self.data_loader,
-                                            'test')
-    else:
-      test_accuracy = 0
-    tf.logging.info('Test Accuracy: {}'.format(test_accuracy))
-    return test_accuracy
-
-  def run_model(self, epoch):
-    """Train for one epoch, then validate if a validation set is configured.
-
-    Returns:
-      A `(training_accuracy, valid_accuracy)` tuple; `valid_accuracy` is 0.
-      when `hparams.validation_size` is not positive.
-    """
-    valid_accuracy = 0.
-    training_accuracy = self._run_training_loop(epoch)
-    if self.hparams.validation_size > 0:
-      valid_accuracy = self.eval_child_model(self.meval,
-                                             self.data_loader, 'val')
-    tf.logging.info('Train Acc: {}, Valid Acc: {}'.format(
-        training_accuracy, valid_accuracy))
-    return training_accuracy, valid_accuracy
-
-  def reset_config(self, new_hparams):
-    """Swap in `new_hparams` and pass them to the data loader's policy reset."""
-    self.hparams = new_hparams
-    self.data_loader.reset_policy(new_hparams)
-
-  @property
-  def saver(self):
-    return self._saver
-
-  @property
-  def session(self):
-    return self._session
-
-  @property
-  def num_trainable_params(self):
-    return self._num_trainable_params
diff --git a/Old/PBA/search.sh b/Old/PBA/search.sh
deleted file mode 100755
index 08fa9c2..0000000
--- a/Old/PBA/search.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-export PYTHONPATH="$(pwd)"
-
-cifar10_LeNet_search() {
-    # Run the PBA search on CIFAR-10 with the LeNet child model.
-    # Paths are resolved against the current directory; `local` keeps them
-    # out of the global shell scope.
-    # NOTE(review): uses the same --name as cifar10_search -- confirm the two
-    # runs are meant to share a results directory.
-    local local_dir="$PWD/results/"
-    local data_path="$PWD/datasets/cifar-10-batches-py"
-
-    python pba/search.py \
-        --local_dir "$local_dir" \
-        --model_name LeNet \
-        --data_path "$data_path" --dataset cifar10 \
-        --train_size 4000 --val_size 46000 \
-        --checkpoint_freq 0 \
-        --name "cifar10_search" --gpu 0.15 --cpu 2 \
-        --num_samples 16 --perturbation_interval 3 --epochs 150 \
-        --explore cifar10 --aug_policy cifar10 \
-        --lr 0.1 --wd 0.0005
-}
-
-cifar10_search() {
-    # Run the PBA search on CIFAR-10 with the wrn_40_2 child model.
-    # Paths are resolved against the current directory; `local` keeps them
-    # out of the global shell scope.
-    local local_dir="$PWD/results/"
-    local data_path="$PWD/datasets/cifar-10-batches-py"
-
-    python pba/search.py \
-        --local_dir "$local_dir" \
-        --model_name wrn_40_2 \
-        --data_path "$data_path" --dataset cifar10 \
-        --train_size 4000 --val_size 46000 \
-        --checkpoint_freq 0 \
-        --name "cifar10_search" --gpu 0.15 --cpu 2 \
-        --num_samples 16 --perturbation_interval 3 --epochs 200 \
-        --explore cifar10 --aug_policy cifar10 \
-        --lr 0.1 --wd 0.0005
-}
-
-svhn_search() {
-    # Run the PBA search on SVHN with the wrn_40_2 child model, cutout off.
-    # Paths are resolved against the current directory; `local` keeps them
-    # out of the global shell scope.
-    local local_dir="$PWD/results/"
-    local data_path="$PWD/datasets/"
-
-    python pba/search.py \
-        --local_dir "$local_dir" --data_path "$data_path" \
-        --model_name wrn_40_2 --dataset svhn \
-        --train_size 1000 --val_size 7325 \
-        --checkpoint_freq 0 \
-        --name "svhn_search" --gpu 0.19 --cpu 2 \
-        --num_samples 16 --perturbation_interval 3 --epochs 160 \
-        --explore cifar10 --aug_policy cifar10 --no_cutout \
-        --lr 0.1 --wd 0.005
-}
-
-# Dispatch on the first argument; unknown input is reported on stderr and
-# fails the script so callers can detect the mistake.
-case "$1" in
-    rcifar10) cifar10_search ;;
-    rsvhn)    svhn_search ;;
-    LeNet)    cifar10_LeNet_search ;;
-    *)
-        echo "invalid args" >&2
-        exit 1
-        ;;
-esac
diff --git a/Old/PBA/setup.py b/Old/PBA/setup.py
deleted file mode 100755
index cc9b38b..0000000
--- a/Old/PBA/setup.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""Parse flags and set up hyperparameters."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import random
-import tensorflow as tf
-
-from pba.augmentation_transforms_hp import NUM_HP_TRANSFORM
-
-
-def create_parser(state):
-  """Create the arg parser for `state` and parse the command line.
-
-  Despite its name, this returns the parsed arguments, not the parser.
-
-  Args:
-    state: 'train' or 'search'; selects the mode-specific flags.
-
-  Returns:
-    The `argparse.Namespace` produced by `parser.parse_args()`.
-
-  Raises:
-    ValueError: if `state` is neither 'train' nor 'search'.
-  """
-  parser = argparse.ArgumentParser()
-  parser.add_argument(
-      '--model_name',
-      # The default has to be one of `choices`: argparse does not validate
-      # defaults, and create_hparams rejects any other name ('wrn' included).
-      default='wrn_40_2',
-      choices=('wrn_28_10', 'wrn_40_2', 'shake_shake_32', 'shake_shake_96',
-               'shake_shake_112', 'pyramid_net', 'resnet', 'LeNet'))
-  parser.add_argument(
-      '--data_path',
-      default='/tmp/datasets/',
-      help='Directory where dataset is located.')
-  parser.add_argument(
-      '--dataset',
-      default='cifar10',
-      choices=('cifar10', 'cifar100', 'svhn', 'svhn-full', 'test'))
-  parser.add_argument(
-      '--recompute_dset_stats',
-      action='store_true',
-      help='Instead of using hardcoded mean/std, recompute from dataset.')
-  parser.add_argument(
-      '--local_dir', type=str, default='/tmp/ray_results/',
-      help='Ray directory.')
-  parser.add_argument(
-      '--restore', type=str, default=None,
-      help='If specified, tries to restore from given path.')
-  parser.add_argument(
-      '--train_size', type=int, default=5000,
-      help='Number of training examples.')
-  parser.add_argument(
-      '--val_size', type=int, default=45000,
-      help='Number of validation examples.')
-  parser.add_argument(
-      '--checkpoint_freq', type=int, default=50,
-      help='Checkpoint frequency.')
-  parser.add_argument(
-      '--cpu', type=float, default=4, help='Allocated by Ray')
-  parser.add_argument(
-      '--gpu', type=float, default=1, help='Allocated by Ray')
-  parser.add_argument(
-      '--aug_policy',
-      type=str,
-      default='cifar10',
-      help=
-      'which augmentation policy to use (in augmentation_transforms_hp.py)')
-  # search-use only
-  parser.add_argument(
-      '--explore',
-      type=str,
-      default='cifar10',
-      help='which explore function to use')
-  parser.add_argument(
-      '--epochs',
-      type=int,
-      default=0,
-      help='Number of epochs, or <=0 for default')
-  parser.add_argument(
-      '--no_cutout', action='store_true', help='turn off cutout')
-  parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
-  parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
-  parser.add_argument('--bs', type=int, default=128, help='batch size')
-  parser.add_argument(
-      '--test_bs', type=int, default=25, help='test batch size')
-  parser.add_argument(
-      '--num_samples', type=int, default=1, help='Number of Ray samples')
-
-  if state == 'train':
-    parser.add_argument(
-        '--use_hp_policy',
-        action='store_true',
-        help='otherwise use autoaug policy')
-    parser.add_argument(
-        '--hp_policy',
-        type=str,
-        default=None,
-        help='either a comma separated list of values or a file')
-    parser.add_argument(
-        '--hp_policy_epochs',
-        type=int,
-        default=200,
-        help='number of epochs/iterations policy trained for')
-    parser.add_argument(
-        '--no_aug',
-        action='store_true',
-        help=
-        'no additional augmentation at all (besides cutout if not toggled)'
-    )
-    parser.add_argument(
-        '--flatten',
-        action='store_true',
-        help='randomly select aug policy from schedule')
-    parser.add_argument('--name', type=str, default='autoaug')
-  elif state == 'search':
-    parser.add_argument('--perturbation_interval', type=int, default=10)
-    parser.add_argument('--name', type=str, default='autoaug_pbt')
-  else:
-    raise ValueError('unknown state')
-
-  args = parser.parse_args()
-  tf.logging.info(str(args))
-  return args
-
-
-def create_hparams(state, FLAGS):  # pylint: disable=invalid-name
-  """Creates hyperparameters to pass into Ray config.
-
-  Different options depending on search or eval mode.
-
-  Args:
-    state: a string, 'train' or 'search'.
-    FLAGS: parsed command line flags.
-
-  Returns:
-    tf.hparams object.
-
-  Raises:
-    ValueError: if `state` or `FLAGS.model_name` is unknown, or if
-      `--use_hp_policy` is given without `--hp_policy`.
-  """
-  tf.logging.info('data path: {}'.format(FLAGS.data_path))
-  hparams = tf.contrib.training.HParams(
-      train_size=FLAGS.train_size,
-      validation_size=FLAGS.val_size,
-      dataset=FLAGS.dataset,
-      data_path=FLAGS.data_path,
-      batch_size=FLAGS.bs,
-      gradient_clipping_by_global_norm=5.0,
-      explore=FLAGS.explore,
-      aug_policy=FLAGS.aug_policy,
-      no_cutout=FLAGS.no_cutout,
-      recompute_dset_stats=FLAGS.recompute_dset_stats,
-      lr=FLAGS.lr,
-      weight_decay_rate=FLAGS.wd,
-      test_batch_size=FLAGS.test_bs)
-
-  if state == 'train':
-    hparams.add_hparam('no_aug', FLAGS.no_aug)
-    hparams.add_hparam('use_hp_policy', FLAGS.use_hp_policy)
-    if FLAGS.use_hp_policy:
-      policy_arg = FLAGS.hp_policy
-      if policy_arg is None:
-        raise ValueError('--use_hp_policy requires --hp_policy')
-      if policy_arg == 'random':
-        tf.logging.info('RANDOM SEARCH')
-        # Even positions are drawn from [0, 10], odd positions from [0, 9].
-        parsed_policy = [
-            random.randint(0, 10) if i % 2 == 0 else random.randint(0, 9)
-            for i in range(NUM_HP_TRANSFORM * 4)
-        ]
-      elif policy_arg.endswith(('.txt', '.p')):
-        # will be loaded in in data_utils
-        parsed_policy = policy_arg
-      else:
-        # Parse a fixed augmentation policy. Splitting on ',' alone accepts
-        # both "1,2,3" and "1, 2, 3" because int() ignores whitespace.
-        parsed_policy = [int(p) for p in policy_arg.split(',')]
-      hparams.add_hparam('hp_policy', parsed_policy)
-      hparams.add_hparam('hp_policy_epochs', FLAGS.hp_policy_epochs)
-    hparams.add_hparam('flatten', FLAGS.flatten)
-  elif state == 'search':
-    hparams.add_hparam('no_aug', False)
-    hparams.add_hparam('use_hp_policy', True)
-    # default start value of 0
-    hparams.add_hparam('hp_policy',
-                       [0 for _ in range(4 * NUM_HP_TRANSFORM)])
-  else:
-    raise ValueError('unknown state')
-
-  # flag value -> (hparams.model_name, default epochs, extra hparams)
-  model_configs = {
-      'wrn_40_2': ('wrn', 200, (('wrn_size', 32), ('wrn_depth', 40))),
-      'wrn_28_10': ('wrn', 200, (('wrn_size', 160), ('wrn_depth', 28))),
-      'resnet': ('resnet', 200,
-                 (('resnet_size', 20), ('num_filters', 32))),
-      'shake_shake_32': ('shake_shake', 1800,
-                         (('shake_shake_widen_factor', 2),)),
-      'shake_shake_96': ('shake_shake', 1800,
-                         (('shake_shake_widen_factor', 6),)),
-      'shake_shake_112': ('shake_shake', 1800,
-                          (('shake_shake_widen_factor', 7),)),
-      'pyramid_net': ('pyramid_net', 1800, ()),
-      'LeNet': ('LeNet', 200, ()),
-  }
-  if FLAGS.model_name not in model_configs:
-    raise ValueError('Not Valid Model Name: %s' % FLAGS.model_name)
-  model_name, epochs, extra_hparams = model_configs[FLAGS.model_name]
-  hparams.add_hparam('model_name', model_name)
-  for key, value in extra_hparams:
-    hparams.add_hparam(key, value)
-  if FLAGS.model_name == 'pyramid_net':
-    # pyramid_net always trains with batch size 64, overriding --bs.
-    hparams.set_hparam('batch_size', 64)
-
-  if FLAGS.epochs > 0:
-    tf.logging.info('overwriting with custom epochs')
-    epochs = FLAGS.epochs
-  hparams.add_hparam('num_epochs', epochs)
-  tf.logging.info('epochs: {}, lr: {}, wd: {}'.format(
-      hparams.num_epochs, hparams.lr, hparams.weight_decay_rate))
-  return hparams
diff --git a/Old/PBA/table_1_cifar10.sh b/Old/PBA/table_1_cifar10.sh
deleted file mode 100755
index 4d35bd6..0000000
--- a/Old/PBA/table_1_cifar10.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-export PYTHONPATH="$(pwd)"
-
-# args: [model name] [lr] [wd] #Learning rate / weight decay
-eval_cifar10() {
-    # Train and evaluate one model on full CIFAR-10 with the stored schedule.
-    #   $1: model name (also used in the run name)
-    #   $2: learning rate
-    #   $3: weight decay
-    # `local` keeps the helper variables out of the global shell scope.
-    local hp_policy="$PWD/schedules/rcifar10_16_wrn.txt"
-    local local_dir="$PWD/results/"
-    local data_path="$PWD/datasets/cifar-10-batches-py"
-
-    local size=50000
-    local dataset="cifar10"
-    local name="eval_cifar10_$1" # has 8 cutout size
-
-    python pba/train.py \
-        --local_dir "$local_dir" --data_path "$data_path" \
-        --model_name "$1" --dataset "$dataset" \
-        --train_size "$size" --val_size 0 \
-        --checkpoint_freq 25 --gpu 1 --cpu 4 \
-        --use_hp_policy --hp_policy "$hp_policy" \
-        --hp_policy_epochs 200 \
-        --aug_policy cifar10 --name "$name" \
-        --lr "$2" --wd "$3"
-}
-
-# Dispatch on "$1" rather than "$@": with several arguments "$@" expands to
-# several words and the `[` test fails with "too many arguments".
-# Unknown input is reported on stderr and fails the script.
-case "$1" in
-    wrn_28_10)   eval_cifar10 wrn_28_10 0.1 0.0005 ;;
-    ss_32)       eval_cifar10 shake_shake_32 0.01 0.001 ;;
-    ss_96)       eval_cifar10 shake_shake_96 0.01 0.001 ;;
-    ss_112)      eval_cifar10 shake_shake_112 0.01 0.001 ;;
-    pyramid_net) eval_cifar10 pyramid_net 0.05 0.00005 ;;
-    LeNet)       eval_cifar10 LeNet 0.05 0.0 ;;
-    *)
-        echo "invalid args" >&2
-        exit 1
-        ;;
-esac
diff --git a/Old/UDA/LeNet.py b/Old/UDA/LeNet.py
deleted file mode 100755
index 7a411b6..0000000
--- a/Old/UDA/LeNet.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import numpy as np
-import tensorflow as tf
-
-## build the neural network class
-# weight initialization
-def weight_variable(shape, name = None):
-    """Return a new variable of `shape` drawn from a truncated normal (stddev 0.1)."""
-    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name = name)
-
-# bias initialization
-def bias_variable(shape, name = None):
-    """Return a new variable of `shape` filled with the positive constant 0.1."""
-    return tf.Variable(tf.constant(0.1, shape=shape), name = name)
-
-# 2D convolution
-def conv2d(x, W, name = None):
-    """Convolve `x` with filter `W` using stride 1 and 'VALID' padding."""
-    unit_strides = [1, 1, 1, 1]
-    return tf.nn.conv2d(x, W, strides=unit_strides, padding='VALID',
-                        name = name)
-
-# max pooling
-def max_pool_2x2(x, name = None):
-    """Max-pool `x` with a 2x2 window, stride 2 and 'SAME' padding."""
-    window = [1, 2, 2, 1]
-    return tf.nn.max_pool(x, ksize=window, strides=window,
-                          padding='SAME', name = name)
-
-def LeNet(images, num_classes):
-    """Build a LeNet-style CNN and return its logits.
-
-    Layout: conv 5x5 (20 filters) -> 2x2 max-pool -> conv 5x5 (50 filters)
-    -> 2x2 max-pool -> fully connected (500, ReLU) -> fully connected
-    (num_classes). No softmax is applied.
-
-    Args:
-        images: 4-D image tensor; its last dimension (channels) and its two
-            spatial dimensions must be statically known. The batch dimension
-            may be None.
-        num_classes: number of output logits.
-
-    Returns:
-        The logits tensor with `num_classes` columns.
-    """
-    # tunable hyperparameters for nn architecture
-    s_f_conv1 = 5    # filter size of first convolution layer
-    n_f_conv1 = 20   # number of features of first convolution layer
-    s_f_conv2 = 5    # filter size of second convolution layer
-    n_f_conv2 = 50   # number of features of second convolution layer
-    n_n_fc1 = 500    # number of neurons of first fully connected layer
-
-    # Shape comments below assume 32x32 inputs.
-    # 1.layer: convolution + max pooling
-    W_conv1_tf = weight_variable(
-        [s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1],
-        name = 'W_conv1_tf')
-    b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf')
-    h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf,
-                            name = 'h_conv1_tf')  # (.,28,28,20)
-    h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf')  # (.,14,14,20)
-
-    # 2.layer: convolution + max pooling
-    W_conv2_tf = weight_variable(
-        [s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
-    b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
-    h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf,
-                            name ='h_conv2_tf')  # (.,10,10,50)
-    h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf')  # (.,5,5,50)
-
-    # 3.layer: fully connected
-    # Derive the flattened size from the static shape instead of hard-coding
-    # 5*5*n_f_conv2, and let reshape infer the batch dimension so that a
-    # None batch size works.
-    flat_dim = int(np.prod(h_pool2_tf.shape.as_list()[1:]))  # 1250
-    W_fc1_tf = weight_variable([flat_dim, n_n_fc1], name = 'W_fc1_tf')
-    b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf')
-    h_pool2_flat_tf = tf.reshape(h_pool2_tf, [-1, flat_dim],
-                                 name = 'h_pool3_flat_tf')  # (.,1250)
-    h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
-                          name = 'h_fc1_tf')  # (.,500)
-
-    # 4.layer: fully connected, producing the logits
-    W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
-    b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
-    z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf,
-                       name = 'z_pred_tf')  # (.,num_classes)
-    return z_pred_tf
diff --git a/Old/UDA/main.py b/Old/UDA/main.py
deleted file mode 100755
index 04f8950..0000000
--- a/Old/UDA/main.py
+++ /dev/null
@@ -1,620 +0,0 @@
-# coding=utf-8
-# Copyright 2019 The Google UDA Team Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""UDA on CIFAR-10 and SVHN.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import os
-import time
-import json
-
-import numpy as np
-
-from absl import flags
-import absl.logging as _logging # pylint: disable=unused-import
-
-import tensorflow as tf
-
-from randaugment import custom_ops as ops
-import data
-import utils
-
-from randaugment.wrn import build_wrn_model
-from randaugment.shake_drop import build_shake_drop_model
-from randaugment.shake_shake import build_shake_shake_model
-
-from randaugment.LeNet import LeNet
-
-
-# TPU related
-flags.DEFINE_string(
-    "master", default=None,
-    help="the TPU address. This should be set when using Cloud TPU")
-flags.DEFINE_string(
-    "tpu", default=None,
-    help="The Cloud TPU to use for training. This should be either the name "
-    "used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.")
-flags.DEFINE_string(
-    "gcp_project", default=None,
-    help="Project name for the Cloud TPU-enabled project. If not specified, "
-    "we will attempt to automatically detect the GCE project from metadata.")
-flags.DEFINE_string(
-    "tpu_zone", default=None,
-    help="GCE zone where the Cloud TPU is located in. If not specified, we "
-    "will attempt to automatically detect the GCE zone from metadata.")
-flags.DEFINE_bool(
-    "use_tpu", default=False,
-    help="Use TPUs rather than GPU/CPU.")
-flags.DEFINE_enum(
-    "task_name", "cifar10",
-    enum_values=["cifar10", "svhn"],
-    help="The task to use")
-
-# UDA config:
-# Adjacent help-string literals are concatenated by Python, so each piece
-# that is followed by another one ends with an explicit space.
-flags.DEFINE_integer(
-    "sup_size", default=4000,
-    help="Number of supervised pairs to use. "
-    "-1: all training samples. 4000: 4000 supervised examples.")
-flags.DEFINE_integer(
-    "aug_copy", default=0,
-    help="Number of different augmented data generated.")
-flags.DEFINE_integer(
-    "unsup_ratio", default=0,
-    help="The ratio between batch size of unlabeled data and labeled data, "
-    "i.e., unsup_ratio * train_batch_size is the batch_size for unlabeled data. "
-    "Do not use the unsupervised objective if set to 0.")
-flags.DEFINE_enum(
-    "tsa", "",
-    enum_values=["", "linear_schedule", "log_schedule", "exp_schedule"],
-    help="anneal schedule of training signal annealing. "
-    "tsa='' means not using TSA. See the paper for other schedules.")
-flags.DEFINE_float(
-    "uda_confidence_thresh", default=-1,
-    help="The threshold on predicted probability on unsupervised data. If set, "
-    "UDA loss will only be calculated on unlabeled examples whose largest "
-    "probability is larger than the threshold")
-flags.DEFINE_float(
-    "uda_softmax_temp", -1,
-    help="The temperature of the Softmax when making prediction on unlabeled "
-    "examples. -1 means to use normal Softmax")
-flags.DEFINE_float(
-    "ent_min_coeff", default=0,
-    help="")
-flags.DEFINE_integer(
-    "unsup_coeff", default=1,
-    help="The coefficient on the UDA loss. "
-    "setting unsup_coeff to 1 works for most settings. "
-    "When you have extremely few samples, consider increasing unsup_coeff")
-
-# Experiment (data/checkpoint/directory) config
-flags.DEFINE_string(
-    "data_dir", default=None,
-    help="Path to data directory containing `*.tfrecords`.")
-flags.DEFINE_string(
-    "model_dir", default=None,
-    help="model dir of the saved checkpoints.")
-flags.DEFINE_bool(
-    "do_train", default=True,
-    help="Whether to run training.")
-flags.DEFINE_bool(
-    "do_eval", default=False,
-    help="Whether to run eval on the test set.")
-flags.DEFINE_integer(
-    "dev_size", default=-1,
-    help="dev set size.")
-flags.DEFINE_bool(
-    "verbose", default=False,
-    help="Whether to print additional information.")
-
-# Training config
-flags.DEFINE_integer(
-    "train_batch_size", default=32,
-    help="Size of train batch.")
-flags.DEFINE_integer(
-    "eval_batch_size", default=8,
-    help="Size of evaluation batch.")
-flags.DEFINE_integer(
-    "train_steps", default=100000,
-    help="Total number of training steps.")
-flags.DEFINE_integer(
-    "iterations", default=10000,
-    help="Number of iterations per repeat loop.")
-flags.DEFINE_integer(
-    "save_steps", default=10000,
-    help="number of steps for model checkpointing.")
-flags.DEFINE_integer(
-    "max_save", default=10,
-    help="Maximum number of checkpoints to save.")
-
-# Model config
-flags.DEFINE_enum(
-    "model_name", default="wrn",
-    enum_values=["wrn", "shake_shake_32", "shake_shake_96", "shake_shake_112", "pyramid_net", "LeNet"],
-    help="Name of the model")
-flags.DEFINE_integer(
-    "num_classes", default=10,
-    help="Number of categories for classification.")
-flags.DEFINE_integer(
-    "wrn_size", default=32,
-    help="The size of WideResNet. It should be set to 32 for WRN-28-2 "
-    "and should be set to 160 for WRN-28-10")
-
-# Optimization config
-flags.DEFINE_float(
-    "learning_rate", default=0.03,
-    help="Maximum learning rate.")
-flags.DEFINE_float(
-    "weight_decay_rate", default=5e-4,
-    help="Weight decay rate.")
-flags.DEFINE_float(
-    "min_lr_ratio", default=0.004,
-    help="Minimum ratio learning rate.")
-flags.DEFINE_integer(
-    "warmup_steps", default=20000,
-    help="Number of steps for linear lr warmup.")
-
-
-
-FLAGS = tf.flags.FLAGS
-
-arg_scope = tf.contrib.framework.arg_scope
-
-
-def get_tsa_threshold(schedule, global_step, num_train_steps, start, end):
-  """Return the training-signal-annealing threshold at `global_step`.
-
-  A coefficient is computed from the training progress
-  `global_step / num_train_steps` according to `schedule`, then mapped
-  linearly onto `[start, end]`.
-
-  Args:
-    schedule: "linear_schedule", "exp_schedule" or "log_schedule".
-    global_step: current training step.
-    num_train_steps: total number of training steps.
-    start: threshold returned when the coefficient is 0.
-    end: threshold returned when the coefficient is 1.
-
-  Returns:
-    The threshold `coeff * (end - start) + start`.
-
-  Raises:
-    ValueError: if `schedule` is not one of the three supported names.
-  """
-  step_ratio = tf.to_float(global_step) / tf.to_float(num_train_steps)
-  if schedule == "linear_schedule":
-    coeff = step_ratio
-  elif schedule == "exp_schedule":
-    scale = 5
-    # coeff goes from exp(-5) (about 0.0067) to exp(0) = 1.
-    coeff = tf.exp((step_ratio - 1) * scale)
-  elif schedule == "log_schedule":
-    scale = 5
-    # coeff goes from 1 - exp(0) = 0 to 1 - exp(-5) (about 0.99).
-    coeff = 1 - tf.exp((-step_ratio) * scale)
-  else:
-    # Fail with a clear message instead of an unbound `coeff` below.
-    raise ValueError("Unknown TSA schedule: {!r}".format(schedule))
-  return coeff * (end - start) + start
-
-
-def setup_arg_scopes(is_training):
-  """Build the arg scopes that wrap construction of an image model.
-
-  Args:
-    is_training: whether the model is being built for training; passed to
-      `ops.batch_norm` as its `is_training` keyword.
-
-  Returns:
-    A one-element list containing the batch-norm arg scope.
-  """
-  bn_kwargs = {
-      "decay": 0.9,      # decay for the moving averages
-      "epsilon": 1e-5,   # keeps the variance away from zero
-      "scale": True,
-      "is_training": is_training,
-  }
-  return [arg_scope([ops.batch_norm], **bn_kwargs)]
-
-
-def build_model(inputs, num_classes, is_training, update_bn, hparams):
- """Constructs the vision model being trained/evaled.
-
- Args:
- inputs: input features/images being fed to the image model build built.
- num_classes: number of output classes being predicted.
- is_training: is the model training or not.
- hparams: additional hyperparameters associated with the image model.
-
- Returns:
- The logits of the image model.
- """
- scopes = setup_arg_scopes(is_training)
-
- try:
- from contextlib import nested
- except ImportError:
- from contextlib import ExitStack, contextmanager
-
- @contextmanager
- def nested(*contexts):
- with ExitStack() as stack:
- for ctx in contexts:
- stack.enter_context(ctx)
- yield contexts
-
- with nested(*scopes):
- if hparams.model_name == "pyramid_net":
- logits = build_shake_drop_model(
- inputs, num_classes, is_training)
- elif hparams.model_name == "wrn":
- logits = build_wrn_model(
- inputs, num_classes, hparams.wrn_size, update_bn)
- elif hparams.model_name == "shake_shake":
- logits = build_shake_shake_model(
- inputs, num_classes, hparams, is_training)
-
- elif hparams.model_name == "LeNet":
- logits = LeNet(inputs, num_classes)
-
- return logits
-
-
-def _kl_divergence_with_logits(p_logits, q_logits):
- p = tf.nn.softmax(p_logits)
- log_p = tf.nn.log_softmax(p_logits)
- log_q = tf.nn.log_softmax(q_logits)
-
- kl = tf.reduce_sum(p * (log_p - log_q), -1)
- return kl
-
-
-def anneal_sup_loss(sup_logits, sup_labels, sup_loss, global_step, metric_dict):
- tsa_start = 1. / FLAGS.num_classes
- eff_train_prob_threshold = get_tsa_threshold(
- FLAGS.tsa, global_step, FLAGS.train_steps,
- tsa_start, end=1)
-
- one_hot_labels = tf.one_hot(
- sup_labels, depth=FLAGS.num_classes, dtype=tf.float32)
- sup_probs = tf.nn.softmax(sup_logits, axis=-1)
- correct_label_probs = tf.reduce_sum(
- one_hot_labels * sup_probs, axis=-1)
- larger_than_threshold = tf.greater(
- correct_label_probs, eff_train_prob_threshold)
- loss_mask = 1 - tf.cast(larger_than_threshold, tf.float32)
- loss_mask = tf.stop_gradient(loss_mask)
- sup_loss = sup_loss * loss_mask
- avg_sup_loss = (tf.reduce_sum(sup_loss) /
- tf.maximum(tf.reduce_sum(loss_mask), 1))
- metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask)
- metric_dict["sup/eff_train_prob_threshold"] = eff_train_prob_threshold
- return sup_loss, avg_sup_loss
-
-
-def get_ent(logits, return_mean=True):
- log_prob = tf.nn.log_softmax(logits, axis=-1)
- prob = tf.exp(log_prob)
- ent = tf.reduce_sum(-prob * log_prob, axis=-1)
- if return_mean:
- ent = tf.reduce_mean(ent)
- return ent
-
-
-def get_model_fn(hparams):
- def model_fn(features, labels, mode, params):
- sup_labels = tf.reshape(features["label"], [-1])
-
- #### Configuring the optimizer
- global_step = tf.train.get_global_step()
- metric_dict = {}
- is_training = (mode == tf.estimator.ModeKeys.TRAIN)
- if FLAGS.unsup_ratio > 0 and is_training:
- all_images = tf.concat([features["image"],
- features["ori_image"],
- features["aug_image"]], 0)
- else:
- all_images = features["image"]
-
- with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
- all_logits = build_model(
- inputs=all_images,
- num_classes=FLAGS.num_classes,
- is_training=is_training,
- update_bn=True and is_training,
- hparams=hparams,
- )
-
- sup_bsz = tf.shape(features["image"])[0]
- sup_logits = all_logits[:sup_bsz]
-
- sup_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
- labels=sup_labels,
- logits=sup_logits)
- sup_prob = tf.nn.softmax(sup_logits, axis=-1)
- metric_dict["sup/pred_prob"] = tf.reduce_mean(
- tf.reduce_max(sup_prob, axis=-1))
- if FLAGS.tsa:
- sup_loss, avg_sup_loss = anneal_sup_loss(sup_logits, sup_labels, sup_loss,
- global_step, metric_dict)
- else:
- avg_sup_loss = tf.reduce_mean(sup_loss)
- total_loss = avg_sup_loss
-
- if FLAGS.unsup_ratio > 0 and is_training:
- aug_bsz = tf.shape(features["ori_image"])[0]
-
- ori_logits = all_logits[sup_bsz : sup_bsz + aug_bsz]
- aug_logits = all_logits[sup_bsz + aug_bsz:]
- if FLAGS.uda_softmax_temp != -1:
- ori_logits_tgt = ori_logits / FLAGS.uda_softmax_temp
- else:
- ori_logits_tgt = ori_logits
- ori_prob = tf.nn.softmax(ori_logits, axis=-1)
- aug_prob = tf.nn.softmax(aug_logits, axis=-1)
- metric_dict["unsup/ori_prob"] = tf.reduce_mean(
- tf.reduce_max(ori_prob, axis=-1))
- metric_dict["unsup/aug_prob"] = tf.reduce_mean(
- tf.reduce_max(aug_prob, axis=-1))
-
- aug_loss = _kl_divergence_with_logits(
- p_logits=tf.stop_gradient(ori_logits_tgt),
- q_logits=aug_logits)
-
- if FLAGS.uda_confidence_thresh != -1:
- ori_prob = tf.nn.softmax(ori_logits, axis=-1)
- largest_prob = tf.reduce_max(ori_prob, axis=-1)
- loss_mask = tf.cast(tf.greater(
- largest_prob, FLAGS.uda_confidence_thresh), tf.float32)
- metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(loss_mask)
- loss_mask = tf.stop_gradient(loss_mask)
- aug_loss = aug_loss * loss_mask
- metric_dict["unsup/high_prob_loss"] = tf.reduce_mean(aug_loss)
-
- if FLAGS.ent_min_coeff > 0:
- ent_min_coeff = FLAGS.ent_min_coeff
- metric_dict["unsup/ent_min_coeff"] = ent_min_coeff
- per_example_ent = get_ent(ori_logits)
- ent_min_loss = tf.reduce_mean(per_example_ent)
- total_loss = total_loss + ent_min_coeff * ent_min_loss
-
- avg_unsup_loss = tf.reduce_mean(aug_loss)
- total_loss += FLAGS.unsup_coeff * avg_unsup_loss
- metric_dict["unsup/loss"] = avg_unsup_loss
-
- total_loss = utils.decay_weights(
- total_loss,
- FLAGS.weight_decay_rate)
-
- #### Check model parameters
- num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
- tf.logging.info("#params: {}".format(num_params))
-
- if FLAGS.verbose:
- format_str = "{{:<{0}s}}\t{{}}".format(
- max([len(v.name) for v in tf.trainable_variables()]))
- for v in tf.trainable_variables():
- tf.logging.info(format_str.format(v.name, v.get_shape()))
-
- #### Evaluation mode
- if mode == tf.estimator.ModeKeys.EVAL:
- #### Metric function for classification
- def metric_fn(per_example_loss, label_ids, logits):
- # classification loss & accuracy
- loss = tf.metrics.mean(per_example_loss)
-
- predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
- accuracy = tf.metrics.accuracy(label_ids, predictions)
-
- ret_dict = {
- "eval/classify_loss": loss,
- "eval/classify_accuracy": accuracy
- }
-
- return ret_dict
-
- eval_metrics = (metric_fn, [sup_loss, sup_labels, sup_logits])
-
- #### Constucting evaluation TPUEstimatorSpec.
- eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
- mode=mode,
- loss=total_loss,
- eval_metrics=eval_metrics)
-
- return eval_spec
-
- # increase the learning rate linearly
- if FLAGS.warmup_steps > 0:
- warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
- * FLAGS.learning_rate
- else:
- warmup_lr = 0.0
-
- # decay the learning rate using the cosine schedule
- decay_lr = tf.train.cosine_decay(
- FLAGS.learning_rate,
- global_step=global_step-FLAGS.warmup_steps,
- decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
- alpha=FLAGS.min_lr_ratio)
-
- learning_rate = tf.where(global_step < FLAGS.warmup_steps,
- warmup_lr, decay_lr)
-
- optimizer = tf.train.MomentumOptimizer(
- learning_rate=learning_rate,
- momentum=0.9,
- use_nesterov=True)
-
- if FLAGS.use_tpu:
- optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
-
- grads_and_vars = optimizer.compute_gradients(total_loss)
- gradients, variables = zip(*grads_and_vars)
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- with tf.control_dependencies(update_ops):
- train_op = optimizer.apply_gradients(
- zip(gradients, variables), global_step=tf.train.get_global_step())
-
- #### Creating training logging hook
- # compute accuracy
- sup_pred = tf.argmax(sup_logits, axis=-1, output_type=sup_labels.dtype)
- is_correct = tf.to_float(tf.equal(sup_pred, sup_labels))
- acc = tf.reduce_mean(is_correct)
- metric_dict["sup/sup_loss"] = avg_sup_loss
- metric_dict["training/loss"] = total_loss
- metric_dict["sup/acc"] = acc
- metric_dict["training/lr"] = learning_rate
- metric_dict["training/step"] = global_step
-
- if not FLAGS.use_tpu:
- log_info = ("step [{training/step}] lr {training/lr:.6f} "
- "loss {training/loss:.4f} "
- "sup/acc {sup/acc:.4f} sup/loss {sup/sup_loss:.6f} ")
- if FLAGS.unsup_ratio > 0:
- log_info += "unsup/loss {unsup/loss:.6f} "
- formatter = lambda kwargs: log_info.format(**kwargs)
- logging_hook = tf.train.LoggingTensorHook(
- tensors=metric_dict,
- every_n_iter=FLAGS.iterations,
- formatter=formatter)
- training_hooks = [logging_hook]
- #### Constucting training TPUEstimatorSpec.
- train_spec = tf.contrib.tpu.TPUEstimatorSpec(
- mode=mode, loss=total_loss, train_op=train_op,
- training_hooks=training_hooks)
- else:
- #### Constucting training TPUEstimatorSpec.
- host_call = utils.construct_scalar_host_call(
- metric_dict=metric_dict,
- model_dir=params["model_dir"],
- prefix="",
- reduce_fn=tf.reduce_mean)
- train_spec = tf.contrib.tpu.TPUEstimatorSpec(
- mode=mode, loss=total_loss, train_op=train_op,
- host_call=host_call)
-
- return train_spec
-
- return model_fn
-
-
-def train(hparams):
- ##### Create input function
- if FLAGS.unsup_ratio == 0:
- FLAGS.aug_copy = 0
- if FLAGS.dev_size != -1:
- FLAGS.do_train = True
- FLAGS.do_eval = True
- if FLAGS.do_train:
- train_input_fn = data.get_input_fn(
- data_dir=FLAGS.data_dir,
- split="train",
- task_name=FLAGS.task_name,
- sup_size=FLAGS.sup_size,
- unsup_ratio=FLAGS.unsup_ratio,
- aug_copy=FLAGS.aug_copy,
- )
-
- if FLAGS.do_eval:
- if FLAGS.dev_size != -1:
- eval_input_fn = data.get_input_fn(
- data_dir=FLAGS.data_dir,
- split="dev",
- task_name=FLAGS.task_name,
- sup_size=FLAGS.dev_size,
- unsup_ratio=0,
- aug_copy=0)
- eval_size = FLAGS.dev_size
- else:
- eval_input_fn = data.get_input_fn(
- data_dir=FLAGS.data_dir,
- split="test",
- task_name=FLAGS.task_name,
- sup_size=-1,
- unsup_ratio=0,
- aug_copy=0)
- if FLAGS.task_name == "cifar10":
- eval_size = 10000
- elif FLAGS.task_name == "svhn":
- eval_size = 26032
- else:
- assert False, "You need to specify the size of your test set."
- eval_steps = eval_size // FLAGS.eval_batch_size
-
- ##### Get model function
- model_fn = get_model_fn(hparams)
- estimator = utils.get_TPU_estimator(FLAGS, model_fn)
-
- #### Training
- if FLAGS.dev_size != -1:
- tf.logging.info("***** Running training and validation *****")
- tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
- tf.logging.info(" Unsupervised batch size = %d",
- FLAGS.train_batch_size * FLAGS.unsup_ratio)
- tf.logging.info(" Num train steps = %d", FLAGS.train_steps)
- curr_step = 0
- while True:
- if curr_step >= FLAGS.train_steps:
- break
- tf.logging.info("Current step {}".format(curr_step))
- train_step = min(FLAGS.save_steps, FLAGS.train_steps - curr_step)
- estimator.train(input_fn=train_input_fn, steps=train_step)
- estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
- curr_step += FLAGS.save_steps
- else:
- if FLAGS.do_train:
- tf.logging.info("***** Running training *****")
- tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
- tf.logging.info(" Unsupervised batch size = %d",
- FLAGS.train_batch_size * FLAGS.unsup_ratio)
- estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
- if FLAGS.do_eval:
- tf.logging.info("***** Running evaluation *****")
- results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
- tf.logging.info(">> Results:")
- for key in results.keys():
- tf.logging.info(" %s = %s", key, str(results[key]))
- results[key] = results[key].item()
- acc = results["eval/classify_accuracy"]
- with tf.gfile.Open("{}/results.txt".format(FLAGS.model_dir), "w") as ouf:
- ouf.write(str(acc))
-
-
-def main(_):
-
- if FLAGS.do_train:
- tf.gfile.MakeDirs(FLAGS.model_dir)
- flags_dict = tf.app.flags.FLAGS.flag_values_dict()
- with tf.gfile.Open(os.path.join(FLAGS.model_dir, "FLAGS.json"), "w") as ouf:
- json.dump(flags_dict, ouf)
- hparams = tf.contrib.training.HParams()
-
- if FLAGS.model_name == "wrn":
- hparams.add_hparam("model_name", "wrn")
- hparams.add_hparam("wrn_size", FLAGS.wrn_size)
- elif FLAGS.model_name == "shake_shake_32":
- hparams.add_hparam("model_name", "shake_shake")
- hparams.add_hparam("shake_shake_widen_factor", 2)
- elif FLAGS.model_name == "shake_shake_96":
- hparams.add_hparam("model_name", "shake_shake")
- hparams.add_hparam("shake_shake_widen_factor", 6)
- elif FLAGS.model_name == "shake_shake_112":
- hparams.add_hparam("model_name", "shake_shake")
- hparams.add_hparam("shake_shake_widen_factor", 7)
- elif FLAGS.model_name == "pyramid_net":
- hparams.add_hparam("model_name", "pyramid_net")
-
- elif FLAGS.model_name == "LeNet":
- hparams.add_hparam("model_name", "LeNet")
-
- else:
- raise ValueError("Not Valid Model Name: %s" % FLAGS.model_name)
-
- train(hparams)
-
-
-if __name__ == "__main__":
- tf.logging.set_verbosity(tf.logging.INFO)
- tf.app.run()
diff --git a/Old/UDA/run_cifar10_gpu.sh b/Old/UDA/run_cifar10_gpu.sh
deleted file mode 100755
index 5ccebd7..0000000
--- a/Old/UDA/run_cifar10_gpu.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-# coding=utf-8
-# Copyright 2019 The Google UDA Team Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#!/bin/bash
-
-task_name=cifar10
-
-python main.py \
- --model_name="LeNet"\
- --use_tpu=False \
- --do_train=True \
- --do_eval=True \
- --task_name=${task_name} \
- --sup_size=4000 \
- --unsup_ratio=5 \
- --train_batch_size=32 \
- --data_dir=data/proc_data/${task_name} \
- --model_dir=ckpt/cifar10_gpu \
- --train_steps=400000 \
- $@
diff --git a/Old/augmentations_randaugment.py b/Old/augmentations_randaugment.py
deleted file mode 100755
index b491942..0000000
--- a/Old/augmentations_randaugment.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# code in this file is adpated from rpmcruz/autoaugment
-# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
-import random
-
-import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
-import numpy as np
-import torch
-from PIL import Image
-
-def ShearX(img, v): # [-0.3, 0.3]
- assert -0.3 <= v <= 0.3
- if random.random() > 0.5:
- v = -v
- return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0))
-
-
-def ShearY(img, v): # [-0.3, 0.3]
- assert -0.3 <= v <= 0.3
- if random.random() > 0.5:
- v = -v
- return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0))
-
-
-def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
- assert -0.45 <= v <= 0.45
- if random.random() > 0.5:
- v = -v
- v = v * img.size[0]
- return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
-
-
-def TranslateXabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
- assert 0 <= v
- if random.random() > 0.5:
- v = -v
- return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
-
-
-def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
- assert -0.45 <= v <= 0.45
- if random.random() > 0.5:
- v = -v
- v = v * img.size[1]
- return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
-
-
-def TranslateYabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
- assert 0 <= v
- if random.random() > 0.5:
- v = -v
- return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
-
-
-def Rotate(img, v): # [-30, 30]
- assert -30 <= v <= 30
- if random.random() > 0.5:
- v = -v
- return img.rotate(v)
-
-
-def AutoContrast(img, _):
- return PIL.ImageOps.autocontrast(img)
-
-
-def Invert(img, _):
- return PIL.ImageOps.invert(img)
-
-
-def Equalize(img, _):
- return PIL.ImageOps.equalize(img)
-
-
-def Flip(img, _): # not from the paper
- return PIL.ImageOps.mirror(img)
-
-def FlipLR(img, v):
- return img.transpose(Image.FLIP_LEFT_RIGHT)
-
-def FlipUD(img, v):
- return img.transpose(Image.FLIP_TOP_BOTTOM)
-
-def Solarize(img, v): # [0, 256]
- assert 0 <= v <= 256
- return PIL.ImageOps.solarize(img, v)
-
-
-def SolarizeAdd(img, addition=0, threshold=128):
- img_np = np.array(img).astype(np.int)
- img_np = img_np + addition
- img_np = np.clip(img_np, 0, 255)
- img_np = img_np.astype(np.uint8)
- img = Image.fromarray(img_np)
- return PIL.ImageOps.solarize(img, threshold)
-
-
-def Posterize(img, v): # [4, 8]
- v = int(v)
- v = max(1, v)
- return PIL.ImageOps.posterize(img, v)
-
-
-def Contrast(img, v): # [0.1,1.9]
- assert 0.1 <= v <= 1.9
- return PIL.ImageEnhance.Contrast(img).enhance(v)
-
-
-def Color(img, v): # [0.1,1.9]
- assert 0.1 <= v <= 1.9
- return PIL.ImageEnhance.Color(img).enhance(v)
-
-
-def Brightness(img, v): # [0.1,1.9]
- assert 0.1 <= v <= 1.9
- return PIL.ImageEnhance.Brightness(img).enhance(v)
-
-
-def Sharpness(img, v): # [0.1,1.9]
- assert 0.1 <= v <= 1.9
- return PIL.ImageEnhance.Sharpness(img).enhance(v)
-
-
-def Cutout(img, v): # [0, 60] => percentage: [0, 0.2]
- assert 0.0 <= v <= 0.2
- if v <= 0.:
- return img
-
- v = v * img.size[0]
- return CutoutAbs(img, v)
-
-
-def CutoutAbs(img, v): # [0, 60] => percentage: [0, 0.2]
- # assert 0 <= v <= 20
- if v < 0:
- return img
- w, h = img.size
- x0 = np.random.uniform(w)
- y0 = np.random.uniform(h)
-
- x0 = int(max(0, x0 - v / 2.))
- y0 = int(max(0, y0 - v / 2.))
- x1 = min(w, x0 + v)
- y1 = min(h, y0 + v)
-
- xy = (x0, y0, x1, y1)
- color = (125, 123, 114)
- # color = (0, 0, 0)
- img = img.copy()
- PIL.ImageDraw.Draw(img).rectangle(xy, color)
- return img
-
-
-def SamplePairing(imgs): # [0, 0.4]
- def f(img1, v):
- i = np.random.choice(len(imgs))
- img2 = PIL.Image.fromarray(imgs[i])
- return PIL.Image.blend(img1, img2, v)
-
- return f
-
-
-def Identity(img, v):
- return img
-
-
-def augment_list(): # 16 oeprations and their ranges
- # https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
- l = [
- (Identity, 0., 1.0),
- (FlipUD, 0., 1.0),
- (FlipLR, 0., 1.0),
- (Rotate, 0, 30), # 4
- (TranslateX, 0., 0.33), # 2
- (TranslateY, 0., 0.33), # 3
- (ShearX, 0., 0.3), # 0
- (ShearY, 0., 0.3), # 1
- #(AutoContrast, 0, 1), # 5
- #(Invert, 0, 1), # 6
- #(Equalize, 0, 1), # 7
- (Contrast, 0.1, 1.9), # 10
- (Color, 0.1, 1.9), # 11
- (Brightness, 0.1, 1.9), # 12
- (Sharpness, 0.1, 1.9), # 13
- (Posterize, 4, 8), # 9
- (Solarize, 1, 256), # 8
-
- # (Cutout, 0, 0.2), # 14
- # (SamplePairing(imgs), 0, 0.4), # 15
- ]
-
- # https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
- #l = [
- # (AutoContrast, 0, 1),
- # (Equalize, 0, 1),
- # (Invert, 0, 1),
- # (Rotate, 0, 30),
- # (Posterize, 0, 4),
- # (Solarize, 0, 256),
- # (SolarizeAdd, 0, 110),
- # (Color, 0.1, 1.9),
- # (Contrast, 0.1, 1.9),
- # (Brightness, 0.1, 1.9),
- # (Sharpness, 0.1, 1.9),
- # (ShearX, 0., 0.3),
- # (ShearY, 0., 0.3),
- # (CutoutAbs, 0, 40),
- # (TranslateXabs, 0., 100),
- # (TranslateYabs, 0., 100),
- #]
-
- return l
-
-
-class Lighting(object):
- """Lighting noise(AlexNet - style PCA - based noise)"""
-
- def __init__(self, alphastd, eigval, eigvec):
- self.alphastd = alphastd
- self.eigval = torch.Tensor(eigval)
- self.eigvec = torch.Tensor(eigvec)
-
- def __call__(self, img):
- if self.alphastd == 0:
- return img
-
- alpha = img.new().resize_(3).normal_(0, self.alphastd)
- rgb = self.eigvec.type_as(img).clone() \
- .mul(alpha.view(1, 3).expand(3, 3)) \
- .mul(self.eigval.view(1, 3).expand(3, 3)) \
- .sum(1).squeeze()
-
- return img.add(rgb.view(3, 1, 1).expand_as(img))
-
-
-class CutoutDefault(object):
- """
- Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
- """
- def __init__(self, length):
- self.length = length
-
- def __call__(self, img):
- h, w = img.size(1), img.size(2)
- mask = np.ones((h, w), np.float32)
- y = np.random.randint(h)
- x = np.random.randint(w)
-
- y1 = np.clip(y - self.length // 2, 0, h)
- y2 = np.clip(y + self.length // 2, 0, h)
- x1 = np.clip(x - self.length // 2, 0, w)
- x2 = np.clip(x + self.length // 2, 0, w)
-
- mask[y1: y2, x1: x2] = 0.
- mask = torch.from_numpy(mask)
- mask = mask.expand_as(img)
- img *= mask
- return img
-
-PARAMETER_MAX = 1
-class RandAugment:
- def __init__(self, n, m):
- self.n = n
- self.m = m # [0, PARAMETER_MAX]
- self.augment_list = augment_list()
-
- def __call__(self, img):
- ops = random.choices(self.augment_list, k=self.n)
- for op, minval, maxval in ops:
- val = (float(self.m) / PARAMETER_MAX) * float(maxval - minval) + minval
- img = op(img, val)
-
- return img
diff --git a/Old/salvador/cams.py b/Old/salvador/cams.py
deleted file mode 100755
index 3e615f1..0000000
--- a/Old/salvador/cams.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import torch
-import numpy as np
-import torchvision
-from PIL import Image
-from torch import topk
-import torch.nn.functional as F
-from torch import topk
-import cv2
-from torchvision import transforms
-import os
-
-class SaveFeatures():
- features=None
- def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
- def hook_fn(self, module, input, output): self.features = ((output.cpu()).data).numpy()
- def remove(self): self.hook.remove()
-
-def getCAM(feature_conv, weight_fc, class_idx):
- _, nc, h, w = feature_conv.shape
- cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h*w)))
- cam = cam.reshape(h, w)
- cam = cam - np.min(cam)
- cam_img = cam / np.max(cam)
- # cam_img = np.uint8(255 * cam_img)
- return cam_img
-
-def main(cam):
- device = 'cuda:0'
- model_name = 'resnet50'
- root = 'NEW_SS'
-
- os.makedirs(os.path.join(root + '_CAM', 'OK'), exist_ok=True)
- os.makedirs(os.path.join(root + '_CAM', 'NOK'), exist_ok=True)
-
- train_transform = transforms.Compose([
- transforms.ToTensor(),
- ])
-
- dataset = torchvision.datasets.ImageFolder(
- root=root, transform=train_transform,
- )
-
- loader = torch.utils.data.DataLoader(dataset, batch_size=1)
-
- model = torchvision.models.__dict__[model_name](pretrained=False)
- model.fc = torch.nn.Linear(model.fc.in_features, 2)
-
- model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
- model = model.to(device)
- model.eval()
-
- weight_softmax_params = list(model._modules.get('fc').parameters())
- weight_softmax = np.squeeze(weight_softmax_params[0].cpu().data.numpy())
-
- final_layer = model._modules.get('layer4')
-
- activated_features = SaveFeatures(final_layer)
-
- for i, (img, target ) in enumerate(loader):
- img = img.to(device)
- prediction = model(img)
- pred_probabilities = F.softmax(prediction, dim=1).data.squeeze()
- class_idx = topk(pred_probabilities,1)[1].int()
- # if target.item() != class_idx:
- # print(dataset.imgs[i][0])
-
- if cam:
- overlay = getCAM(activated_features.features, weight_softmax, class_idx )
-
- import ipdb; ipdb.set_trace()
- import PIL
- from torchvision.transforms import ToPILImage
-
- img = ToPILImage()(overlay).resize(size=(1280, 1024), resample=PIL.Image.BILINEAR)
- img.save('heat-pil.jpg')
-
-
- img = cv2.imread(dataset.imgs[i][0])
- height, width, _ = img.shape
- overlay = cv2.resize(overlay, (width, height))
- heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET)
- cv2.imwrite('heat-cv2.jpg', heatmap)
-
- img = cv2.imread(dataset.imgs[i][0])
- height, width, _ = img.shape
- overlay = cv2.resize(overlay, (width, height))
- heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET)
- result = heatmap * 0.3 + img * 0.5
-
- clss = dataset.imgs[i][0].split(os.sep)[1]
- name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
- cv2.imwrite(os.path.join(root+"_CAM", clss, name + '.jpg'), result)
- print(f'{os.path.join(root+"_CAM", clss, name + ".jpg")} saved')
-
- activated_features.remove()
-
-if __name__ == "__main__":
- main(cam=True)
diff --git a/Old/salvador/checkpoint.pt b/Old/salvador/checkpoint.pt
deleted file mode 100755
index 6252c9e..0000000
Binary files a/Old/salvador/checkpoint.pt and /dev/null differ
diff --git a/Old/salvador/dataug.py b/Old/salvador/dataug.py
deleted file mode 100755
index 6f246df..0000000
--- a/Old/salvador/dataug.py
+++ /dev/null
@@ -1,1136 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.distributions import *
-
-#import kornia
-#import random
-import numpy as np
-import copy
-
-import transformations as TF
-
-class Data_aug(nn.Module): #Rotation parametree
- def __init__(self):
- super(Data_aug, self).__init__()
- self._data_augmentation = True
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.tensor(0.5)),
- "mag": nn.Parameter(torch.tensor(1.0))
- })
-
- #self.params["mag"].register_hook(print)
-
- def forward(self, x):
-
- if self._data_augmentation and random.random() < self._params["prob"]:
- #print('Aug')
- batch_size = x.shape[0]
- # create transformation (rotation)
- alpha = self._params["mag"]*180 # in degrees
- angle = torch.ones(batch_size, device=x.device) * alpha
-
- # define the rotation center
- center = torch.ones(batch_size, 2, device=x.device)
- center[..., 0] = x.shape[3] / 2 # x
- center[..., 1] = x.shape[2] / 2 # y
-
- #print(x.shape, center)
- # define the scale factor
- scale = torch.ones(batch_size, device=x.device)
-
- # compute the transformation matrix
- M = kornia.get_rotation_matrix2d(center, angle, scale)
-
- # apply the transformation to original image
- x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
- return x
-
- def eval(self):
- self.augment(mode=False)
- nn.Module.eval(self)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- return "Data_aug(Mag-1 TF)"
-
-class Data_augV2(nn.Module): #Methode exacte
- def __init__(self):
- super(Data_augV2, self).__init__()
- self._data_augmentation = True
-
- self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
- #self._fixed_transf=[0.0]
- self._nb_tf= len(self._fixed_transf)
-
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
- #"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
- })
-
- #print(self._params["prob"], self._params["prob2"])
-
- self.transf_idx=0
-
- def forward(self, x):
-
- if self._data_augmentation:
- #print('Aug',self._fixed_transf[self.transf_idx])
- device = x.device
- batch_size = x.shape[0]
-
- # create transformation (rotation)
- #alpha = 180 # in degrees
- alpha = self._fixed_transf[self.transf_idx]
- angle = torch.ones(batch_size, device=device) * alpha
-
- x = self.rotate(x,angle)
-
- return x
-
- def rotate(self, x, angle):
-
- device = x.device
- batch_size = x.shape[0]
- # define the rotation center
- center = torch.ones(batch_size, 2, device=device)
- center[..., 0] = x.shape[3] / 2 # x
- center[..., 1] = x.shape[2] / 2 # y
-
- #print(x.shape, center)
- # define the scale factor
- scale = torch.ones(batch_size, device=device)
-
- # compute the transformation matrix
- M = kornia.get_rotation_matrix2d(center, angle, scale)
-
- # apply the transformation to original image
- return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
-
- def adjust_param(self): #Detach from gradient ?
- self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
- #print('proba',self._params['prob'])
- self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
- #print('Sum p', sum(self._params['prob']))
-
- def eval(self):
- self.augment(mode=False)
- nn.Module.eval(self)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- return "Data_augV2(Exact-%d TF)" % self._nb_tf
-
-class Data_augV3(nn.Module): #Echantillonage uniforme/Mixte
- def __init__(self, mix_dist=0.0):
- super(Data_augV3, self).__init__()
- self._data_augmentation = True
-
- #self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
- self._fixed_transf=[0.0, 1.0, -1.0] #Flips (Identity,Horizontal,Vertical)
- #self._fixed_transf=[0.0]
- self._nb_tf= len(self._fixed_transf)
-
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
- #"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
- })
-
- #print(self._params["prob"], self._params["prob2"])
- self._sample = []
-
- self._mix_dist = False
- if mix_dist != 0.0:
- self._mix_dist = True
- self._mix_factor = max(min(mix_dist, 1.0), 0.0)
-
- def forward(self, x):
-
- if self._data_augmentation:
- device = x.device
- batch_size = x.shape[0]
-
-
- #good_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1),self._params["prob"], device=device))
- #bad_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1), 1-self._params["prob"], device=device))
-
- #transform_dist = Categorical(probs=torch.tensor([self._params["prob"], 1-self._params["prob"]], device=device))
- #self._sample = transform_dist._sample(sample_shape=torch.Size([batch_size,1]))
-
- uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=0)
-
- if not self._mix_dist:
- distrib = uniforme_dist
- else:
- distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=0) #Mix distrib reel / uniforme avec mix_factor
-
- cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*distrib)
- self._sample = cat_distrib.sample()
-
- TF_param = torch.tensor([self._fixed_transf[x] for x in self._sample], device=device) #Approche de marco peut-etre plus rapide
-
- #x = self.rotate(x,angle=TF_param)
- x = self.flip(x,flip_mat=TF_param)
-
- return x
-
- def rotate(self, x, angle):
-
- device = x.device
- batch_size = x.shape[0]
- # define the rotation center
- center = torch.ones(batch_size, 2, device=device)
- center[..., 0] = x.shape[3] / 2 # x
- center[..., 1] = x.shape[2] / 2 # y
-
- #print(x.shape, center)
- # define the scale factor
- scale = torch.ones(batch_size, device=device)
-
- # compute the transformation matrix
- M = kornia.get_rotation_matrix2d(center, angle, scale)
-
- # apply the transformation to original image
- return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
-
- def flip(self, x, flip_mat):
-
- #print(flip_mat)
- device = x.device
- batch_size = x.shape[0]
-
- h, w = x.shape[2], x.shape[3] # destination size
- #points_src = torch.ones(batch_size, 4, 2, device=device)
- #points_dst = torch.ones(batch_size, 4, 2, device=device)
-
- #Identity
- iM=torch.tensor(np.eye(3))
-
- #Horizontal flip
- # the source points are the region to crop corners
- #points_src = torch.FloatTensor([[
- # [w - 1, 0], [0, 0], [0, h - 1], [w - 1, h - 1],
- #]])
- # the destination points are the image vertexes
- #points_dst = torch.FloatTensor([[
- # [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
- #]])
- # compute perspective transform
- #hM = kornia.get_perspective_transform(points_src, points_dst)
- hM =torch.tensor( [[[-1., 0., w-1],
- [ 0., 1., 0.],
- [ 0., 0., 1.]]])
-
- #Vertical flip
- # the source points are the region to crop corners
- #points_src = torch.FloatTensor([[
- # [0, h - 1], [w - 1, h - 1], [w - 1, 0], [0, 0],
- #]])
- # the destination points are the image vertexes
- #points_dst = torch.FloatTensor([[
- # [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
- #]])
- # compute perspective transform
- #vM = kornia.get_perspective_transform(points_src, points_dst)
- vM =torch.tensor( [[[ 1., 0., 0.],
- [ 0., -1., h-1],
- [ 0., 0., 1.]]])
- #print(vM)
-
- M=torch.ones(batch_size, 3, 3, device=device)
-
- for i in range(batch_size): # A optimiser
- if flip_mat[i]==0.0:
- M[i,]=iM
- elif flip_mat[i]==1.0:
- M[i,]=hM
- elif flip_mat[i]==-1.0:
- M[i,]=vM
-
- # warp the original image by the found transform
- return kornia.warp_perspective(x, M, dsize=(h, w))
-
- def adjust_param(self, soft=False): #Detach from gradient ?
-
- if soft :
- self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
- else:
- #self._params['prob'].clamp(min=0.0,max=1.0)
- self._params['prob'].data = F.relu(self._params['prob'].data)
- #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
- #print('proba',self._params['prob'])
- self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
- #print('Sum p', sum(self._params['prob']))
-
- def loss_weight(self):
- #w_loss = [self._params["prob"][x] for x in self._sample]
- #print(self._sample.view(-1,1).shape)
- #print(self._sample[:10])
-
- w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
- w_loss.scatter_(1, self._sample.view(-1,1), 1)
- #print(w_loss.shape)
- #print(w_loss[:10,:])
- w_loss = w_loss * self._params["prob"]
- #print(w_loss.shape)
- #print(w_loss[:10,:])
- w_loss = torch.sum(w_loss,dim=1)
- #print(w_loss.shape)
- #print(w_loss[:10])
- return w_loss
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(Data_augV3, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
- #super(Augmented_model, self).eval()
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- if not self._mix_dist:
- return "Data_augV3(Uniform-%d TF)" % self._nb_tf
- else:
- return "Data_augV3(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf)
-
-class Data_augV4(nn.Module): #Transformations avec mask
- def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0):
- super(Data_augV4, self).__init__()
- assert len(TF_dict)>0
-
- self._data_augmentation = True
-
- #self._TF_matrix={}
- #self._input_info={'h':0, 'w':0, 'device':None} #Input associe a TF_matrix
- #self._mag_fct = TF_dict
- self._TF_dict = TF_dict
- self._TF= list(self._TF_dict.keys())
- self._nb_tf= len(self._TF)
-
- self._N_seqTF = N_TF
-
- self._fixed_mag=5 #[0, PARAMETER_MAX]
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
- })
-
- self._samples = []
-
- self._mix_dist = False
- if mix_dist != 0.0:
- self._mix_dist = True
- self._mix_factor = max(min(mix_dist, 1.0), 0.0)
-
- def forward(self, x):
- if self._data_augmentation:
- device = x.device
- batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
-
- x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
- self._samples = []
-
- for _ in range(self._N_seqTF):
- ## Echantillonage ##
- uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
-
- if not self._mix_dist:
- self._distrib = uniforme_dist
- else:
- self._distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor
-
- cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib)
- sample = cat_distrib.sample()
- self._samples.append(sample)
-
- ## Transformations ##
- x = self.apply_TF(x, sample)
- return x
- '''
- def compute_TF_matrix(self, magnitude=None, sample_info= None):
- print('Computing TF_matrix...')
- if not magnitude :
- magnitude=self._fixed_mag
-
- if sample_info:
- self._input_info['h']= sample_info['h']
- self._input_info['w']= sample_info['w']
- self._input_info['device'] = sample_info['device']
- h, w, device= self._input_info['h'], self._input_info['w'], self._input_info['device']
-
- self._TF_matrix={}
- for tf in self._TF :
- if tf=='Id':
- self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
- [ 0., 1., 0.],
- [ 0., 0., 1.]]], device=device)
- elif tf=='Rot':
- center = torch.ones(1, 2, device=device)
- center[0, 0] = w / 2 # x
- center[0, 1] = h / 2 # y
- scale = torch.ones(1, device=device)
- angle = self._mag_fct[tf](magnitude) * torch.ones(1, device=device)
- R = kornia.get_rotation_matrix2d(center, angle, scale) #Rotation matrix (1,2,3)
- self._TF_matrix[tf]=torch.cat((R,torch.tensor([[[ 0., 0., 1.]]], device=device)), dim=1) #TF matrix (1,3,3)
- elif tf=='FlipLR':
- self._TF_matrix[tf]=torch.tensor([[[-1., 0., w-1],
- [ 0., 1., 0.],
- [ 0., 0., 1.]]], device=device)
- elif tf=='FlipUD':
- self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
- [ 0., -1., h-1],
- [ 0., 0., 1.]]], device=device)
- else:
- raise Exception("Invalid TF requested")
- '''
- def apply_TF(self, x, sampled_TF):
- device = x.device
- smps_x=[]
- masks=[]
- for tf_idx in range(self._nb_tf):
- mask = sampled_TF==tf_idx #Create selection mask
- smp_x = x[mask] #torch.masked_select() ?
-
- if smp_x.shape[0]!=0: #if there's data to TF
- magnitude=self._fixed_mag
- tf=self._TF[tf_idx]
-
- '''
- ## Geometric TF ##
- if tf=='Identity':
- pass
- elif tf=='FlipLR':
- smp_x = TF.flipLR(smp_x)
- elif tf=='FlipUD':
- smp_x = TF.flipUD(smp_x)
- elif tf=='Rotate':
- smp_x = TF.rotate(smp_x, angle=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='TranslateX' or tf=='TranslateY':
- smp_x = TF.translate(smp_x, translation=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='ShearX' or tf=='ShearY' :
- smp_x = TF.shear(smp_x, shear=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- elif tf=='Contrast':
- smp_x = TF.contrast(smp_x, contrast_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='Color':
- smp_x = TF.color(smp_x, color_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='Brightness':
- smp_x = TF.brightness(smp_x, brightness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='Sharpness':
- smp_x = TF.sharpeness(smp_x, sharpness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='Posterize':
- smp_x = TF.posterize(smp_x, bits=torch.tensor([1 for _ in smp_x], device=device))
- elif tf=='Solarize':
- smp_x = TF.solarize(smp_x, thresholds=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
- elif tf=='Equalize':
- smp_x = TF.equalize(smp_x)
- elif tf=='Auto_Contrast':
- smp_x = TF.auto_contrast(smp_x)
- else:
- raise Exception("Invalid TF requested : ", tf)
-
- x[mask]=smp_x # Refusionner eviter x[mask] : in place
- '''
- x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude) # Refusionner eviter x[mask] : in place
-
- #idx= mask.nonzero()
- #print('-'*8)
- #print(idx[0], tf_idx)
- #print(smp_x[0,])
- #x=x.view(-1,3*32*32)
- #x=x.scatter(dim=0, index=idx, src=smp_x.view(-1,3*32*32)) #Changement des Tensor mais pas visible sur la visualisation...
- #x=x.view(-1,3,32,32)
- #print(x[0,])
-
- '''
- if len(self._TF_matrix)==0 or self._input_info['h']!=h or self._input_info['w']!=w or self._input_info['device']!=device: #Device different:Pas necessaire de tout recalculer
- self.compute_TF_matrix(sample_info={'h': x.shape[2],
- 'w': x.shape[3],
- 'device': x.device})
-
- TF_matrix = torch.zeros(batch_size, 3, 3, device=device) #All geom TF
-
- for tf_idx in range(self._nb_tf):
- mask = self._sample==tf_idx #Create selection mask
- TF_matrix[mask,]=self._TF_matrix[self._TF[tf_idx]]
-
- x=kornia.warp_perspective(x, TF_matrix, dsize=(h, w))
- '''
- return x
-
- def adjust_param(self, soft=False): #Detach from gradient ?
-
- if soft :
- self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
- else:
- #self._params['prob'].clamp(min=0.0,max=1.0)
- self._params['prob'].data = F.relu(self._params['prob'].data)
- #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
-
- self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
-
- def loss_weight(self):
- # 1 seule TF
- #self._sample = self._samples[-1]
- #w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
- #w_loss.scatter_(dim=1, index=self._sample.view(-1,1), value=1)
- #w_loss = w_loss * self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
- #w_loss = torch.sum(w_loss,dim=1)
-
- #Plusieurs TF sequentielles
- w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device)
- for sample in self._samples:
- tmp_w = torch.zeros(w_loss.size(),device=w_loss.device)
- tmp_w.scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF)
- w_loss += tmp_w
-
- w_loss = w_loss * self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
- w_loss = torch.sum(w_loss,dim=1)
- return w_loss
-
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(Data_augV4, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- if not self._mix_dist:
- return "Data_augV4(Uniform-%d TF x %d)" % (self._nb_tf, self._N_seqTF)
- else:
- return "Data_augV4(Mix %.1f-%d TF x %d)" % (self._mix_factor, self._nb_tf, self._N_seqTF)
-
-class Data_augV5(nn.Module): #Optimisation jointe (mag, proba)
- def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True):
- super(Data_augV5, self).__init__()
- assert len(TF_dict)>0
-
- self._data_augmentation = True
-
- self._TF_dict = TF_dict
- self._TF= list(self._TF_dict.keys())
- self._nb_tf= len(self._TF)
-
- self._N_seqTF = N_TF
- self._shared_mag = shared_mag
- self._fixed_mag = fixed_mag
-
- #self._fixed_mag=5 #[0, PARAMETER_MAX]
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
- "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX)) if self._shared_mag
- else torch.tensor(float(TF.PARAMETER_MAX)).expand(self._nb_tf)), #[0, PARAMETER_MAX]
- })
-
- #for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag
-
- #Distribution
- self._fixed_prob=fixed_prob
- self._samples = []
- self._mix_dist = False
- if mix_dist != 0.0:
- self._mix_dist = True
- self._mix_factor = max(min(mix_dist, 1.0), 0.0)
-
- #Mag regularisation
- if not self._fixed_mag:
- if self._shared_mag :
- self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage amplitude max
- else:
- self._reg_mask=[self._TF.index(t) for t in self._TF if t not in TF.TF_ignore_mag]
- self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max
-
- def forward(self, x):
- self._samples = []
- if self._data_augmentation:# and TF.random.random() < 0.5:
- device = x.device
- batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
-
- x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
-
- for _ in range(self._N_seqTF):
- ## Echantillonage ##
- uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
-
- if not self._mix_dist:
- self._distrib = uniforme_dist
- else:
- prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
- self._distrib = (self._mix_factor*prob+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor
-
- cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib)
- sample = cat_distrib.sample()
- self._samples.append(sample)
-
- ## Transformations ##
- x = self.apply_TF(x, sample)
- return x
-
- def apply_TF(self, x, sampled_TF):
- device = x.device
- batch_size, channels, h, w = x.shape
- smps_x=[]
-
- for tf_idx in range(self._nb_tf):
- mask = sampled_TF==tf_idx #Create selection mask
- smp_x = x[mask] #torch.masked_select() ? (NEcessite d'expand le mask au meme dim)
-
- if smp_x.shape[0]!=0: #if there's data to TF
- magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx]
- if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param
-
- tf=self._TF[tf_idx]
- #print(magnitude)
-
- #In place
- #x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
-
- #Out of place
- smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude)
- idx= mask.nonzero()
- idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
- x=x.scatter(dim=0, index=idx, src=smp_x)
-
- return x
-
- def adjust_param(self, soft=False): #Detach from gradient ?
- if not self._fixed_prob:
- if soft :
- self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
- else:
- self._params['prob'].data = F.relu(self._params['prob'].data)
- #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
- self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
-
- if not self._fixed_mag:
- #self._params['mag'].data = self._params['mag'].data.clamp(min=0.0,max=TF.PARAMETER_MAX) #Bloque une fois au extreme
- self._params['mag'].data = F.relu(self._params['mag'].data) - F.relu(self._params['mag'].data - TF.PARAMETER_MAX)
-
- def loss_weight(self):
- if len(self._samples)==0 : return 1 #Pas d'echantillon = pas de ponderation
-
- prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
- # 1 seule TF
- #self._sample = self._samples[-1]
- #w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
- #w_loss.scatter_(dim=1, index=self._sample.view(-1,1), value=1)
- #w_loss = w_loss * self._params["prob"]/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
- #w_loss = torch.sum(w_loss,dim=1)
-
- #Plusieurs TF sequentielles (Attention ne prend pas en compte ordre !)
- w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device)
- for sample in self._samples:
- tmp_w = torch.zeros(w_loss.size(),device=w_loss.device)
- tmp_w.scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF)
- w_loss += tmp_w
-
- w_loss = w_loss * prob/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
- w_loss = torch.sum(w_loss,dim=1)
- return w_loss
-
- def reg_loss(self, reg_factor=0.005):
- if self._fixed_mag: # or self._fixed_prob: #Pas de regularisation si trop peu de DOF
- return torch.tensor(0)
- else:
- #return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean')
- params = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask]
- return reg_factor * F.mse_loss(params, target=self._reg_tgt.to(params.device), reduction='mean')
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(Data_augV5, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- dist_param=''
- if self._fixed_prob: dist_param+='Fx'
- mag_param='Mag'
- if self._fixed_mag: mag_param+= 'Fx'
- if self._shared_mag: mag_param+= 'Sh'
- if not self._mix_dist:
- return "Data_augV5(Uniform%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
- else:
- return "Data_augV5(Mix%.1f%s-%dTFx%d-%s)" % (self._mix_factor,dist_param, self._nb_tf, self._N_seqTF, mag_param)
-
-class Data_augV6(nn.Module): #Optimisation sequentielle
- def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, prob_set_size=None, fixed_mag=True, shared_mag=True):
- super(Data_augV6, self).__init__()
- assert len(TF_dict)>0
-
- self._data_augmentation = True
-
- self._TF_dict = TF_dict
- self._TF= list(self._TF_dict.keys())
- self._nb_tf= len(self._TF)
-
- self._N_seqTF = N_TF
- self._shared_mag = shared_mag
- self._fixed_mag = fixed_mag
-
- self._TF_set_size = prob_set_size if prob_set_size else self._nb_tf
-
- self._fixed_TF=[0] #Identite
- assert self._TF_set_size>=len(self._fixed_TF)
-
- if self._TF_set_size>self._nb_tf:
- print("Warning : TF sets size higher than number of TF. Reducing set size to %d"%self._nb_tf)
- self._TF_set_size=self._nb_tf
-
- ## Genenerate TF sets ##
- if self._TF_set_size==len(self._fixed_TF):
- print("Warning : using only fixed set of TF : ", self._fixed_TF)
- self._TF_sets=torch.tensor([self._fixed_TF])
- else:
- def generate_TF_sets(n_TF, set_size, idx_prefix=[]):
- TF_sets=[]
- if len(idx_prefix)!=0:
- if set_size>2:
- for i in range(idx_prefix[-1]+1, n_TF):
- TF_sets += generate_TF_sets(n_TF=n_TF, set_size=set_size-1, idx_prefix=idx_prefix+[i])
- else:
- #if i not in idx_prefix:
- TF_sets+=[torch.tensor(idx_prefix+[i]) for i in range(idx_prefix[-1]+1, n_TF)]
- elif set_size>1:
- for i in range(0, n_TF):
- TF_sets += generate_TF_sets(n_TF=n_TF, set_size=set_size, idx_prefix=[i])
- else:
- TF_sets+=[torch.tensor([i]) for i in range(0, n_TF)]
- return TF_sets
-
- self._TF_sets=generate_TF_sets(self._nb_tf, self._TF_set_size, self._fixed_TF)
-
- ## Plan TF learning schedule ##
- self._TF_schedule = [list(range(len(self._TF_sets))) for _ in range(self._N_seqTF)]
- for n_tf in range(self._N_seqTF) :
- TF.random.shuffle(self._TF_schedule[n_tf])
-
- self._current_TF_idx=0 #random.randint
- self._start_prob = 1/self._TF_set_size
-
-
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.tensor(self._start_prob).expand(self._nb_tf)), #Proba independantes
- "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX)) if self._shared_mag
- else torch.tensor(float(TF.PARAMETER_MAX)).expand(self._nb_tf)), #[0, PARAMETER_MAX]
- })
-
- #for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag
-
- #Distribution
- self._fixed_prob=fixed_prob
- self._samples = []
- self._mix_dist = False
- if mix_dist != 0.0:
- self._mix_dist = True
- self._mix_factor = max(min(mix_dist, 1.0), 0.0)
-
- #Mag regularisation
- if not self._fixed_mag:
- if self._shared_mag :
- self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage amplitude max
- else:
- self._reg_mask=[self._TF.index(t) for t in self._TF if t not in TF.TF_ignore_mag]
- self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max
-
- def forward(self, x):
- self._samples = []
- if self._data_augmentation:# and TF.random.random() < 0.5:
- device = x.device
- batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
-
- x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
-
- for n_tf in range(self._N_seqTF):
-
- tf_set = self._TF_sets[self._TF_schedule[n_tf][self._current_TF_idx]].to(device)
- #print(n_tf, tf_set)
- ## Echantillonage ##
- uniforme_dist = torch.ones(1,len(tf_set),device=device).softmax(dim=1)
-
- if not self._mix_dist:
- self._distrib = uniforme_dist
- else:
- prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
- curr_prob = torch.index_select(prob, 0, tf_set)
- curr_prob = curr_prob /sum(curr_prob) #Contrainte sum(p)=1
- self._distrib = (self._mix_factor*curr_prob+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor
-
- cat_distrib= Categorical(probs=torch.ones((batch_size, len(tf_set)), device=device)*self._distrib)
- sample = cat_distrib.sample()
- self._samples.append(sample)
-
- ## Transformations ##
- x = self.apply_TF(x, sample)
- return x
-
- def apply_TF(self, x, sampled_TF):
- device = x.device
- batch_size, channels, h, w = x.shape
- smps_x=[]
-
- for sel_idx, tf_idx in enumerate(self._TF_sets[self._current_TF_idx]):
- mask = sampled_TF==sel_idx #Create selection mask
- smp_x = x[mask] #torch.masked_select() ? (NEcessite d'expand le mask au meme dim)
-
- if smp_x.shape[0]!=0: #if there's data to TF
- magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx]
- if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param
-
- tf=self._TF[tf_idx]
- #print(magnitude)
-
- #In place
- #x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
-
- #Out of place
- smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude)
- idx= mask.nonzero()
- idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
- x=x.scatter(dim=0, index=idx, src=smp_x)
-
- return x
-
- def adjust_param(self, soft=False): #Detach from gradient ?
- if not self._fixed_prob:
- if soft :
- self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
- else:
- self._params['prob'].data = F.relu(self._params['prob'].data)
- #self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
- #self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
-
- self._params['prob'].data[0]=self._start_prob #Fixe p identite
-
- if not self._fixed_mag:
- #self._params['mag'].data = self._params['mag'].data.clamp(min=0.0,max=TF.PARAMETER_MAX) #Bloque une fois au extreme
- self._params['mag'].data = F.relu(self._params['mag'].data) - F.relu(self._params['mag'].data - TF.PARAMETER_MAX)
-
- def loss_weight(self): #A verifier
- if len(self._samples)==0 : return 1 #Pas d'echantillon = pas de ponderation
-
- prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
-
- #Plusieurs TF sequentielles (Attention ne prend pas en compte ordre !)
- w_loss = torch.zeros((self._samples[0].shape[0],self._TF_set_size), device=self._samples[0].device)
- for n_tf in range(self._N_seqTF):
- tmp_w = torch.zeros(w_loss.size(),device=w_loss.device)
- tmp_w.scatter_(dim=1, index=self._samples[n_tf].view(-1,1), value=1/self._N_seqTF)
-
- tf_set = self._TF_sets[self._TF_schedule[n_tf][self._current_TF_idx]].to(prob.device)
- curr_prob = torch.index_select(prob, 0, tf_set)
- curr_prob = curr_prob /sum(curr_prob) #Contrainte sum(p)=1
-
- #ATTENTION DISTRIB DIFFERENTE AVEC MIX
- assert not self._mix_dist
- w_loss += tmp_w * curr_prob /self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
-
- w_loss = torch.sum(w_loss,dim=1)
- return w_loss
-
- def reg_loss(self, reg_factor=0.005):
- if self._fixed_mag: # or self._fixed_prob: #Pas de regularisation si trop peu de DOF
- return torch.tensor(0)
- else:
- #return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean')
- params = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask]
- return reg_factor * F.mse_loss(params, target=self._reg_tgt.to(params.device), reduction='mean')
-
- def next_TF_set(self, idx=None):
- if idx:
- self._current_TF_idx=idx
- else:
- self._current_TF_idx+=1
-
- if self._current_TF_idx>=len(self._TF_schedule[0]):
- self._current_TF_idx=0
- for n_tf in range(self._N_seqTF) :
- TF.random.shuffle(self._TF_schedule[n_tf])
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(Data_augV6, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- dist_param=''
- if self._fixed_prob: dist_param+='Fx'
- mag_param='Mag'
- if self._fixed_mag: mag_param+= 'Fx'
- if self._shared_mag: mag_param+= 'Sh'
- if not self._mix_dist:
- return "Data_augV6(Uniform%s-%dTF(%d)x%d-%s)" % (dist_param, self._nb_tf, self._TF_set_size, self._N_seqTF, mag_param)
- else:
- return "Data_augV6(Mix%.1f%s-%dTF(%d)x%d-%s)" % (self._mix_factor,dist_param, self._nb_tf, self._TF_set_size, self._N_seqTF, mag_param)
-
-
-class RandAug(nn.Module): #RandAugment = UniformFx-MagFxSh + rapide
- def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX):
- super(RandAug, self).__init__()
-
- self._data_augmentation = True
-
- self._TF_dict = TF_dict
- self._TF= list(self._TF_dict.keys())
- self._nb_tf= len(self._TF)
- self._N_seqTF = N_TF
-
- self.mag=nn.Parameter(torch.tensor(float(mag)))
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #pas utilise
- "mag" : nn.Parameter(torch.tensor(float(mag))),
- })
- self._shared_mag = True
- self._fixed_mag = True
-
- def forward(self, x):
- if self._data_augmentation:# and TF.random.random() < 0.5:
- device = x.device
- batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
-
- x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
-
- for _ in range(self._N_seqTF):
- ## Echantillonage ## == sampled_ops = np.random.choice(transforms, N)
- uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
- cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*uniforme_dist)
- sample = cat_distrib.sample()
-
- ## Transformations ##
- x = self.apply_TF(x, sample)
- return x
-
- def apply_TF(self, x, sampled_TF):
- smps_x=[]
-
- for tf_idx in range(self._nb_tf):
- mask = sampled_TF==tf_idx #Create selection mask
- smp_x = x[mask] #torch.masked_select() ? (NEcessite d'expand le mask au meme dim)
-
- if smp_x.shape[0]!=0: #if there's data to TF
- magnitude=self._params["mag"].detach()
-
- tf=self._TF[tf_idx]
- #print(magnitude)
-
- #In place
- x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
-
- return x
-
- def adjust_param(self, soft=False):
- pass #Pas de parametre a opti
-
- def loss_weight(self):
- return 1 #Pas d'echantillon = pas de ponderation
-
- def reg_loss(self, reg_factor=0.005):
- return torch.tensor(0) #Pas de regularisation
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(RandAug, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- return "RandAug(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag)
-
-class RandAugUDA(nn.Module): #RandAugment from UDA (for DA during training)
- def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX):
- super(RandAugUDA, self).__init__()
-
- self._data_augmentation = True
-
- self._TF_dict = TF_dict
- self._TF= list(self._TF_dict.keys())
- self._nb_tf= len(self._TF)
- self._N_seqTF = N_TF
-
- self.mag=nn.Parameter(torch.tensor(float(mag)))
- self._params = nn.ParameterDict({
- "prob": nn.Parameter(torch.tensor(0.5).unsqueeze(dim=0)),
- "mag" : nn.Parameter(torch.tensor(float(TF.PARAMETER_MAX))),
- })
- self._shared_mag = True
- self._fixed_mag = True
-
- self._op_list =[]
- for tf in self._TF:
- for mag in range(1, int(self._params['mag']*10), 1):
- self._op_list+=[(tf, self._params['prob'].item(), mag/10)]
- self._nb_op = len(self._op_list)
-
- def forward(self, x):
- if self._data_augmentation:# and TF.random.random() < 0.5:
- device = x.device
- batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
-
- x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
-
- for _ in range(self._N_seqTF):
- ## Echantillonage ## == sampled_ops = np.random.choice(transforms, N)
- uniforme_dist = torch.ones(1, self._nb_op, device=device).softmax(dim=1)
- cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_op), device=device)*uniforme_dist)
- sample = cat_distrib.sample()
-
- ## Transformations ##
- x = self.apply_TF(x, sample)
- return x
-
- def apply_TF(self, x, sampled_TF):
- smps_x=[]
-
- for op_idx in range(self._nb_op):
- mask = sampled_TF==op_idx #Create selection mask
- smp_x = x[mask] #torch.masked_select() ? (Necessite d'expand le mask au meme dim)
-
- if smp_x.shape[0]!=0: #if there's data to TF
- if TF.random.random() < self._op_list[op_idx][1]:
- magnitude=self._op_list[op_idx][2]
- tf=self._op_list[op_idx][0]
-
- #In place
- x[mask]=self._TF_dict[tf](x=smp_x, mag=torch.tensor(magnitude, device=x.device))
-
- return x
-
- def adjust_param(self, soft=False):
- pass #Pas de parametre a opti
-
- def loss_weight(self):
- return 1 #Pas d'echantillon = pas de ponderation
-
- def reg_loss(self, reg_factor=0.005):
- return torch.tensor(0) #Pas de regularisation
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self.augment(mode=mode) #Inutile si mode=None
- super(RandAugUDA, self).train(mode)
-
- def eval(self):
- self.train(mode=False)
-
- def augment(self, mode=True):
- self._data_augmentation=mode
-
- def __getitem__(self, key):
- return self._params[key]
-
- def __str__(self):
- return "RandAugUDA(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag)
-
-class Augmented_model(nn.Module):
- def __init__(self, data_augmenter, model):
- super(Augmented_model, self).__init__()
-
- self._mods = nn.ModuleDict({
- 'data_aug': data_augmenter,
- 'model': model
- })
-
- self.augment(mode=True)
-
- def initialize(self):
- self._mods['model'].initialize()
-
- def forward(self, x):
- return self._mods['model'](self._mods['data_aug'](x))
-
- def augment(self, mode=True):
- self._data_augmentation=mode
- self._mods['data_aug'].augment(mode)
-
- def train(self, mode=None):
- if mode is None :
- mode=self._data_augmentation
- self._mods['data_aug'].augment(mode)
- super(Augmented_model, self).train(mode)
- return self
-
- def eval(self):
- return self.train(mode=False)
- #super(Augmented_model, self).eval()
-
- def items(self):
- """Return an iterable of the ModuleDict key/value pairs.
- """
- return self._mods.items()
-
- def update(self, modules):
- self._mods.update(modules)
-
- def is_augmenting(self):
- return self._data_augmentation
-
- def TF_names(self):
- try:
- return self._mods['data_aug']._TF
- except:
- return None
-
- def __getitem__(self, key):
- return self._mods[key]
-
- def __str__(self):
- return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")"
\ No newline at end of file
diff --git a/Old/salvador/dataug_utils.py b/Old/salvador/dataug_utils.py
deleted file mode 100755
index ea81ea3..0000000
--- a/Old/salvador/dataug_utils.py
+++ /dev/null
@@ -1,314 +0,0 @@
-import numpy as np
-import json, math, time, os
-import matplotlib.pyplot as plt
-import copy
-import gc
-
-from torchviz import make_dot
-
-import torch
-import torch.nn.functional as F
-
-import time
-
-class timer():
-    """Stopwatch reporting the wall-clock time elapsed between successive calls."""
-
-    def __init__(self):
-        self._start_time = time.time()
-
-    def exec_time(self):
-        """Return the seconds elapsed since creation or the previous call, then restart."""
-        now = time.time()
-        elapsed = now - self._start_time
-        self._start_time = now
-        return elapsed
-
-def print_graph(PyTorch_obj, fig_name='graph'):
-    """Build the `make_dot` graph of `PyTorch_obj` and render it as SVG under `fig_name`."""
-    dot = make_dot(PyTorch_obj)  # Passing a loss gives the whole graph
-    # Available formats: https://graphviz.readthedocs.io/en/stable/manual.html#formats
-    dot.format = 'svg'
-    dot.render(fig_name)
-
-def plot_res(log, fig_name='res', param_names=None):
- """Plot loss, accuracy and parameter curves read from `log`, then save the figure.
-
- Each entry of `log` is read through the keys "epoch", "train_loss",
- "val_loss", "acc" and "param". When the first entry's "param" is a float it
- is drawn as a single 'Mag' curve; otherwise the per-index values are drawn
- as a stack plot labelled with `param_names` ('P0', 'P1', ... when not given).
- Dots in `fig_name` are replaced by commas before saving.
- """
-
- epochs = [x["epoch"] for x in log]
-
- fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
-
- ax[0].set_title('Loss')
- ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
- ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
- ax[0].legend()
-
- ax[1].set_title('Acc')
- ax[1].plot(epochs,[x["acc"] for x in log])
-
- # NOTE(review): "!= None" is elementwise for array-like params; "is not None" is the usual test.
- if log[0]["param"]!= None:
- if isinstance(log[0]["param"],float):
- ax[2].set_title('Mag')
- ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
- ax[2].legend()
- else :
- ax[2].set_title('Prob')
- #for idx, _ in enumerate(log[0]["param"]):
- #ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
- # Default labels are 'P<index>', one per entry of the first logged "param".
- if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
- # One series per parameter index, each holding that parameter's value at every log entry.
- proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
- ax[2].stackplot(epochs, proba, labels=param_names)
- ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
-
-
- # Dots are replaced by commas in the output name before saving.
- fig_name = fig_name.replace('.',',')
- plt.savefig(fig_name)
- plt.close()
-
-def plot_resV2(log, fig_name='res', param_names=None):
- """Plot a 2x3 grid: loss, accuracy, and probability/magnitude views of the parameters.
-
- Each entry of `log` is read through the keys "epoch", "train_loss",
- "val_loss", "acc" and "param"; every element of "param" is read through
- the keys 'p' and 'm'. `param_names` defaults to 'P0', 'P1', ...
- Dots in `fig_name` are replaced by commas before saving.
- """
-
- epochs = [x["epoch"] for x in log]
-
- fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))
-
- ax[0, 0].set_title('Loss')
- ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
- ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
- ax[0, 0].legend()
-
- ax[1, 0].set_title('Acc')
- ax[1, 0].plot(epochs,[x["acc"] for x in log])
-
- # NOTE(review): "!= None" is elementwise for array-like params; "is not None" is the usual test.
- if log[0]["param"]!= None:
- if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
- #proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
- # One series per parameter index: its 'p' (resp. 'm') value at every log entry.
- proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
- mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
-
- ax[0, 1].set_title('Prob =f(epoch)')
- ax[0, 1].stackplot(epochs, proba, labels=param_names)
- #ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
-
- # Mean and standard deviation over the log entries, one bar per parameter.
- ax[1, 1].set_title('Prob =f(TF)')
- mean = np.mean(proba, axis=1)
- std = np.std(proba, axis=1)
- ax[1, 1].bar(param_names, mean, yerr=std)
- plt.sca(ax[1, 1]), plt.xticks(rotation=90)
-
- ax[0, 2].set_title('Mag =f(epoch)')
- ax[0, 2].stackplot(epochs, mag, labels=param_names)
- ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
-
- ax[1, 2].set_title('Mag =f(TF)')
- mean = np.mean(mag, axis=1)
- std = np.std(mag, axis=1)
- ax[1, 2].bar(param_names, mean, yerr=std)
- plt.sca(ax[1, 2]), plt.xticks(rotation=90)
-
-
- fig_name = fig_name.replace('.',',')
- plt.savefig(fig_name, bbox_inches='tight')
- plt.close()
-
-def plot_compare(filenames, fig_name='res'):
- """Overlay loss, accuracy and parameter curves of several JSON result files.
-
- Every file in `filenames` is loaded with `json.load` and its 'Log' entry is
- plotted; curves are labelled with the file's index in `filenames`, and the
- figure title lists the index/file-name pairs. Dots in `fig_name` are
- replaced by commas before saving.
- """
-
- all_data=[]
- legend=""
- for idx, file in enumerate(filenames):
- legend+=str(idx)+'-'+file+'\n'
- with open(file) as json_file:
- data = json.load(json_file)
- all_data.append(data)
-
- fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
-
- for data_idx, log in enumerate(all_data):
- log=log['Log']
- epochs = [x["epoch"] for x in log]
-
- ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
- ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
-
- ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
- #ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
-
- # NOTE(review): "!= None" is elementwise for array-like params; "is not None" is the usual test.
- if log[0]["param"]!= None:
- if isinstance(log[0]["param"],float):
- ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
-
- else :
- # One curve per parameter index.
- for idx, _ in enumerate(log[0]["param"]):
- ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
-
- fig.suptitle(legend)
- ax[0].set_title('Loss')
- ax[1].set_title('Acc')
- ax[2].set_title('Param')
- for a in ax: a.legend()
-
- fig_name = fig_name.replace('.',',')
- plt.savefig(fig_name, bbox_inches='tight')
- plt.close()
-
-def plot_res_compare(filenames, fig_name='res'):
-    """Plot accuracy and run time against the number of parameter names.
-
-    Every file in `filenames` is read as JSON and must provide the keys
-    "Param_names", "Accuracy" and "Time" (only the first element of "Time" is
-    used). The figure is saved under `fig_name` with dots replaced by commas.
-    """
-    results = []
-    for path in filenames:
-        with open(path) as json_file:
-            results.append(json.load(json_file))
-
-    n_tf = [len(res["Param_names"]) for res in results]
-    acc = [res["Accuracy"] for res in results]
-    run_time = [res["Time"][0] for res in results]
-
-    # Three axes are created; only the first two are drawn on.
-    fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
-
-    ax[0].plot(n_tf, acc)
-    ax[1].plot(n_tf, run_time)
-
-    ax[0].set_title('Acc')
-    ax[1].set_title('Time')
-
-    plt.savefig(fig_name.replace('.', ','), bbox_inches='tight')
-    plt.close()
-
-def plot_TF_res(log, tf_names, fig_name='res'):
-    """Bar-plot the mean of the logged "param" values per `tf_names` entry, std as error bars.
-
-    The figure is saved under `fig_name` with dots replaced by commas.
-    """
-    params = [entry["param"] for entry in log]
-    mean = np.mean(params, axis=0)
-    std = np.std(params, axis=0)
-
-    _, axis = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
-    axis.bar(tf_names, mean, yerr=std)
-
-    plt.savefig(fig_name.replace('.', ','), bbox_inches='tight')
-    plt.close()
-
-def viz_sample_data(imgs, labels, fig_name='data_sample'):
- """Save a 5x5 grid of the first 25 images of `imgs`, each captioned with its label.
-
- NOTE(review): `range(25)` is fixed, so fewer than 25 images presumably fails
- on indexing -- confirm callers always pass at least 25.
- """
-
- # Axes 1..3 are moved to the end (permute(0, 2, 3, 1)) before squeezing;
- # assumes imgs is (batch, channel, height, width) -- TODO confirm.
- sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
-
- plt.figure(figsize=(10,10))
- for i in range(25):
- plt.subplot(5,5,i+1)
- plt.xticks([])
- plt.yticks([])
- plt.grid(False)
- plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
- plt.xlabel(labels[i].item())
-
- plt.savefig(fig_name)
- print("Sample saved :", fig_name)
- plt.close()
-
-def model_copy(src,dst, patch_copy=True, copy_grad=True):
- """Copy the state of `src` into `dst`, optionally with gradients.
-
- `dst` first loads `src.state_dict()`. With `patch_copy`, the 'model' and
- 'data_aug' sub-modules are loaded a second time individually. With
- `copy_grad`, every parameter of `dst` whose name matches a parameter of
- `src` receives that parameter's `.grad` (same object, not a copy).
- """
- #model=copy.deepcopy(fmodel) #Not appropriate: only the weights/grads are wanted (not the whole fmodel and its states)
-
- dst.load_state_dict(src.state_dict()) #Do not copy gradient !
-
- if patch_copy:
- dst['model'].load_state_dict(src['model'].state_dict()) #Copy of missing data?
- dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
-
- #Copy the gradients
- # NOTE(review): the nested loops compare every name pair (quadratic in the
- # number of parameters); a dict built from dst.named_parameters() would do.
- if copy_grad:
- for paramName, paramValue, in src.named_parameters():
- for netCopyName, netCopyValue, in dst.named_parameters():
- if paramName == netCopyName:
- netCopyValue.grad = paramValue.grad
- #netCopyValue=copy.deepcopy(paramValue)
-
- # Best-effort copy of two extra attributes of the augmenter.
- # NOTE(review): the bare except hides every error, not only a missing attribute.
- try: #Data_augV4
- dst['data_aug']._input_info = src['data_aug']._input_info
- dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
- except:
- pass
-
-def optim_copy(dopt, opt):
-    """Copy per-parameter state from `dopt` into `opt.state`.
-
-    `dopt.state` is indexed as ``[group index][parameter index]``, following
-    the order of `opt.param_groups`; `opt.state` is keyed by the parameter.
-    """
-    #inner_opt.load_state_dict(diffopt.state_dict()) #Need to save the optimizer state (momentum, etc.) => does not copy the state...
-    #opt_param=higher.optim.get_trainable_opt_params(diffopt)
-
-    for g_idx, param_group in enumerate(opt.param_groups):
-        group_state = dopt.state[g_idx]
-        for param_idx, param in enumerate(param_group['params']):
-            opt.state[param] = group_state[param_idx]
-
-def print_torch_mem(add_info=''):
- """Print a count of live tensors found by the garbage collector and a CUDA memory report.
-
- `add_info` is used as a prefix of both printed lines.
- """
-
- nb=0
- max_size=0
- for obj in gc.get_objects():
- #print(type(obj))
- try:
- if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
- #print(i, type(obj), obj.size())
- # Sum of the dimension lengths (not the element count); reported below as "Max dim".
- size = np.sum(obj.size())
- if(size>max_size): max_size=size
- nb+=1
- # NOTE(review): the bare except silently skips any object whose inspection fails.
- except:
- pass
- print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
-
- #print(add_info, "-Garbage size :",len(gc.garbage))
-
- # The string below is a bare expression statement, not a docstring.
- """Simple GPU memory report."""
-
- mega_bytes = 1024.0 * 1024.0
- string = add_info + ' memory (MB)'
- string += ' | allocated: {}'.format(
- torch.cuda.memory_allocated() / mega_bytes)
- string += ' | max allocated: {}'.format(
- torch.cuda.max_memory_allocated() / mega_bytes)
- # NOTE(review): memory_cached/max_memory_cached are presumably superseded by
- # memory_reserved/max_memory_reserved in recent PyTorch -- confirm target version.
- string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
- string += ' | max cached: {}'.format(
- torch.cuda.max_memory_cached()/ mega_bytes)
- print(string)
-
-def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
-    """Bar-plot, per parameter, mean probability times mean magnitude.
-
-    Args:
-        log: Sequence of entries; ``entry["param"][idx]`` is read through the
-            keys 'p' and 'm'.
-        fig_name: Output name; dots are replaced by commas before saving.
-        param_names: Bar labels. Defaults to 'P0', 'P1', ..., the same
-            fallback used by ``plot_res`` and ``plot_resV2``. Previously the
-            ``None`` default was handed straight to ``plt.bar``.
-    """
-    n_params = len(log[0]["param"])
-    proba = [[x["param"][idx]['p'] for x in log] for idx in range(n_params)]
-    mag = [[x["param"][idx]['m'] for x in log] for idx in range(n_params)]
-
-    if not param_names:
-        param_names = ['P' + str(idx) for idx in range(n_params)]
-
-    plt.figure()
-
-    # It could be interesting to multiply before taking the mean.
-    mean = np.mean(proba, axis=1) * np.mean(mag, axis=1)
-    # NOTE(review): a product of standard deviations is not the standard
-    # deviation of the product; kept as in the original.
-    std = np.std(proba, axis=1) * np.std(mag, axis=1)
-    plt.bar(param_names, mean, yerr=std)
-
-    plt.xticks(rotation=90)
-    fig_name = fig_name.replace('.', ',')
-    plt.savefig(fig_name, bbox_inches='tight')
-    plt.close()
-
-class loss_monitor(): #See https://github.com/pytorch/ignite
-    """Count consecutive non-improving losses and how often the patience ran out."""
-
-    def __init__(self, patience, end_train=1):
-        self.patience = patience
-        self.end_train = end_train
-        self.counter = 0
-        self.best_score = None
-        self.reached_limit = 0
-
-    def register(self, loss):
-        """Record `loss`: a value above the best one increments the counter."""
-        if self.best_score is None:
-            self.best_score = loss
-            return
-        if loss > self.best_score:
-            self.counter += 1
-            #if not self.reached_limit:
-            print("loss no improve counter", self.counter, self.reached_limit)
-            return
-        # Equal or lower loss: new reference, counter restarts.
-        self.best_score = loss
-        self.counter = 0
-
-    def limit_reached(self):
-        """Return how many times the patience was exhausted, updating it if needed."""
-        if self.counter < self.patience:
-            return self.reached_limit
-        self.counter = 0
-        self.reached_limit += 1
-        self.best_score = None
-        return self.reached_limit
-
-    def end_training(self):
-        """Return True once the patience was exhausted at least `end_train` times."""
-        return self.limit_reached() >= self.end_train
-
-    def reset(self):
-        """Restore the initial state, keeping `patience` and `end_train`."""
-        self.__init__(self.patience, self.end_train)
\ No newline at end of file
diff --git a/Old/salvador/grad_cam.py b/Old/salvador/grad_cam.py
deleted file mode 100755
index 2aeada5..0000000
--- a/Old/salvador/grad_cam.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import torch
-import numpy as np
-import torchvision
-from PIL import Image
-from torch import topk
-from torch import nn
-import torch.nn.functional as F
-from torch import topk
-import cv2
-from torchvision import transforms
-import os
-
-class Lambda(nn.Module):
-    "Layer whose forward pass is a plain call of the stored `func` on `x`"
-
-    def __init__(self, func):
-        super().__init__()
-        self.func = func
-
-    def forward(self, x):
-        result = self.func(x)
-        return result
-
-class SaveFeatures():
- """Keep CPU copies of a module's latest forward output and backward gradient via hooks."""
- # Class-level defaults, shadowed per instance once a hook has fired.
- activations, gradients = None, None
- def __init__(self, m):
- # Both hook handles are kept so that remove() can detach them later.
- self.forward = m.register_forward_hook(self.forward_hook_fn)
- # NOTE(review): register_backward_hook is presumably deprecated in favour of
- # register_full_backward_hook in recent PyTorch -- confirm target version.
- self.backward = m.register_backward_hook(self.backward_hook_fn)
-
- def forward_hook_fn(self, module, input, output):
- # Stores a detached CPU copy of the module output.
- self.activations = output.cpu().detach()
-
- def backward_hook_fn(self, module, grad_input, grad_output):
- # Only the first element of grad_output is kept.
- self.gradients = grad_output[0].cpu().detach()
-
- def remove(self):
- """Detach both hooks from the module."""
- self.forward.remove()
- self.backward.remove()
-
-def main(cam):
-    """Write a Grad-CAM style heat map for every image of the hard-coded dataset.
-
-    A torchvision model is rebuilt as ``Sequential(body, head)``, loaded from
-    'checkpoint.pt', and the activations/gradients of ``body`` are captured
-    with ``SaveFeatures``. One JPEG per input image is written to the
-    '<_root>_CAM' directory.
-
-    Args:
-        cam: Unused; kept so existing callers keep working.
-    """
-    device = 'cuda:0'
-    model_name = 'resnet50'
-    root = '/mnt/md0/data/cifar10/tmp/cifar/train'
-    _root = 'cifar'
-    out_dir = _root + '_CAM'
-
-    os.makedirs(out_dir, exist_ok=True)
-
-    train_transform = transforms.Compose([
-        transforms.ToTensor(),
-    ])
-
-    dataset = torchvision.datasets.ImageFolder(
-        root=root, transform=train_transform,
-    )
-
-    loader = torch.utils.data.DataLoader(dataset, batch_size=1)
-    model = torchvision.models.__dict__[model_name](pretrained=True)
-    flat = list(model.children())
-    body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(loader.dataset.classes)))
-    model = nn.Sequential(body, head)
-
-    model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
-    model = model.to(device)
-    model.eval()
-
-    activated_features = SaveFeatures(model[0])
-
-    for i, (img, target) in enumerate(loader):
-        img = img.to(device)
-        pred = model(img)
-        # Gradient of the target-class score; the backward hook records it.
-        # (A leftover debugger breakpoint used to stop every iteration here.)
-        pred[:, target.item()].backward()
-
-        # pull the gradients out of the model
-        gradients = activated_features.gradients[0]
-
-        pooled_gradients = gradients.mean(1).mean(1)
-
-        # get the activations of the last convolutional layer
-        activations = activated_features.activations[0]
-
-        heatmap = F.relu(((activations*pooled_gradients[...,None,None])).sum(0))
-        peak = torch.max(heatmap)
-        if peak > 0:  # an all-zero map would otherwise turn into NaN
-            heatmap /= peak
-
-        heatmap = heatmap.numpy()
-
-        # The loader is not shuffled, so index i matches dataset.imgs[i].
-        img_path = dataset.imgs[i][0]
-        image = cv2.imread(img_path)
-        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
-        heatmap = np.uint8(255 * heatmap)
-        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
-        # superimposed_img = heatmap * 0.3 + image * 0.5
-        superimposed_img = heatmap
-
-        # Name the output after the image file itself: a fixed path component
-        # (index 2) is identical for every file under the absolute `root`.
-        name = os.path.basename(img_path).split('.')[0]
-        out_path = os.path.join(out_dir, name + '.jpg')
-        cv2.imwrite(out_path, superimposed_img)
-        print(f'{out_path} saved')
-
-    activated_features.remove()
-
-# Script entry point; the `cam` argument is not read by main().
-if __name__ == "__main__":
- main(cam=True)
diff --git a/Old/salvador/train.py b/Old/salvador/train.py
deleted file mode 100755
index f481d3f..0000000
--- a/Old/salvador/train.py
+++ /dev/null
@@ -1,382 +0,0 @@
-import datetime
-import os
-import time
-import sys
-
-import torch
-import torch.utils.data
-from torch import nn
-import torchvision
-from torchvision import transforms
-from PIL import ImageEnhance
-import random
-
-import utils
-from fastprogress import master_bar, progress_bar
-import numpy as np
-
-## DATA AUG ##
-import higher
-from dataug import *
-from dataug_utils import *
-# Names of the transformations looked up in TF.TF_dict; color transforms are disabled here.
-tf_names = [
- ## Geometric TF ##
- 'Identity',
- 'FlipUD',
- 'FlipLR',
- 'Rotate',
- 'TranslateX',
- 'TranslateY',
- 'ShearX',
- 'ShearY',
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- #'Contrast',
- #'Color',
- #'Brightness',
- #'Sharpness',
- #'Posterize',
- #'Solarize', #=>Image in [0,1] #Not optimized for batches
-]
-
-class Lambda(nn.Module):
-    "Layer whose forward pass is a plain call of the stored `func` on `x`"
-
-    def __init__(self, func):
-        super().__init__()
-        self.func = func
-
-    def forward(self, x):
-        result = self.func(x)
-        return result
-
-class SubsetSampler(torch.utils.data.SubsetRandomSampler):
-    """Sampler that yields the given indices in their stored order (no shuffling)."""
-
-    def __init__(self, indices):
-        super().__init__(indices)
-
-    def __iter__(self):
-        total = len(self.indices)
-        return (self.indices[pos] for pos in range(total))
-
-    def __len__(self):
-        return len(self.indices)
-
-def sharpness(img, factor):
-    """Return `img` enhanced with a sharpness factor drawn from ``random.uniform(1, factor)``."""
-    level = random.uniform(1, factor)
-    return ImageEnhance.Sharpness(img).enhance(level)
-
-def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar, Kldiv=False):
- """Run one optimization pass over `data_loader`.
-
- Without `Kldiv` the loss is `criterion(model(image), target)`. With `Kldiv`
- the model is called twice per batch, once with augmentation switched off
- and once with it on, and the loss adds both cross-entropies and a KL term
- between the two predictions.
-
- Returns:
- The global average loss of the metric logger and the confusion matrix.
- """
- model.train()
- metric_logger = utils.MetricLogger(delimiter=" ")
- confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
- header = 'Epoch: {}'.format(epoch)
- for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
-
- image, target = image.to(device), target.to(device)
-
- if not Kldiv :
- output = model(image)
- #output = F.log_softmax(output, dim=1)
- loss = criterion(output, target) #No softmax?
-
- else : #Consume x2 memory
- # First pass without augmentation, then augmentation is switched back on.
- model.augment(mode=False)
- output = model(image)
- model.augment(mode=True)
- log_sup=F.log_softmax(output, dim=1)
- sup_loss = F.cross_entropy(log_sup, target)
-
- # Second pass on the same batch, this time augmented.
- aug_output = model(image)
- log_aug=F.log_softmax(aug_output, dim=1)
- aug_loss=F.cross_entropy(log_aug, target)
-
- #KL div w/ logits - Prediction similarity (distributions)
- KL_loss = F.softmax(output, dim=1)*(log_sup-log_aug)
- KL_loss = KL_loss.sum(dim=-1)
- #KL_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
- KL_loss = KL_loss.mean()
-
- unsupp_coeff = 1
- loss = sup_loss + (aug_loss + KL_loss) * unsupp_coeff
- #print(sup_loss.item(), (aug_loss + KL_loss).item())
-
- optimizer.zero_grad()
- loss.backward()
- optimizer.step()
-
- # Accuracy and confusion matrix use `output` (the non-augmented pass when Kldiv is set).
- acc1 = utils.accuracy(output, target)[0]
- batch_size = image.shape[0]
- metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
- metric_logger.update(loss=loss.item())
-
- confmat.update(target.flatten(), output.argmax(1).flatten())
-
-
- return metric_logger.loss.global_avg, confmat
-
-
-def evaluate(model, criterion, data_loader, device):
- """Evaluate `model` on `data_loader` without gradients.
-
- Returns:
- The global average loss, the list of misclassified dataset entries and
- the confusion matrix.
- """
- model.eval()
- metric_logger = utils.MetricLogger(delimiter=" ")
- confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
- header = 'Test:'
- missed = []
- with torch.no_grad():
- for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
- image, target = image.to(device), target.to(device)
- output = model(image)
- loss = criterion(output, target)
- # NOTE(review): .item() requires single-element tensors, so this presumably
- # only works with batch_size=1; the sampler must also expose `indices`.
- if target.item() != output.topk(1)[1].item():
- missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
-
- confmat.update(target.flatten(), output.argmax(1).flatten())
-
- acc1 = utils.accuracy(output, target)[0]
- batch_size = image.shape[0]
- metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
- metric_logger.update(loss=loss.item())
-
-
- return metric_logger.loss.global_avg, missed, confmat
-
-def get_train_valid_loader(args, augment, random_seed, valid_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
-    """Build the train and validation loaders over one ``ImageFolder`` dataset.
-
-    The folder at ``args.data_path`` is loaded twice (train and validation
-    transforms) and its indices are split in two. The per-class sample counts
-    of both loaders are printed, which iterates each loader once.
-
-    Args:
-        args: Namespace read through ``data_path``, ``batch_size`` and
-            ``test_only``.
-        augment: When true, training images get a random horizontal flip.
-        random_seed: Seed given to ``np.random.seed`` before shuffling.
-        valid_size: Fraction of the dataset used for validation, in [0, 1].
-        shuffle: Whether to shuffle the indices before splitting.
-        num_workers: Number of subprocesses used by both loaders.
-        pin_memory: Forwarded to both loaders.
-
-    Returns:
-        ``(train_loader, valid_loader)``. The validation loader always uses a
-        batch size of 1; the training loader does too when ``args.test_only``
-        is set, in which case it also iterates in a fixed order.
-    """
-    error_msg = "[!] valid_size should be in the range [0, 1]."
-    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
-
-    # Validation images are only converted to tensors; the random horizontal
-    # flip is applied to training images when `augment` is set.
-    valid_transform = transforms.Compose([
-        transforms.ToTensor(),
-    ])
-    if augment:
-        train_transform = transforms.Compose([
-            transforms.RandomHorizontalFlip(),
-            transforms.ToTensor(),
-        ])
-    else:
-        train_transform = transforms.Compose([
-            transforms.ToTensor(),
-        ])
-
-    # load the dataset
-    train_dataset = torchvision.datasets.ImageFolder(
-        root=args.data_path, transform=train_transform
-    )
-
-    valid_dataset = torchvision.datasets.ImageFolder(
-        root=args.data_path, transform=valid_transform
-    )
-
-    num_train = len(train_dataset)
-    indices = list(range(num_train))
-    split = int(np.floor(valid_size * num_train))
-
-    if shuffle:
-        np.random.seed(random_seed)
-        np.random.shuffle(indices)
-
-    train_idx, valid_idx = indices[split:], indices[:split]
-    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
-    valid_sampler = SubsetSampler(valid_idx)
-
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
-        num_workers=num_workers, pin_memory=pin_memory,
-    )
-    valid_loader = torch.utils.data.DataLoader(
-        valid_dataset, batch_size=1, sampler=valid_sampler,
-        num_workers=num_workers, pin_memory=pin_memory,
-    )
-
-    # One counter per class of the dataset (the counters used to be fixed to
-    # two classes, which fails on any dataset with more).
-    n_classes = len(train_dataset.classes)
-    for split_name, loader in (("Train", train_loader), ("Valid", valid_loader)):
-        tgt = [0] * n_classes
-        for _, targets in loader:
-            for target in targets:
-                tgt[target] += 1
-        print(split_name + " targets :", tgt)
-
-    return (train_loader, valid_loader)
-
-def main(args):
- """Train (or, with `args.test_only`, only evaluate) a torchvision classifier.
-
- The model named by `args.model` is rebuilt as `Sequential(body, head)` with a
- new linear layer sized to the dataset classes. With `args.augment` equal to
- 'Rand' or 'RandKL' it is wrapped in an `Augmented_model` using `RandAug`.
- """
- print(args)
-
- device = torch.device(args.device)
-
- torch.backends.cudnn.benchmark = True
-
-
- #augment = True if not args.test_only else False
-
- # The loader-level flip is used only for args.augment == 'flip' outside test mode.
- if not args.test_only and args.augment=='flip' : augment = True
- else : augment = False
-
- print("Augment", augment)
- data_loader, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
- num_workers=args.workers, valid_size=0.3, random_seed=999)
-
- print("Creating model")
- model = torchvision.models.__dict__[args.model](pretrained=True)
- flat = list(model.children())
-
- # body: all children but the last two; head: second-to-last child, flatten, new linear layer.
- body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
- model = nn.Sequential(body, head)
-
- Kldiv=False
- if not args.test_only and (args.augment=='Rand' or args.augment=='RandKL'):
- tf_dict = {k: TF.TF_dict[k] for k in tf_names}
- model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
-
- if args.augment=='RandKL': Kldiv=True
-
- # Scales the augmenter's 'mag' entry by the command-line magnitude.
- model['data_aug']['mag'].data = model['data_aug']['mag'].data * args.magnitude
- print("Augmodel")
-
- # model.fc = nn.Linear(model.fc.in_features, 2)
- # import ipdb; ipdb.set_trace()
-
- criterion = nn.CrossEntropyLoss().to(device)
-
- # optimizer = torch.optim.SGD(
- # model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
-
- optimizer = torch.optim.Adam(
- model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
-
- # Polynomial decay (power 0.9) over len(data_loader) * args.epochs steps.
- lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
- optimizer,
- lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
-
- es = utils.EarlyStopping() if not (args.augment=='Rand' or args.augment=='RandKL') else utils.EarlyStopping(augmented_model=True)
-
- if args.test_only:
- model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
- model = model.to(device)
- print('TEST')
- _, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
- print(missed)
- print('TRAIN')
- _, missed, _ = evaluate(model, criterion, data_loader, device=device)
- print(missed)
- return
-
- model = model.to(device)
-
- print("Start training")
- start_time = time.time()
- mb = master_bar(range(args.epochs))
-
- for epoch in mb:
- _, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb, Kldiv)
- # The scheduler is stepped once per epoch with the cumulative batch count.
- lr_scheduler.step( (epoch+1)*len(data_loader) )
- val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
- es(val_loss, model)
-
- # print('Valid Missed')
- # print(valid_missed)
-
- # print('Train')
- # print(train_confmat)
- #print('Valid')
- #print(valid_confmat)
-
- # if es.early_stop:
- # break
-
- total_time = time.time() - start_time
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
- print('Training time {}'.format(total_time_str))
-
-
-def parse_args():
-    """Parse the command-line options of the training script.
-
-    Returns:
-        The ``argparse.Namespace`` with ``data_path``, ``model``, ``device``,
-        ``batch_size``, ``epochs``, ``workers``, ``lr``, ``momentum``,
-        ``weight_decay``, ``test_only``, ``augment`` and ``magnitude``.
-    """
-    import argparse
-    parser = argparse.ArgumentParser(description='PyTorch Classification Training')
-
-    parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
-    parser.add_argument('--model', default='resnet18', help='model')
-    parser.add_argument('--device', default='cuda:0', help='device')
-    parser.add_argument('-b', '--batch-size', default=8, type=int)
-    parser.add_argument('--epochs', default=3, type=int, metavar='N',
-                        help='number of total epochs to run')
-    # Help texts now state the real defaults (they used to claim 16 and 1e-4).
-    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
-                        help='number of data loading workers (default: 0)')
-    parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
-    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                        help='momentum')
-    parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
-                        metavar='W', help='weight decay (default: 4e-5)',
-                        dest='weight_decay')
-
-    parser.add_argument(
-        "--test-only",
-        dest="test_only",
-        help="Only test the model",
-        action="store_true",
-    )
-
-    parser.add_argument('-a', '--augment', default='None', type=str,
-                        metavar='N', help='Data augment',
-                        dest='augment')
-    parser.add_argument('-m', '--magnitude', default=1.0, type=float,
-                        metavar='N', help='Augmentation magnitude',
-                        dest='magnitude')
-
-    args = parser.parse_args()
-
-    return args
-
-
-# Script entry point: parse the command line, then run main().
-if __name__ == "__main__":
- args = parse_args()
- main(args)
\ No newline at end of file
diff --git a/Old/salvador/train_dataug.py b/Old/salvador/train_dataug.py
deleted file mode 100755
index a867167..0000000
--- a/Old/salvador/train_dataug.py
+++ /dev/null
@@ -1,585 +0,0 @@
-import datetime
-import os
-import time
-import sys
-
-import torch
-import torch.utils.data
-from torch import nn
-import torchvision
-from torchvision import transforms
-from PIL import ImageEnhance
-import random
-
-import utils
-from fastprogress import master_bar, progress_bar
-import numpy as np
-
-
-## DATA AUG ##
-import higher
-from dataug import *
-from dataug_utils import *
-# Names of the geometric and color transformations listed for this script.
-tf_names = [
- ## Geometric TF ##
- 'Identity',
- 'FlipUD',
- 'FlipLR',
- 'Rotate',
- 'TranslateX',
- 'TranslateY',
- 'ShearX',
- 'ShearY',
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- 'Contrast',
- 'Color',
- 'Brightness',
- 'Sharpness',
- 'Posterize',
- 'Solarize', #=>Image in [0,1] #Not optimized for batches
-]
-
-def compute_vaLoss(model, dl_it, dl):
- """Return the cross-entropy of `model` on the next batch drawn from `dl_it`.
-
- When `dl_it` is exhausted, a new iterator over `dl` is created and its first
- batch is used. The batch is moved to the device of the model's first parameter.
-
- NOTE(review): the new iterator is bound to the local name only, so the
- caller's iterator stays exhausted; the model is also left in eval mode.
- """
- device = next(model.parameters()).device
- try:
- xs, ys = next(dl_it)
- except StopIteration: #End of the validation epoch
- dl_it = iter(dl)
- xs, ys = next(dl_it)
- xs, ys = xs.to(device), ys.to(device)
-
- model.eval() #Validation without transformations!
-
- return F.cross_entropy(model(xs), ys)
-
-def model_copy(src,dst, patch_copy=True, copy_grad=True):
- """Copy the state of `src` into `dst`, optionally with gradients.
-
- `dst` first loads `src.state_dict()`. With `patch_copy`, the 'model' and
- 'data_aug' sub-modules are loaded a second time individually. With
- `copy_grad`, every parameter of `dst` whose name matches a parameter of
- `src` receives that parameter's `.grad` (same object, not a copy).
- """
- #model=copy.deepcopy(fmodel) #Not appropriate: only the weights/grads are wanted (not the whole fmodel and its states)
-
- dst.load_state_dict(src.state_dict()) #Do not copy gradient !
-
- if patch_copy:
- dst['model'].load_state_dict(src['model'].state_dict()) #Copy of missing data?
- dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
-
- #Copy the gradients
- # NOTE(review): the nested loops compare every name pair (quadratic in the
- # number of parameters); a dict built from dst.named_parameters() would do.
- if copy_grad:
- for paramName, paramValue, in src.named_parameters():
- for netCopyName, netCopyValue, in dst.named_parameters():
- if paramName == netCopyName:
- netCopyValue.grad = paramValue.grad
- #netCopyValue=copy.deepcopy(paramValue)
-
- # Best-effort copy of two extra attributes of the augmenter.
- # NOTE(review): the bare except hides every error, not only a missing attribute.
- try: #Data_augV4
- dst['data_aug']._input_info = src['data_aug']._input_info
- dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
- except:
- pass
-
-def optim_copy(dopt, opt):
-    """Copy per-parameter state from `dopt` into `opt.state`.
-
-    `dopt.state` is indexed as ``[group index][parameter index]``, following
-    the order of `opt.param_groups`; `opt.state` is keyed by the parameter.
-    """
-    #inner_opt.load_state_dict(diffopt.state_dict()) #Need to save the optimizer state (momentum, etc.) => does not copy the state...
-    #opt_param=higher.optim.get_trainable_opt_params(diffopt)
-
-    for g_idx, param_group in enumerate(opt.param_groups):
-        group_state = dopt.state[g_idx]
-        for param_idx, param in enumerate(param_group['params']):
-            opt.state[param] = group_state[param_idx]
-
-
-#############
-
-class Lambda(nn.Module):
-    "Layer whose forward pass is a plain call of the stored `func` on `x`"
-
-    def __init__(self, func):
-        super().__init__()
-        self.func = func
-
-    def forward(self, x):
-        result = self.func(x)
-        return result
-
-class SubsetSampler(torch.utils.data.SubsetRandomSampler):
-    """Sampler that yields the given indices in their stored order (no shuffling)."""
-
-    def __init__(self, indices):
-        super().__init__(indices)
-
-    def __iter__(self):
-        total = len(self.indices)
-        return (self.indices[pos] for pos in range(total))
-
-    def __len__(self):
-        return len(self.indices)
-
-def sharpness(img, factor):
-    """Return `img` enhanced with a sharpness factor drawn from ``random.uniform(1, factor)``."""
-    level = random.uniform(1, factor)
-    return ImageEnhance.Sharpness(img).enhance(level)
-
-def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar):
- """Run one optimization pass over `data_loader` with `criterion` as the loss.
-
- Returns:
- The global average loss of the metric logger and the confusion matrix.
- """
- model.train()
- metric_logger = utils.MetricLogger(delimiter=" ")
- confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
- header = 'Epoch: {}'.format(epoch)
- for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
-
- image, target = image.to(device), target.to(device)
- output = model(image)
- loss = criterion(output, target)
-
- optimizer.zero_grad()
- loss.backward()
- optimizer.step()
-
- # Per-batch accuracy is weighted by the batch size in the meter.
- acc1 = utils.accuracy(output, target)[0]
- batch_size = image.shape[0]
- metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
- metric_logger.update(loss=loss.item())
-
- confmat.update(target.flatten(), output.argmax(1).flatten())
-
-
- return metric_logger.loss.global_avg, confmat
-
-
-def evaluate(model, criterion, data_loader, device):
- """Evaluate `model` on `data_loader` without gradients.
-
- Returns:
- The global average loss, the list of misclassified dataset entries and
- the confusion matrix.
- """
- model.eval()
- metric_logger = utils.MetricLogger(delimiter=" ")
- confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
- header = 'Test:'
- missed = []
- with torch.no_grad():
- for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
- image, target = image.to(device), target.to(device)
- output = model(image)
- loss = criterion(output, target)
- # NOTE(review): .item() requires single-element tensors, so this presumably
- # only works with batch_size=1; the sampler must also expose `indices`.
- if target.item() != output.topk(1)[1].item():
- missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
-
- confmat.update(target.flatten(), output.argmax(1).flatten())
-
- acc1 = utils.accuracy(output, target)[0]
- batch_size = image.shape[0]
- metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
- metric_logger.update(loss=loss.item())
-
-
- return metric_logger.loss.global_avg, missed, confmat
-
-def get_train_valid_loader(args, augment, random_seed, train_size=0.5, test_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
- """
- Build train, validation and test loaders over one ImageFolder dataset.
- The folder at args.data_path is loaded twice (train and validation
- transforms). The first `test_size` fraction of the (optionally shuffled)
- indices forms the test split; the remainder is cut into train and
- validation parts according to `train_size`.
- Params
- ------
- - args: namespace read through data_path, batch_size and test_only.
- - augment: whether to apply a random horizontal flip to the images
- loaded with the train transform.
- - random_seed: fix seed for reproducibility.
- - train_size: fraction of the non-test indices used for training; the
- rest is used for validation.
- - test_size: fraction of the dataset used for the test split. Should be
- a float in the range [0, 1].
- - shuffle: whether to shuffle the indices before splitting.
- - num_workers: number of subprocesses to use when loading the dataset.
- - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
- True if using GPU.
- Returns
- -------
- - train_loader: training set iterator.
- - valid_loader: validation set iterator (built on the train-transform dataset).
- - test_loader: test set iterator (batch size 1, fixed order).
- """
- error_msg = "[!] test_size should be in the range [0, 1]."
- assert ((test_size >= 0) and (test_size <= 1)), error_msg
-
- # normalize = transforms.Normalize(
- # mean=[0.4914, 0.4822, 0.4465],
- # std=[0.2023, 0.1994, 0.2010],
- # )
-
- # define transforms
- if augment:
- train_transform = transforms.Compose([
- # transforms.ColorJitter(brightness=0.3),
- # transforms.Lambda(lambda img: sharpness(img, 5)),
- transforms.RandomHorizontalFlip(),
- transforms.ToTensor(),
- # normalize,
- ])
-
- valid_transform = transforms.Compose([
- # transforms.ColorJitter(brightness=0.3),
- # transforms.RandomHorizontalFlip(),
- transforms.ToTensor(),
- # normalize,
- ])
- else:
- train_transform = transforms.Compose([
- transforms.ToTensor(),
- # normalize,
- ])
-
- valid_transform = transforms.Compose([
- transforms.ToTensor(),
- # normalize,
- ])
-
-
- # load the dataset
- train_dataset = torchvision.datasets.ImageFolder(
- root=args.data_path, transform=train_transform
- )
-
- test_dataset = torchvision.datasets.ImageFolder(
- root=args.data_path, transform=valid_transform
- )
-
- num_train = len(train_dataset)
- indices = list(range(num_train))
- split = int(np.floor(test_size * num_train))
-
- if shuffle:
- np.random.seed(random_seed)
- np.random.shuffle(indices)
-
- # First cut: test split. Second cut: train/validation inside the remainder.
- train_idx, test_idx = indices[split:], indices[:split]
- train_idx, valid_idx = train_idx[:int(len(train_idx)*train_size)], train_idx[int(len(train_idx)*train_size):]
- print("\nTrain", len(train_idx), "\nValid", len(valid_idx), "\nTest", len(test_idx))
- # In test-only mode the samplers keep a fixed order instead of a random one.
- train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
- valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) if not args.test_only else SubsetSampler(valid_idx)
- test_sampler = SubsetSampler(test_idx)
-
- train_loader = torch.utils.data.DataLoader(
- train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
- num_workers=num_workers, pin_memory=pin_memory,
- )
- # NOTE(review): the validation loader reads train_dataset, so it shares the train transform.
- valid_loader = torch.utils.data.DataLoader(
- train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=valid_sampler,
- num_workers=num_workers, pin_memory=pin_memory,
- )
- test_loader = torch.utils.data.DataLoader(
- test_dataset, batch_size=1, sampler=test_sampler,
- num_workers=num_workers, pin_memory=pin_memory,
- )
-
- # NOTE(review): `imgs` is only referenced by the commented-out prints below.
- imgs = np.asarray(train_dataset.imgs)
-
- # print('Train')
- # print(imgs[train_idx])
- #print('Valid')
- #print(imgs[valid_idx])
-
- return (train_loader, valid_loader, test_loader)
-
-def main(args):
- print(args)
-
- device = torch.device(args.device)
-
- torch.backends.cudnn.benchmark = True
-
- #augment = True if not args.test_only else False
- augment = False
-
- data_loader, dl_val, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
- num_workers=args.workers, train_size=0.99, test_size=0.2, random_seed=999)
-
- print("Creating model")
- model = torchvision.models.__dict__[args.model](pretrained=True)
- flat = list(model.children())
-
- body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
- model = nn.Sequential(body, head)
-
- # model.fc = nn.Linear(model.fc.in_features, 2)
- # import ipdb; ipdb.set_trace()
-
- criterion = nn.CrossEntropyLoss().to(device)
-
- # optimizer = torch.optim.SGD(
- # model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
- '''
- optimizer = torch.optim.Adam(
- model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
-
- lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
- optimizer,
- lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
- '''
- es = utils.EarlyStopping()
-
- if args.test_only:
- model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
- model = model.to(device)
- print('TEST')
- _, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
- print(missed)
- print('TRAIN')
- _, missed, _ = evaluate(model, criterion, data_loader, device=device)
- print(missed)
- return
-
- model = model.to(device)
-
- print("Start training")
- start_time = time.time()
- mb = master_bar(range(args.epochs))
- """
- for epoch in mb:
- _, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb)
- lr_scheduler.step( (epoch+1)*len(data_loader) )
- val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
- es(val_loss, model)
-
- # print('Valid Missed')
- # print(valid_missed)
-
-
- # print('Train')
- # print(train_confmat)
- print('Valid')
- print(valid_confmat)
-
- # if es.early_stop:
- # break
-
- total_time = time.time() - start_time
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
- print('Training time {}'.format(total_time_str))
-
- """
-
- #######
-
- inner_it = args.inner_it
- dataug_epoch_start=0
- print_freq=1
- KLdiv=False
-
- tf_dict = {k: TF.TF_dict[k] for k in tf_names}
- model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
- #model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
-
- val_loss=torch.tensor(0) #Necessaire si pas de metastep sur une epoch
- dl_val_it = iter(dl_val)
- countcopy=0
-
- #if inner_it!=0:
- meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=args.lr) #lr=1e-2
- #inner_opt = torch.optim.SGD(model['model'].parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #lr=1e-2 / momentum=0.9
- inner_opt = torch.optim.Adam(model['model'].parameters(), lr=args.lr, weight_decay=args.weight_decay)
-
- lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
- inner_opt,
- lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
-
- high_grad_track = True
- if inner_it == 0:
- high_grad_track=False
-
- model.train()
- model.augment(mode=False)
-
- fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
- diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel,track_higher_grads=high_grad_track)
-
- i=0
-
- for epoch in mb:
-
- metric_logger = utils.MetricLogger(delimiter=" ")
- confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
- header = 'Epoch: {}'.format(epoch)
-
- t0 = time.process_time()
- for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=mb):
- #for i, (xs, ys) in enumerate(dl_train):
- #print_torch_mem("it"+str(i))
- i+=1
- image, target = image.to(device), target.to(device)
-
- if(not KLdiv):
- #Methode uniforme
- logits = fmodel(image) # modified `params` can also be passed as a kwarg
- output = F.log_softmax(logits, dim=1)
- loss = F.cross_entropy(output, target, reduction='none') # no need to call loss.backwards()
-
- if fmodel._data_augmentation: #Weight loss
- w_loss = fmodel['data_aug'].loss_weight()#.to(device)
- loss = loss * w_loss
- loss = loss.mean()
-
- else:
- #Methode KL div
- fmodel.augment(mode=False)
- sup_logits = fmodel(xs)
- log_sup=F.log_softmax(sup_logits, dim=1)
- fmodel.augment(mode=True)
- loss = F.cross_entropy(log_sup, ys)
-
- if fmodel._data_augmentation:
- aug_logits = fmodel(xs)
- log_aug=F.log_softmax(aug_logits, dim=1)
- aug_loss=0
- if epoch>50: #debut differe ?
- #KL div w/ logits - Similarite predictions (distributions)
- aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
- aug_loss=aug_loss.sum(dim=-1)
- #aug_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
- w_loss = fmodel['data_aug'].loss_weight() #Weight loss
- aug_loss = (w_loss * aug_loss).mean()
-
- aug_loss += (F.cross_entropy(log_aug, ys , reduction='none') * w_loss).mean()
- #print(aug_loss)
- unsupp_coeff = 1
- loss += aug_loss * unsupp_coeff
-
- diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
-
- if(high_grad_track and i%inner_it==0): #Perform Meta step
- #print("meta")
- #Peu utile si high_grad_track = False
- val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) + fmodel['data_aug'].reg_loss()
- #print_graph(val_loss)
-
- val_loss.backward()
-
- countcopy+=1
- model_copy(src=fmodel, dst=model)
- optim_copy(dopt=diffopt, opt=inner_opt)
-
- #if epoch>50:
- meta_opt.step()
- model['data_aug'].adjust_param(soft=False) #Contrainte sum(proba)=1
- #model['data_aug'].next_TF_set()
-
- fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
- diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
-
-
- acc1 = utils.accuracy(output, target)[0]
- batch_size = image.shape[0]
- metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
- metric_logger.update(loss=loss.item())
-
- confmat.update(target.flatten(), output.argmax(1).flatten())
-
- if(not high_grad_track and (torch.cuda.memory_cached()/1024.0**2)>20000):
- countcopy+=1
- print_torch_mem("copy")
- model_copy(src=fmodel, dst=model)
- optim_copy(dopt=diffopt, opt=inner_opt)
- val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
-
- #Necessaire pour reset higher (Accumule les fast_param meme avec track_higher_grads = False)
- fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
- diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
- print_torch_mem("copy")
-
- if(not high_grad_track):
- countcopy+=1
- print_torch_mem("end copy")
- model_copy(src=fmodel, dst=model)
- optim_copy(dopt=diffopt, opt=inner_opt)
- val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
-
- #Necessaire pour reset higher (Accumule les fast_param meme avec track_higher_grads = False)
- fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
- diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
- print_torch_mem("end copy")
-
-
- tf = time.process_time()
-
-
- #### Print ####
- if(print_freq and epoch%print_freq==0):
- print('-'*9)
- print('Epoch : %d'%(epoch))
- print('Time : %.00f'%(tf - t0))
- print('Train loss :',loss.item(), '/ val loss', val_loss.item())
- print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
- print('TF Proba :', model['data_aug']['prob'].data)
- #print('proba grad',model['data_aug']['prob'].grad)
- print('TF Mag :', model['data_aug']['mag'].data)
- #print('Mag grad',model['data_aug']['mag'].grad)
- #print('Reg loss:', model['data_aug'].reg_loss().item())
- #print('Aug loss', aug_loss.item())
- #############
- #### Log ####
- #print(type(model['data_aug']) is dataug.Data_augV5)
- '''
- param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
- data={
- "epoch": epoch,
- "train_loss": loss.item(),
- "val_loss": val_loss.item(),
- "acc": accuracy,
- "time": tf - t0,
-
- "param": param #if isinstance(model['data_aug'], Data_augV5)
- #else [p.item() for p in model['data_aug']['prob']],
- }
- log.append(data)
- '''
- #############
-
- train_confmat=confmat
- lr_scheduler.step( (epoch+1)*len(data_loader) )
-
- test_loss, _, test_confmat = evaluate(model, criterion, data_loader_test, device=device)
- es(test_loss, model)
-
- # print('Valid Missed')
- # print(valid_missed)
-
-
- # print('Train')
- # print(train_confmat)
- print('Test')
- print(test_confmat)
-
- # if es.early_stop:
- # break
-
- total_time = time.time() - start_time
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
- print('Training time {}'.format(total_time_str))
-
-
-def parse_args():
- import argparse
- parser = argparse.ArgumentParser(description='PyTorch Classification Training')
-
- parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
- parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
- parser.add_argument('--device', default='cuda:0', help='device')
- parser.add_argument('-b', '--batch-size', default=8, type=int)
- parser.add_argument('--epochs', default=3, type=int, metavar='N',
- help='number of total epochs to run')
- parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
- help='number of data loading workers (default: 16)')
- parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
- parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
- help='momentum')
- parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
- metavar='W', help='weight decay (default: 1e-4)',
- dest='weight_decay')
-
- parser.add_argument(
- "--test-only",
- dest="test_only",
- help="Only test the model",
- action="store_true",
- )
-
- parser.add_argument('--in_it', '--inner_it', default=0, type=int,
- metavar='N', help='higher inner_it',
- dest='inner_it')
-
- args = parser.parse_args()
-
- return args
-
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
\ No newline at end of file
diff --git a/Old/salvador/transformations.py b/Old/salvador/transformations.py
deleted file mode 100755
index 82a8d9e..0000000
--- a/Old/salvador/transformations.py
+++ /dev/null
@@ -1,346 +0,0 @@
-import torch
-import kornia
-import random
-
-### Available TF for Dataug ###
-'''
-TF_dict={ #Dataugv4
- ## Geometric TF ##
- 'Identity' : (lambda x, mag: x),
- 'FlipUD' : (lambda x, mag: flipUD(x)),
- 'FlipLR' : (lambda x, mag: flipLR(x)),
- 'Rotate': (lambda x, mag: rotate(x, angle=torch.tensor([rand_int(mag, maxval=30)for _ in x], device=x.device))),
- 'TranslateX': (lambda x, mag: translate(x, translation=torch.tensor([[rand_int(mag, maxval=20), 0] for _ in x], device=x.device))),
- 'TranslateY': (lambda x, mag: translate(x, translation=torch.tensor([[0, rand_int(mag, maxval=20)] for _ in x], device=x.device))),
- 'ShearX': (lambda x, mag: shear(x, shear=torch.tensor([[rand_float(mag, maxval=0.3), 0] for _ in x], device=x.device))),
- 'ShearY': (lambda x, mag: shear(x, shear=torch.tensor([[0, rand_float(mag, maxval=0.3)] for _ in x], device=x.device))),
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- 'Contrast': (lambda x, mag: contrast(x, contrast_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
- 'Color':(lambda x, mag: color(x, color_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
- 'Brightness':(lambda x, mag: brightness(x, brightness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
- 'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
- 'Posterize': (lambda x, mag: posterize(x, bits=torch.tensor([rand_int(mag, minval=4, maxval=8) for _ in x], device=x.device))),
- 'Solarize': (lambda x, mag: solarize(x, thresholds=torch.tensor([rand_int(mag,minval=1, maxval=256)/256. for _ in x], device=x.device))) , #=>Image entre [0,1] #Pas opti pour des batch
-
- #Non fonctionnel
- #'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent)
- #'Equalize': (lambda mag: None),
-}
-'''
-'''
-TF_dict={ #Dataugv5 #AutoAugment
- ## Geometric TF ##
- 'Identity' : (lambda x, mag: x),
- 'FlipUD' : (lambda x, mag: flipUD(x)),
- 'FlipLR' : (lambda x, mag: flipLR(x)),
- 'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
- 'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
- 'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
- 'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
- 'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- 'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient
- 'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1]
-
- #Non fonctionnel
- #'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent)
- #'Equalize': (lambda mag: None),
-}
-'''
-TF_dict={ #Dataugv5
- ## Geometric TF ##
- 'Identity' : (lambda x, mag: x),
- 'FlipUD' : (lambda x, mag: flipUD(x)),
- 'FlipLR' : (lambda x, mag: flipLR(x)),
- 'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
- 'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
- 'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
- 'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
- 'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
-
- ## Color TF (Expect image in the range of [0, 1]) ##
- 'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
- 'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient
- 'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1]
-
- #Color TF (Common mag scale)
- '+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
- '+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
- '+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
- '+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
- '-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
- '-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
- '-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
- '-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
- '=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient
- '=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1]
-
-
- 'BRotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30*3))),
- 'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=0))),
- 'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=1))),
- 'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=0))),
- 'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=1))),
-
- 'BadTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=0))),
- 'BadTranslateX_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=0))),
- 'BadTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=1))),
- 'BadTranslateY_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=1))),
-
- 'BadColor':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
- 'BadSharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
- 'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
- 'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
-
- #Non fonctionnel
- #'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent)
- #'Equalize': (lambda mag: None),
-}
-
-TF_no_mag={'Identity', 'FlipUD', 'FlipLR'}
-TF_ignore_mag= TF_no_mag | {'Solarize', 'Posterize'}
-
-def int_image(float_image): #ATTENTION : legere perte d'info (granularite : 1/256 = 0.0039)
- return (float_image*255.).type(torch.uint8)
-
-def float_image(int_image):
- return int_image.type(torch.float)/255.
-
-#def rand_inverse(value):
-# return value if random.random() < 0.5 else -value
-
-#def rand_int(mag, maxval, minval=None): #[(-maxval,minval), maxval]
-# real_max = int_parameter(mag, maxval=maxval)
-# if not minval : minval = -real_max
-# return random.randint(minval, real_max)
-
-#def rand_float(mag, maxval, minval=None): #[(-maxval,minval), maxval]
-# real_max = float_parameter(mag, maxval=maxval)
-# if not minval : minval = -real_max
-# return random.uniform(minval, real_max)
-
-def rand_floats(size, mag, maxval, minval=None): #[(-maxval,minval), maxval]
- real_mag = float_parameter(mag, maxval=maxval)
- if not minval : minval = -real_mag
- #return random.uniform(minval, real_max)
- return minval + (real_mag-minval) * torch.rand(size, device=mag.device) #[min_val, real_mag]
-
-def invScale_rand_floats(size, mag, maxval, minval):
- #Mag=[0,PARAMETER_MAX] => [PARAMETER_MAX, 0] = [maxval, minval]
- real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval-minval)+minval
- return real_mag + (maxval-real_mag) * torch.rand(size, device=mag.device) #[real_mag, max_val]
-
-def zero_stack(tensor, zero_pos):
- if zero_pos==0:
- return torch.stack((tensor, torch.zeros((tensor.shape[0],), device=tensor.device)), dim=1)
- if zero_pos==1:
- return torch.stack((torch.zeros((tensor.shape[0],), device=tensor.device), tensor), dim=1)
- else:
- raise Exception("Invalid zero_pos : ", zero_pos)
-
-#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
-PARAMETER_MAX = 1 # What is the max 'level' a transform could be predicted
-def float_parameter(level, maxval):
- """Helper function to scale `val` between 0 and maxval .
- Args:
- level: Level of the operation that will be between [0, `PARAMETER_MAX`].
- maxval: Maximum value that the operation can have. This will be scaled
- to level/PARAMETER_MAX.
- Returns:
- A float that results from scaling `maxval` according to `level`.
- """
-
- #return float(level) * maxval / PARAMETER_MAX
- return (level * maxval / PARAMETER_MAX)#.to(torch.float)
-
-#def int_parameter(level, maxval): #Perte de gradient
- """Helper function to scale `val` between 0 and maxval .
- Args:
- level: Level of the operation that will be between [0, `PARAMETER_MAX`].
- maxval: Maximum value that the operation can have. This will be scaled
- to level/PARAMETER_MAX.
- Returns:
- An int that results from scaling `maxval` according to `level`.
- """
- #return int(level * maxval / PARAMETER_MAX)
-# return (level * maxval / PARAMETER_MAX)
-
-def flipLR(x):
- device = x.device
- (batch_size, channels, h, w) = x.shape
-
- M =torch.tensor( [[[-1., 0., w-1],
- [ 0., 1., 0.],
- [ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
-
- # warp the original image by the found transform
- return kornia.warp_perspective(x, M, dsize=(h, w))
-
-def flipUD(x):
- device = x.device
- (batch_size, channels, h, w) = x.shape
-
- M =torch.tensor( [[[ 1., 0., 0.],
- [ 0., -1., h-1],
- [ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
-
- # warp the original image by the found transform
- return kornia.warp_perspective(x, M, dsize=(h, w))
-
-def rotate(x, angle):
- return kornia.rotate(x, angle=angle.type(torch.float)) #Kornia ne supporte pas les int
-
-def translate(x, translation):
- #print(translation)
- return kornia.translate(x, translation=translation.type(torch.float)) #Kornia ne supporte pas les int
-
-def shear(x, shear):
- return kornia.shear(x, shear=shear)
-
-def contrast(x, contrast_factor):
- return kornia.adjust_contrast(x, contrast_factor=contrast_factor) #Expect image in the range of [0, 1]
-
-#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
-def color(x, color_factor):
- (batch_size, channels, h, w) = x.shape
-
- gray_x = kornia.rgb_to_grayscale(x)
- gray_x = gray_x.repeat_interleave(channels, dim=1)
- return blend(gray_x, x, color_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
-
-def brightness(x, brightness_factor):
- device = x.device
-
- return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
-
-def sharpeness(x, sharpness_factor):
- device = x.device
- (batch_size, channels, h, w) = x.shape
-
- k = torch.tensor([[[ 1., 1., 1.],
- [ 1., 5., 1.],
- [ 1., 1., 1.]]], device=device) #Smooth Filter : https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
- smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) #Peut etre necessaire de s'occuper du channel Alhpa differement
-
- return blend(smooth_x, x, sharpness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
-
-#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
-def posterize(x, bits):
- bits = bits.type(torch.uint8) #Perte du gradient
- x = int_image(x) #Expect image in the range of [0, 1]
-
- mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)
-
- (batch_size, channels, h, w) = x.shape
- mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
-
- return float_image(x & mask)
-
-def auto_contrast(x): #PAS OPTIMISE POUR DES BATCH #EXTRA LENT
- # Optimisation : Application de LUT efficace / Calcul d'histogramme par batch/channel
- print("Warning : Pas encore check !")
- (batch_size, channels, h, w) = x.shape
- x = int_image(x) #Expect image in the range of [0, 1]
- #print('Start',x[0])
- for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Operation par image
- #print(img.shape)
- for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Operation par channel
- #print(chan.shape)
- hist = torch.histc(chan, bins=256, min=0, max=255) #PAS DIFFERENTIABLE
-
- # find lowest/highest samples after preprocessing
- for lo in range(256):
- if hist[lo]:
- break
- for hi in range(255, -1, -1):
- if hist[hi]:
- break
- if hi <= lo:
- # don't bother
- pass
- else:
- scale = 255.0 / (hi - lo)
- offset = -lo * scale
- for ix in range(256):
- n_ix = int(ix * scale + offset)
- if n_ix < 0: n_ix = 0
- elif n_ix > 255: n_ix = 255
-
- chan[chan==ix]=n_ix
- x[im_idx, chan_idx]=chan
-
- #print('End',x[0])
- return float_image(x)
-
-def equalize(x): #PAS OPTIMISE POUR DES BATCH
- raise Exception(self, "not implemented")
- # Optimisation : Application de LUT efficace / Calcul d'histogramme par batch/channel
- (batch_size, channels, h, w) = x.shape
- x = int_image(x) #Expect image in the range of [0, 1]
- #print('Start',x[0])
- for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Operation par image
- #print(img.shape)
- for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Operation par channel
- #print(chan.shape)
- hist = torch.histc(chan, bins=256, min=0, max=255) #PAS DIFFERENTIABLE
-
- return float_image(x)
-
-def solarize(x, thresholds):
- batch_size, channels, h, w = x.shape
- #imgs=[]
- #for idx, t in enumerate(thresholds): #Operation par image
- # mask = x[idx] > t #Perte du gradient
- #In place
- # inv_x = 1-x[idx][mask]
- # x[idx][mask]=inv_x
- #
-
- #Out of place
- # im = x[idx]
- # inv_x = 1-im[mask]
-
- # imgs.append(im.masked_scatter(mask,inv_x))
-
- #idxs=torch.tensor(range(x.shape[0]), device=x.device)
- #idxs=idxs.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
- #x=x.scatter(dim=0, index=idxs, src=torch.stack(imgs))
- #
-
- thresholds = thresholds.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
- #print(thresholds.grad_fn)
- x=torch.where(x>thresholds,1-x, x)
- #print(mask.grad_fn)
-
- #x=x.min(thresholds)
- #inv_x = 1-x[mask]
- #x=x.where(x= 0) & (a < n)
- inds = n * a[k].to(torch.int64) + b[k]
- self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)
-
- def reset(self):
- self.mat.zero_()
-
- def compute(self):
- h = self.mat.float()
- acc_global = torch.diag(h).sum() / h.sum()
- acc = torch.diag(h) / h.sum(1)
- return acc_global, acc
-
-
- def __str__(self):
- acc_global, acc = self.compute()
- return (
- 'global correct: {:.1f}\n'
- 'average row correct: {}').format(
- acc_global.item() * 100,
- ['{:.1f}'.format(i) for i in (acc * 100).tolist()])
-
-
-class MetricLogger(object):
- def __init__(self, delimiter="\t"):
- self.meters = defaultdict(SmoothedValue)
- self.delimiter = delimiter
-
- def update(self, **kwargs):
- for k, v in kwargs.items():
- if isinstance(v, torch.Tensor):
- v = v.item()
- assert isinstance(v, (float, int))
- self.meters[k].update(v)
-
- def __getattr__(self, attr):
- if attr in self.meters:
- return self.meters[attr]
- if attr in self.__dict__:
- return self.__dict__[attr]
- raise AttributeError("'{}' object has no attribute '{}'".format(
- type(self).__name__, attr))
-
- def __str__(self):
- loss_str = []
- for name, meter in self.meters.items():
- loss_str.append(
- "{}: {}".format(name, str(meter))
- )
- return self.delimiter.join(loss_str)
-
-
- def add_meter(self, name, meter):
- self.meters[name] = meter
-
- def log_every(self, iterable, parent, header=None, **kwargs):
- if not header:
- header = ''
- log_msg = self.delimiter.join([
- '{meters}'
- ])
-
- progrss = progress_bar(iterable, parent=parent, **kwargs)
-
- for idx, obj in enumerate(progrss):
- yield idx, obj
- progrss.comment = log_msg.format(
- meters=str(self))
-
- print('{header} {meters}'.format(header=header, meters=str(self)))
-
-def accuracy(output, target, topk=(1,)):
- """Computes the accuracy over the k top predictions for the specified values of k"""
- with torch.no_grad():
- maxk = max(topk)
- batch_size = target.size(0)
-
- _, pred = output.topk(maxk, 1, True, True)
- pred = pred.t()
- correct = pred.eq(target[None])
-
- res = []
- for k in topk:
- correct_k = correct[:k].flatten().sum(dtype=torch.float32)
- res.append(correct_k * (100.0 / batch_size))
- return res
-
-class EarlyStopping:
- """Early stops the training if validation loss doesn't improve after a given patience."""
- def __init__(self, patience=7, verbose=False, delta=0, augmented_model=False):
- """
- Args:
- patience (int): How long to wait after last time validation loss improved.
- Default: 7
- verbose (bool): If True, prints a message for each validation loss improvement.
- Default: False
- delta (float): Minimum change in the monitored quantity to qualify as an improvement.
- Default: 0
- """
- self.patience = patience
- self.verbose = verbose
- self.counter = 0
- self.best_score = None
- self.early_stop = False
- self.val_loss_min = np.Inf
- self.delta = delta
-
- self.augmented_model = augmented_model
-
- def __call__(self, val_loss, model):
-
- score = -val_loss
-
- if self.best_score is None:
- self.best_score = score
- self.save_checkpoint(val_loss, model)
- elif score < self.best_score - self.delta:
- self.counter += 1
- # print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
- # if self.counter >= self.patience:
- # self.early_stop = True
- else:
- self.best_score = score
- self.save_checkpoint(val_loss, model)
- self.counter = 0
-
- def save_checkpoint(self, val_loss, model):
- '''Saves model when validation loss decrease.'''
- if self.verbose:
- print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
- torch.save(model.state_dict(), 'checkpoint.pt') if not self.augmented_model else torch.save(model['model'].state_dict(), 'checkpoint.pt')
- self.val_loss_min = val_loss
\ No newline at end of file