Remove Old folder

This commit is contained in:
AntoineH 2024-08-20 11:55:02 +02:00
parent 18be4d85ca
commit 431252992c
38 changed files with 0 additions and 7821 deletions

View file

@@ -1,456 +0,0 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Transforms used in the Augmentation Policies."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import inspect
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import
IMAGE_SIZE = 28
# Mean and std of the images on the training set (these are the standard CIFAR-10 values)
MEANS = [0.49139968, 0.48215841, 0.44653091]
STDS = [0.24703223, 0.24348513, 0.26158784]
PARAMETER_MAX = 10 # Maximum 'level' a transform can be applied at
def random_flip(x):
"""Flip the input x horizontally with 50% probability."""
if np.random.rand(1)[0] > 0.5:
return np.fliplr(x)
return x
def zero_pad_and_crop(img, amount=4):
"""Zero pad by `amount` zero pixels on each side then take a random crop.
Args:
img: numpy image that will be zero padded and cropped.
amount: amount of zeros to pad `img` with horizontally and vertically.
Returns:
The cropped zero padded img. The returned numpy array will be of the same
shape as `img`.
"""
padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
img.shape[2]))
padded_img[amount:img.shape[0] + amount, amount:
img.shape[1] + amount, :] = img
top = np.random.randint(low=0, high=2 * amount)
left = np.random.randint(low=0, high=2 * amount)
new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
return new_img
def create_cutout_mask(img_height, img_width, num_channels, size):
"""Creates a zero mask used for cutout of shape `img_height` x `img_width`.
Args:
img_height: Height of image cutout mask will be applied to.
img_width: Width of image cutout mask will be applied to.
num_channels: Number of channels in the image.
size: Size of the zeros mask.
Returns:
A mask of shape `img_height` x `img_width` with all ones except for a
square of zeros of shape `size` x `size`. This mask is meant to be
elementwise multiplied with the original image. Additionally returns
the `upper_coord` and `lower_coord` which specify where the cutout mask
will be applied.
"""
assert img_height == img_width
# Sample center where cutout mask will be applied
height_loc = np.random.randint(low=0, high=img_height)
width_loc = np.random.randint(low=0, high=img_width)
# Determine upper left and lower right corners of patch
upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
lower_coord = (min(img_height, height_loc + size // 2),
min(img_width, width_loc + size // 2))
mask_height = lower_coord[0] - upper_coord[0]
mask_width = lower_coord[1] - upper_coord[1]
assert mask_height > 0
assert mask_width > 0
mask = np.ones((img_height, img_width, num_channels))
zeros = np.zeros((mask_height, mask_width, num_channels))
mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
zeros)
return mask, upper_coord, lower_coord
def cutout_numpy(img, size=16):
"""Apply cutout with mask of shape `size` x `size` to `img`.
The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
This operation applies a `size`x`size` mask of zeros to a random location
within `img`.
Args:
img: Numpy image that cutout will be applied to.
size: Height/width of the cutout mask that will be applied.
Returns:
A numpy tensor that is the result of applying the cutout mask to `img`.
"""
img_height, img_width, num_channels = (img.shape[0], img.shape[1],
img.shape[2])
assert len(img.shape) == 3
mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
return img * mask
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / PARAMETER_MAX
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / PARAMETER_MAX)
def pil_wrap(img):
"""Convert the `img` numpy tensor to a PIL Image."""
return Image.fromarray(
np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')
def pil_unwrap(pil_img):
"""Converts the PIL img to a numpy array."""
pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
i1, i2 = np.where(pic_array[:, :, 3] == 0)
pic_array = (pic_array[:, :, :3] - MEANS) / STDS
pic_array[i1, i2] = [0, 0, 0]
return pic_array
def apply_policy(policy, img):
"""Apply the `policy` to the numpy `img`.
Args:
policy: A list of tuples with the form (name, probability, level) where
`name` is the name of the augmentation operation to apply, `probability`
is the probability of applying the operation and `level` is what strength
the operation to apply.
img: Numpy image that will have `policy` applied to it.
Returns:
The result of applying `policy` to `img`.
"""
#print('img shape :',img.shape)
#print('Policy len :',len(policy))
pil_img = pil_wrap(img)
for xform in policy:
#print('xform :', len(xform))
assert len(xform) == 3
name, probability, level = xform
#xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
pil_img = xform_fn(pil_img)
return pil_unwrap(pil_img)
class TransformFunction(object):
"""Wraps the Transform function for pretty printing options."""
def __init__(self, func, name):
self.f = func
self.name = name
def __repr__(self):
return '<' + self.name + '>'
def __call__(self, pil_img):
return self.f(pil_img)
class TransformT(object):
"""Each instance of this class represents a specific transform."""
def __init__(self, name, xform_fn):
self.name = name
self.xform = xform_fn
def pil_transformer(self, probability, level):
def return_function(im):
if random.random() < probability:
im = self.xform(im, level)
return im
name = self.name + '({:.1f},{})'.format(probability, level)
return TransformFunction(return_function, name)
def do_transform(self, image, level):
f = self.pil_transformer(PARAMETER_MAX, level)
return pil_unwrap(f(pil_wrap(image)))
################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
'FlipLR',
lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
'FlipUD',
lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
'AutoContrast',
lambda pil_img, level: ImageOps.autocontrast(
pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
'Equalize',
lambda pil_img, level: ImageOps.equalize(
pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
'Invert',
lambda pil_img, level: ImageOps.invert(
pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
'Smooth',
lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))
def _rotate_impl(pil_img, level):
"""Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
degrees = int_parameter(level, 30)
if random.random() > 0.5:
degrees = -degrees
return pil_img.rotate(degrees)
rotate = TransformT('Rotate', _rotate_impl)
def _posterize_impl(pil_img, level):
"""Applies PIL Posterize to `pil_img`."""
level = int_parameter(level, 4)
return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')
posterize = TransformT('Posterize', _posterize_impl)
def _shear_x_impl(pil_img, level):
"""Applies PIL ShearX to `pil_img`.
The ShearX operation shears the image along the horizontal axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearX applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))
shear_x = TransformT('ShearX', _shear_x_impl)
def _shear_y_impl(pil_img, level):
"""Applies PIL ShearY to `pil_img`.
The ShearY operation shears the image along the vertical axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearY applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))
shear_y = TransformT('ShearY', _shear_y_impl)
def _translate_x_impl(pil_img, level):
"""Applies PIL TranslateX to `pil_img`.
Translate the image in the horizontal direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateX applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))
translate_x = TransformT('TranslateX', _translate_x_impl)
def _translate_y_impl(pil_img, level):
"""Applies PIL TranslateY to `pil_img`.
Translate the image in the vertical direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateY applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))
translate_y = TransformT('TranslateY', _translate_y_impl)
def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
"""Applies a crop to `pil_img` with the size depending on the `level`."""
cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
return resized
crop_bilinear = TransformT('CropBilinear', _crop_impl)
def _solarize_impl(pil_img, level):
"""Applies PIL Solarize to `pil_img`.
The Solarize operation inverts all pixel values above a threshold of `256 - level`.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had Solarize applied to it.
"""
level = int_parameter(level, 256)
return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')
solarize = TransformT('Solarize', _solarize_impl)
def _cutout_pil_impl(pil_img, level):
"""Apply cutout to pil_img at the specified level."""
size = int_parameter(level, 20)
if size <= 0:
return pil_img
img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
_, upper_coord, lower_coord = (
create_cutout_mask(img_height, img_width, num_channels, size))
pixels = pil_img.load() # create the pixel map
for i in range(upper_coord[0], lower_coord[0]): # for every col:
for j in range(upper_coord[1], lower_coord[1]): # For every row
pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly
return pil_img
cutout = TransformT('Cutout', _cutout_pil_impl)
def _enhancer_impl(enhancer):
"""Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
def impl(pil_img, level):
v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it
return enhancer(pil_img).enhance(v)
return impl
color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))
ALL_TRANSFORMS = [
flip_lr,
flip_ud,
auto_contrast,
equalize,
invert,
rotate,
posterize,
crop_bilinear,
solarize,
color,
contrast,
brightness,
sharpness,
shear_x,
shear_y,
translate_x,
translate_y,
cutout,
blur,
smooth
]
NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
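A minimal usage sketch of the transform registry above (assumptions: this file is importable as `augmentation_transforms`, and the input is a 28x28x3 numpy array normalized with `MEANS`/`STDS`, as `pil_wrap` expects). It drives a single transform through `pil_transformer` rather than `apply_policy`, since `apply_policy` as written above calls `probability.eval()` and therefore expects a TensorFlow tensor for the probability:

```python
# Hedged sketch; module name and input preprocessing are assumptions.
import numpy as np
import augmentation_transforms as aug

raw = np.random.rand(aug.IMAGE_SIZE, aug.IMAGE_SIZE, 3)  # stand-in for a real image in [0, 1]
img = (raw - aug.MEANS) / aug.STDS                       # normalized, as pil_wrap expects

# Look up a transform by name and build a callable that applies it
# with probability 0.9 at level 5 (levels live in [0, PARAMETER_MAX]).
xform = aug.NAME_TO_TRANSFORM['Rotate'].pil_transformer(0.9, 5)
augmented = aug.pil_unwrap(xform(aug.pil_wrap(img)))     # back to a normalized numpy array
print(augmented.shape)                                   # (28, 28, 3)
```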

View file

@@ -1,131 +0,0 @@
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset
import os
import numpy as np
import tensorflow as tf
import augmentation_transforms as augmentation_transforms ##### WARNING: DUPLICATED FILE => NEEDS A CLEANER FIX ####
def viz_data(dataset, fig_name='data_sample',aug_policy=None):
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = dataset.data[i][:,:,0]
if aug_policy :
img = augment_img(img,aug_policy)
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(np.nonzero(dataset.target[i])[0].item())
plt.savefig(fig_name)
def augment_img(data, policy):
#print('Im shape',data.shape)
data = np.stack((data,)*3, axis=-1) # Hacky: just to force 3 channels
#print('Im shape',data.shape)
final_img = augmentation_transforms.apply_policy(policy, data)
#final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
# Apply cutout
#final_img = augmentation_transforms.cutout_numpy(final_img)
im_rgb = np.array(final_img, np.float32)
im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140]) # Just to go back to 1 channel
return im_gray
### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def cnn(x_data_tf,y_data_tf, name='model'):
# tunable hyperparameters for nn architecture
s_f_conv1 = 3; # filter size of first convolution layer (default = 3)
n_f_conv1 = 36; # number of features of first convolution layer (default = 36)
s_f_conv2 = 3; # filter size of second convolution layer (default = 3)
n_f_conv2 = 36; # number of features of second convolution layer (default = 36)
s_f_conv3 = 3; # filter size of third convolution layer (default = 3)
n_f_conv3 = 36; # number of features of third convolution layer (default = 36)
n_n_fc1 = 576; # number of neurons of first fully connected layer (default = 576)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(x_data_tf,
W_conv1_tf) + b_conv1_tf,
name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf,
name = 'h_pool1_tf') # (.,14,14,32)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2,
n_f_conv1, n_f_conv2],
name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf,
W_conv2_tf) + b_conv2_tf,
name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
# 3.layer: convolution + max pooling
W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3,
n_f_conv2, n_f_conv3],
name = 'W_conv3_tf')
b_conv3_tf = bias_variable([n_f_conv3], name = 'b_conv3_tf')
h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf,
W_conv3_tf) + b_conv3_tf,
name = 'h_conv3_tf') #(.,7,7,32)
h_pool3_tf = max_pool_2x2(h_conv3_tf,
name = 'h_pool3_tf') # (.,4,4,32)
# 4.layer: fully connected
W_fc1_tf = weight_variable([4*4*n_f_conv3,n_n_fc1],
name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1,4*4*n_f_conv3],
name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf,
W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, 10], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([10], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf),
b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
tf.argmax(y_data_tf, 1),
name = 'y_pred_correct_tf')
return y_pred_proba_tf
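A hedged sketch of how the `cnn` graph builder above might be wired up (TF1 style; assumptions: this file is importable, e.g. as `blue_utils`, and its own imports such as `far_ho` resolve in the environment):

```python
# Hedged sketch; module name and dummy data are assumptions.
import numpy as np
import tensorflow as tf
import blue_utils as butil

x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='x')
y = tf.placeholder(tf.float32, shape=[None, 10], name='y')
probs = butil.cnn(x, y)  # softmax probabilities, shape (., 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    images = np.random.rand(4, 28, 28, 1).astype(np.float32)            # dummy batch
    labels = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 4)]  # dummy one-hot labels
    print(sess.run(probs, feed_dict={x: images, y: labels}).shape)      # (4, 10)
```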

View file

@@ -1,166 +0,0 @@
#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
from __future__ import absolute_import, print_function, division
import os
import numpy as np
import tensorflow as tf
#import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
#import pprint
import autoaugment.augmentation_transforms as augmentation_transforms
#import autoaugment.policies as found_policies
from autoaugment.wrn import build_wrn_model
def build_model(inputs, num_classes, is_training, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model being built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
with contextlib.nested(*scopes):
if hparams.model_name == 'pyramid_net':
logits = build_shake_drop_model(
inputs, num_classes, is_training)
elif hparams.model_name == 'wrn':
logits = build_wrn_model(
inputs, num_classes, hparams.wrn_size)
elif hparams.model_name == 'shake_shake':
logits = build_shake_shake_model(
inputs, num_classes, hparams, is_training)
return logits
class CifarModel(object):
"""Builds an image model for Cifar10/Cifar100."""
def __init__(self, hparams):
self.hparams = hparams
def build(self, mode):
"""Construct the cifar model."""
assert mode in ['train', 'eval']
self.mode = mode
self._setup_misc(mode)
self._setup_images_and_labels()
self._build_graph(self.images, self.labels, mode)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _setup_misc(self, mode):
"""Sets up miscellaneous in the cifar model constructor."""
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
self.reuse = None if (mode == 'train') else True
self.batch_size = self.hparams.batch_size
if mode == 'eval':
self.batch_size = 25
def _setup_images_and_labels(self):
"""Sets up image and label placeholders for the cifar model."""
if FLAGS.dataset == 'cifar10':
self.num_classes = 10
else:
self.num_classes = 100
self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
self.labels = tf.placeholder(tf.float32,
[self.batch_size, self.num_classes])
def assign_epoch(self, session, epoch_value):
session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})
def _build_graph(self, images, labels, mode):
"""Constructs the TF graph for the cifar model.
Args:
images: A 4-D image Tensor
labels: A 2-D labels Tensor.
mode: string indicating training mode ( e.g., 'train', 'valid', 'test').
"""
is_training = 'train' in mode
if is_training:
self.global_step = tf.train.get_or_create_global_step()
logits = build_model(
images,
self.num_classes,
is_training,
self.hparams)
self.predictions, self.cost = helper_utils.setup_loss(
logits, labels)
self.accuracy, self.eval_op = tf.metrics.accuracy(
tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
self._calc_num_trainable_params()
# Adds L2 weight decay to the cost
self.cost = helper_utils.decay_weights(self.cost,
self.hparams.weight_decay_rate)
#### Warning: differs from the original implementation
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
########################################################
######## PBA ############
# Parallel CIFAR model trainer
tf.flags.DEFINE_string('model_name', 'wrn',
'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
'pyramid_net')
tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
tf.flags.DEFINE_string('data_path', '/tmp/data',
'Directory where dataset is located.')
tf.flags.DEFINE_string('dataset', 'cifar10',
'Dataset to train with. Either cifar10 or cifar100')
tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
## ???
FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'
hparams = tf.contrib.training.HParams(
train_size=50000,
validation_size=0,
eval_test=1,
dataset=FLAGS.dataset,
data_path=FLAGS.data_path,
batch_size=128,
gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
hparams.add_hparam('model_name', 'wrn')
hparams.add_hparam('num_epochs', 200)
hparams.add_hparam('wrn_size', 160)
hparams.add_hparam('lr', 0.1)
hparams.add_hparam('weight_decay_rate', 5e-4)
data_loader = data_utils.DataSet(hparams)
data_loader.reset()
with tf.Graph().as_default(): #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
"""Builds the image models for train and eval."""
# Determine if we should build the train and eval model. When using
# distributed training we only want to build one or the other and not both.
with tf.variable_scope('model', use_resource=False):
m = CifarModel(hparams)
m.build('train')
#self._num_trainable_params = m.num_trainable_params
#self._saver = m.saver
#with tf.variable_scope('model', reuse=True, use_resource=False):
# meval = CifarModel(self.hparams)
# meval.build('eval')
##### FAR-HO ####
for _ in range(n_hyper_iterations):

View file

@@ -1,92 +0,0 @@
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt
sess = tf.InteractiveSession()
def get_data():
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
return datasets.train, datasets.validation
def g_logits(x,y):
with tf.variable_scope('model'):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x,y)
train_set, validation_set = get_data()
lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it =10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
T = 20 # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []
for _ in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
inner_obj = L.eval(train_set_supplier())
outer_obj = E.eval(validation_set_supplier())
inner_losses.append(inner_obj)
outer_losses.append(outer_obj)
print('inner:', inner_obj)
print('outer:', outer_obj)
train_acc = accuracy.eval(train_set_supplier())
val_acc = accuracy.eval(validation_set_supplier())
train_accs.append(train_acc)
val_accs.append(val_acc)
print('training accuracy', train_acc)
print('validation accuracy', val_acc)
print('learning rate', lr.eval())
print('norm of examples weight', tf.norm(lambdas).eval())
print('-'*50)
plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)
plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))

View file

@@ -1,126 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 500
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 1.e-3
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.05, .05,))
butil.viz_data(train_set)
print('Data sampled !')
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when these hyperparameters are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))

View file

@@ -1,141 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 10
T = 10 # Number of inner iterations
rev_it =10
hp_lr = 0.02
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.1, .1,))
probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1)) # clamp to [0.1, 0.9]
probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1)) # clamp to [0.1, 0.9]
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
#probX, probY = 0.5, 0.5
#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)]
policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
print('Hyp :',far.utils.hyperparameters(scope=None))
#butil.viz_data(train_set, aug_policy= policy)
#print('Data sampled !')
# Artificially add the transforms to the loss just so they are taken into account in the graph dynamics
probX_loss = tf.sigmoid(probX)
probY_loss = tf.sigmoid(probY)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(probX_loss*probY_loss*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.AdamOptimizer()
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
#print(train_set.dim_data,validation_set.dim_data)
his_params = []
tf.global_variables_initializer().run()
butil.viz_data(train_set, fig_name= 'Start_sample',aug_policy= policy)
print('Data sampled !')
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=True)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
butil.viz_data(train_set, fig_name= 'Train_sample_{}'.format(hyt),aug_policy= policy)
print('Data sampled !')
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[4])
print('validation accuracy:', res[5])
print('Transformation : ProbX -',res[0],'/ProbY -',res[1])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('ProbX')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('ProbY')
ax[1].plot([e[1] for e in his_params])
ax[2].set_title('Tr. and val. errors')
ax[2].plot([e[2] for e in his_params])
ax[2].plot([e[3] for e in his_params])
ax[3].set_title('Tr. and val. acc')
ax[3].plot([e[4] for e in his_params])
ax[3].plot([e[5] for e in his_params])
plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T))

View file

@@ -1,133 +0,0 @@
#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
#import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 90
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 0.1
epochs =10
batch_size = 256
##########################
#MNIST
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
#CNN : reshape = False
#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
#logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.6, .3,))
#butil.viz_data(train_set)
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when these hyperparameters are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
#inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step())
train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
print('Hyper iterations per epoch',int(train_set.num_examples/batch_size*epochs/T))
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=False)
res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', res[3])
print('outer:', res[4])
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T))
#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T))

View file

@@ -1,5 +0,0 @@
venv/
__pycache__
data/
log/
.vscode/

View file

@@ -1,33 +0,0 @@
# Gradient Descent: The Ultimate Optimizer
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |
This repository contains the paper and the code from the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).
I couldn't find the code (which appears in the appendix at the end of the
paper) anywhere on the web. What I present here is the code from the paper with
instructions on how to set it up.
Getting the code into a runnable state required some fixes on my part, so the
code might differ slightly from the version presented in the paper.
## Set up
```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```
When you are done you can exit the virtualenv with
```shell
deactivate
```

View file

@@ -1,244 +0,0 @@
from hyperopt import *
#from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class MNIST_FullyConnected_Augmented(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
self.device = device
#print(self.device)
parameters = {
"w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
"b1": torch.zeros(num_hid, device=self.device).t(),
"w2": torch.zeros(num_hid, num_out, device=self.device).t(),
"b2": torch.zeros(num_out, device=self.device).t(),
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
#print(self.device)
def forward(self, x):
"""Compute a prediction."""
#print("Prob:",self.parameters["prob"].item())
if random.random() < self.parameters["prob"]:
#angle = 45
#x = TF.rotate(x, angle)
#print(self.device)
#x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
x = x + self.parameters["mag"]
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_FC_augmented / " + str(self.optimizer)
class LeNet(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
parameters = {
"w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
"b1": torch.zeros(20, device=self.device),
"w2": torch.zeros(50, 20, 5, 5, device=self.device),
"b2": torch.zeros(50, device=self.device),
"w3": torch.zeros(500,4*4*50, device=self.device),
"b3": torch.zeros(500, device=self.device),
"w4": torch.zeros(10, 500, device=self.device),
"b4": torch.zeros(10, device=self.device),
#Data augmentation
"prob": torch.tensor(1.0, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
if random.random() < self.parameters["prob"]:
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)
class LeNet_v2(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
#self.fc1 = nn.Linear(4*4*50, 500)
self.fc1 = nn.Linear(1250, 500)
self.fc2 = nn.Linear(500, 10)
#print(self.conv1.weight)
parameters = {
"w1": self.conv1.weight,
"b1": self.conv1.bias,
"w2": self.conv2.weight,
"b2": self.conv2.bias,
"w3": self.fc1.weight,
"b3": self.fc1.bias,
"w4": self.fc2.weight,
"b4": self.fc2.bias,
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(1.0, device=self.device),
}
Optimizable.__init__(self, parameters, optimizer)
'''
def forward(self, x): # Saturates memory ???
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
#x = x.view(-1, 4*4*50)
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
'''
def forward(self, x):
if random.random() < self.parameters["prob"].item():
#print(self.parameters["prob"])
#x = [T.ToTensor()(
# TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [im + self.parameters["mag"]
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] * 180 # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def initialize(self):
self.optimizer.initialize()
def adjust(self):
self.optimizer.adjust(self.parameters)
def adjust_val(self):
self.optimizer.adjust_val(self.parameters)
def eval(self):
self.parameters['prob']=torch.tensor(0.0, device=self.device)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)

View file

@@ -1,52 +0,0 @@
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF
class MNIST_aug(Dataset):
training_file = 'training.pt'
test_file = 'test.pt'
classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
'5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
def __init__(self):
self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
self.set_stage(0) # initial stage
def __getitem__(self, index):
image = self.images[index]
# Just apply your transformations here
image = self.crop(image)
x = TF.to_tensor(image)
return x
def set_stage(self, stage):
if stage == 0:
print('Using (32, 32) crops')
self.crop = transforms.RandomCrop((32, 32))
elif stage == 1:
print('Using (28, 28) crops')
self.crop = transforms.RandomCrop((28, 28))
def __len__(self):
return len(self.images)
dataset = MNIST_aug()
loader = DataLoader(dataset,
batch_size=2,
num_workers=2,
shuffle=True)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))
loader.dataset.set_stage(1)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))

View file

@@ -1,150 +0,0 @@
#from hyperopt import *
from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class LeNet_v3(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet_v3, self).__init__()
self.params = nn.ParameterDict({
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
'b1': nn.Parameter(torch.zeros(20)),
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
'b2': nn.Parameter(torch.zeros(50)),
'w3': nn.Parameter(torch.zeros(500,4*4*50)),
'b3': nn.Parameter(torch.zeros(500)),
'w4': nn.Parameter(torch.zeros(10, 500)),
'b4': nn.Parameter(torch.zeros(10))
})
def initialize(self):
nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))
def forward(self, x):
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.params["w4"], self.params["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "mnist_CNN_augmented / "
class Data_aug(nn.Module):
def __init__(self):
super(Data_aug, self).__init__()
self.data_augmentation = True
self.params = nn.ParameterDict({
"prob": nn.Parameter(torch.tensor(0.5)),
"mag": nn.Parameter(torch.tensor(180.0))
})
#self.params["mag"].register_hook(print)
def forward(self, x):
if self.data_augmentation and self.training and random.random() < self.params["prob"]:
#print('Aug')
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.params["mag"] # in degrees
angle = torch.ones(batch_size, device=x.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=x.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=x.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
return x
def eval(self):
self.params['prob'].data.fill_(0.) # disable augmentation at eval time
nn.Module.eval(self)
def data_augmentation(self, mode=True):
self.data_augmentation=mode
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "Data_Augmenter / "
class Augmented_model(nn.Module):
def __init__(self, model, data_augmenter):
#self.model = model
#self.data_aug = data_augmenter
super(Augmented_model, self).__init__()#nn.Module.__init__(self)
#super().__init__()
self.mods = nn.ModuleDict({
'data_aug': data_augmenter,
'model': model
})
#for name, param in self.mods.named_parameters():
# print(name, type(param.data), param.size())
#params = self.mods.named_parameters() #self.parameters()
#parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
#Optimizable.__init__(self, params, optimizer)
def initialize(self):
self.mods['model'].initialize()
def forward(self, x):
return self.mods['model'](self.mods['data_aug'](x))
#def adjust(self):
# self.optimizer.adjust(self) #Parametres des dict
def data_augmentation(self, mode=True):
self.mods['data_aug'].data_augmentation=mode
def begin(self):
for param in self.parameters():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def print_grad_fn(self):
for n, m in self.mods.items():
m.print_grad_fn()
def __str__(self):
return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer)
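A hedged sketch of composing the three classes above (assumptions: the wildcard import of `hyperopt_v2` provides `torch`, `nn`, `F`, and `math` as the code implies; the installed `kornia` exposes `get_rotation_matrix2d`/`warp_affine` at the top level as used in `Data_aug.forward`; inputs are 1x28x28 MNIST-style tensors):

```python
# Hedged sketch; the surrounding environment is an assumption.
import torch

aug_model = Augmented_model(LeNet_v3(num_inp=1, num_out=10), Data_aug())
aug_model.initialize()            # Kaiming init for the LeNet weights
aug_model.begin()                 # keep/retain gradient information on all parameters

x = torch.rand(4, 1, 28, 28)      # dummy batch
log_probs = aug_model(x)          # Data_aug may rotate the batch, then LeNet_v3 classifies it
print(log_probs.shape)            # torch.Size([4, 10])

aug_model.data_augmentation(False)  # turn augmentation off, e.g. for evaluation
```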

View file

@@ -1,5 +0,0 @@
digraph {
graph [size="12,12"]
node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
94296775052080 [label=NoneType fillcolor=darkolivegreen1]
}

View file

@@ -1,19 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="75pt" height="30pt"
viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
<!-- 94296775052080 -->
<g id="node1" class="node">
<title>94296775052080</title>
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
</g>
</g>
</svg>


View file

@@ -1,345 +0,0 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Optimizable():#nn.Module):
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Model but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
interface which does not give us enough control about the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
#super(Optimizable, self).__init__()
self.parameters = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = [] # Reinitialize to avoid overloading memory
for name, param in self.parameters.items():
#for param in self.parameters:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
#param = param.to(self.device)
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backwards() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for n, p in self.parameters.items():
print(n," - ", p.grad_fn)
def param_grad(self):
return self.all_params_with_gradients
def param(self, param_name):
return self.parameters[param_name].item()
class MNIST_FullyConnected(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer):
parameters = {
"w1": torch.zeros(num_inp, num_hid).t(),
"b1": torch.zeros(num_hid).t(),
"w2": torch.zeros(num_hid, num_out).t(),
"b2": torch.zeros(num_out).t(),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
"""Compute a prediction."""
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist / " + str(self.optimizer)
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def adjust_val(self, params):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
class Adam(Optimizable):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
self.device = device
parameters = {
"alpha": torch.tensor(alpha, device=self.device),
"beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
"beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
"log_eps": torch.tensor(log_eps, device=self.device),
}
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.num_adjustments_val = 0
self.cache = {}
for name, param in parameters.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
def adjust(self, params): # update the model (learning) parameters
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
#print('Adam update')
t = self.num_adjustments
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name == "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def adjust_val(self, params): # update the transformation (augmentation) parameters
self.num_adjustments_val += 1
self.optimizer.adjust_val(self.parameters)
#print('Adam update')
t = self.num_adjustments_val
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name != "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
'''
class SGD(Optimizable):
"""
A hyperoptimizable SGD
"""
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
parameters = {"alpha": torch.tensor(alpha)}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters["alpha"]
def __str__(self):
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
class SGDPerParam(Optimizable):
"""
Like above, but can be taught a separate step size for each parameter it
tunes.
"""
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
def __str__(self):
return "sgd(%s) / " % str(
{k: t.item() for k, t in self.parameters.items()}
) + str(self.optimizer)
'''
'''
class AdamBaydin(Optimizable):
""" Same as above, but only optimizes the learning rate, treating the
remaining hyperparameters as constants. """
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
):
parameters = {"alpha": torch.tensor(alpha)}
self.beta1 = beta1
self.beta2 = beta2
self.log_eps = log_eps
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.cache = {}
def adjust(self, params):
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
t = self.num_adjustments
beta1 = self.beta1
beta2 = self.beta2
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape),
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
}
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
params[name] = param.detach() - self.parameters["alpha"] * dparam
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
'''

View file

@ -1,296 +0,0 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
class Optimizable():
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Module but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
interface which does not give us enough control over the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
self.params = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = nn.ParameterList() # reset the list to avoid memory buildup
print("Opti param :", type(self.params))
#for name, param in self.params:
if isinstance(self.params,dict): #Dict
for name, param in self.params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
if isinstance(self.params,list): #List
for param in self.params:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backwards() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
#print("NoOpt begin")
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def step(self):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
class SGD(Optimizer, nn.Module): # better to avoid inheriting from Optimizer
"""
A hyperoptimizable SGD
"""
def __init__(self, params, lr=0.01, height=0):
self.height=height
#params: the parameters to optimize
#the rest (defaults) are the optimizer's own hyperparameters
print('SGD - H', height)
nn.Module.__init__(self)
optim_keys = ('lr','') # move into Optimizable? # '' avoids iterating over the characters of the string...
'''
self_params = {"lr": torch.tensor(lr),
"momentum": 0,
"dampening":0,
"weight_decay":0,
"nesterov": False}
'''
#self_params = dict(lr=torch.tensor(lr),
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
self_params = nn.ParameterDict({
"lr": nn.Parameter(torch.tensor(lr)),
"momentum": nn.Parameter(torch.tensor(0.0)),
"dampening": nn.Parameter(torch.tensor(0.0)),
"weight_decay": nn.Parameter(torch.tensor(0.0)),
})
for k in self_params.keys() & optim_keys:
self_params[k].requires_grad_() # keep gradient information…
self_params[k].retain_grad() # even if not a leaf…
#self_params[k].register_hook(print)
if height==0:
optimizer = NoOpOptimizer()
else:
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) # should work
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
#optimizer.register_backward_hook(print)
self.optimizer = optimizer
#if(height==0):
# for n,p in params.items():
# print(n,p)
#Optimizable.__init__(self, self_params, optimizer)
#print(type(params))
#for p in params:
# print(type(p))
Optimizer.__init__(self, params, self_params)
for group in self.param_groups:
for p in group['params']:
print(type(p.data), p.size())
print('End SGD-H', height)
def begin(self):
for group in self.param_groups:
for p in group['params']:
#print(type(p.data), p.size())
p.requires_grad_() # keep gradient information…
p.retain_grad() # even if not a leaf…
#p.register_hook(lambda x: print(self.height, x.grad_fn))
self.optimizer.begin()
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for group in self.param_groups:
for i, p in enumerate(group['params']):
print(self.height," - ", i, p.grad_fn)
#def adjust(self, params):
# self.optimizer.adjust(self.params)
# for name, param in params.items():
# g = param.grad.detach()
# params[name] = param.detach() - g * self.params["lr"]
def step(self):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
print('SGD start')
self.optimizer.step()
for group in self.param_groups:
for i, p in enumerate(group['params']):
if p.grad is None:
continue
#d_p = p.grad.data
d_p = p.grad.detach()
#print(group['lr'])
#p.data.add_(-group['lr'].item(), d_p) # .item() drops the lr from the graph; also avoid applying the update twice
#group['params'][i] = p.detach() - d_p * group['lr']
p.data-= group['lr']*d_p # note: going through .data loses gradient information
for p in group['params']:
if p.grad is None:
print(p, p.grad)
continue
print("SGD end")
#return loss
def __str__(self):
return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
class Adam(Optimizable, nn.Module):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
#super(Adam, self).__init__()
nn.Module.__init__(self)
self.device = device
params = nn.ParameterDict({
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
})
Optimizable.__init__(self, params, optimizer)
self.num_adjustments = 0
self.cache = {}
for name, param in params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def adjust(self, params, pytorch_mod=False):
self.num_adjustments += 1
self.optimizer.adjust(self.params)
t = self.num_adjustments
beta1 = Adam.clamp(self.params["beta1"])
beta2 = Adam.clamp(self.params["beta2"])
updated_param = {}
if pytorch_mod:
params = dict(params.named_parameters(prefix='')) # TODO: rename this input...
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.params["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(nn.Parameter(m)) # risk of memory overload => would a dict be better?
self.all_params_with_gradients.append(nn.Parameter(v))
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
updated_param[name] = param.detach() - self.params["alpha"] * dparam
if pytorch_mod: params.update(updated_param) # TODO: rename this input...
else: params = updated_param
def __str__(self):
return "adam(" + str(self.params) + ") / " + str(self.optimizer)

View file

@ -1,182 +0,0 @@
import numpy as np
import json, math, time, os
from hyperopt import *
import gc
BATCH_SIZE = 300
mnist_train = torchvision.datasets.MNIST(
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
)
mnist_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
dl_train = torch.utils.data.DataLoader(
mnist_train, batch_size=BATCH_SIZE, shuffle=False
)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
def test(model):
for i, (features_, labels_) in enumerate(dl_test):
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
def train(model, epochs=3, height=1):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
loss = F.nll_loss(pred, labels)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
return stats
def run(opt, name="out", usr={}, epochs=3, height=1):
torch.manual_seed(0x42)
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
print("Running...", str(model))
model.initialize()
log = train(model, epochs, height)
acc = test(model)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
return out
def sgd_experiments():
run(SGD(0.01), "sgd", epochs=1)
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
alpha = out["log"][-1]["params"]["alpha"]
print(alpha)
run(SGD(alpha), "sgd-final", epochs=1)
def adam_experiments():
run(Adam(), "adam", epochs=1)
print()
mo = SGDPerParam(
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
)
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
print(mo)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam+sgd-final",
epochs=1,
)
print()
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam2-final",
epochs=1,
)
print()
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
print(mo)
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
print()
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
def surface():
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
for log_alpha in np.linspace(-3, 2, 10):
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
def make_sgd_stack(height, top):
if height == 0:
return SGD(alpha=top)
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
def make_adam_stack(height, top=0.0000001):
if height == 0:
return Adam(alpha=top)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
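# Illustrative note (not in the original file): the recursive constructors above nest
# optimizers, e.g. make_sgd_stack(2, 0.01) builds
#     SGD(alpha=0.01, optimizer=SGD(alpha=0.01, optimizer=SGD(alpha=0.01)))
# and make_adam_stack(2) nests Adam the same way (the recursive call does not forward `top`,
# so deeper levels fall back to the default alpha). Each optimizer's hyperparameters are
# adjusted by the optimizer passed as its `optimizer` argument, and the innermost one ends
# the chain with a NoOpOptimizer.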
def stack_test():
for top in np.linspace(-7, 3, 20):
for height in range(6):
print("height =", height, "to p=", top)
opt = make_sgd_stack(height, 10 ** top)
run(
opt,
"metasgd3-%d@%+.2f" % (height, top),
{"height": height, "top": top},
epochs=1,
height=height,
)
gc.collect()
def perf_test():
for h in range(51):
print("height:", h)
# opt = make_sgd_stack(h, 0.01)
opt = make_adam_stack(h)
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
gc.collect()
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
surface()
sgd_experiments()
adam_experiments()
stack_test()
perf_test()

View file

@ -1,5 +0,0 @@
numpy==1.17.2
Pillow==6.2.0
six==1.12.0
torch==1.2.0
torchvision==0.4.0

View file

@ -1,344 +0,0 @@
import numpy as np
import json, math, time, os
from data_aug import *
#from data_aug_v2 import *
import gc
import matplotlib.pyplot as plt
from torchviz import make_dot, make_dot_from_trace
from torch.utils.data import SubsetRandomSampler
BATCH_SIZE = 300
#TEST_SIZE = 10000
TEST_SIZE = 300
DATA_LIMIT = 10
'''
data_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
'''
data_train = torchvision.datasets.CIFAR10(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.CIFAR10(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
train_subset_indices=range(int(len(data_train)/2))
val_subset_indices=range(int(len(data_train)/2),len(data_train))
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
def test(model, reshape_in=True, device = torch.device('cuda')):
for i, (features_, labels_) in enumerate(dl_test):
if reshape_in :
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
if train_data: dl = dl_train
else: dl = dl_val
for i, (features_, labels_) in enumerate(dl):
if i > DATA_LIMIT : break
#t0 = time.process_time()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
#optimizer.begin()
#optimizer.zero_grad()
model.begin()
model.zero_grad()
pred = model.forward(features)
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#model.print_grad_fn()
#optimizer.print_grad_fn()
#print('-'*50)
loss.backward(create_graph=True)
#optimizer.step()
if train_data: model.adjust()
else: model.adjust_val()
#tf = time.process_time()
#data = {
# "time": tf - t0,
# "iter": epoch * len(dl_train) + i,
# "loss": loss.item(),
# "params": {
# k: v.item()
# for k, v in model.optimizer.parameters.items()
# if "." not in k
# },
#}
#stats.append(data)
#print_torch_mem(i)
return loss.item()
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
log = []
for epoch in range(epochs):
#dl_train.dataset.transform=torchvision.transforms.Compose([
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
# torchvision.transforms.ToTensor()
#])
viz_data(fig_name='res/data_sample')
t0 = time.process_time()
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
#acc = test(model=model, reshape_in=reshape_in, device=device)
acc = 0
tf = time.process_time()
data = {
"time": tf - t0,
"epoch": epoch,
"loss": loss,
"acc": acc,
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
#for k, v in model.mods.data_aug.params.named_parameters()
if "." not in k
},
}
log.append(data)
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return log
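# Illustrative summary (not part of the original file) of the alternating scheme above: each
# epoch first runs one pass over the training half of the data, where model.adjust() updates
# the network weights, then one pass over the validation half, where model.adjust_val()
# updates only the augmentation magnitude "mag", so the augmentation parameters are tuned
# on held-out data rather than on the batches the weights were fitted to.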
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#print('-'*50)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
print('-'*50)
i=0
for obj in gc.get_objects():
try:
if (torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data))) and len(obj.size())>1:
print(i, type(obj), obj.size())
i+=1
except:
pass
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return stats
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
torch.manual_seed(0x42)
if not cnn:
reshape_in = True
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
else:
reshape_in = False
#model = LeNet(1, 10,opt, device)
#model = LeNet_v2(1, 10,opt, device).to(device=device)
model = LeNet_v2(3, 10,opt, device).to(device=device)
optimizer=None
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m,
data_augmenter=a,
optimizer=opt).to(device) # the same optimizer twice?...
'''
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m, data_augmenter=a).to(device)
#optimizer = SGD(model.parameters())
optimizer = SGD(model.parameters(), lr=0.01, height=1)
'''
#for idx, m in enumerate(model.modules()):
# print(idx, '->', m)
print("Running...", str(model))
model.initialize()
#print_model(model)
#model.data_augmentation(False)
#model.eval()
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
model.eval()
acc = test(model, reshape_in, device=device)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
#plot_res(log, fig_name='res/'+name)
return out
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
#print(height,device)
if height == 0:
return Adam(alpha=top, device=device)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
def plot_res(log, fig_name='res'):
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot([x["loss"] for x in log])
ax[1].set_title('Acc')
ax[1].plot([x["acc"] for x in log])
ax[2].set_title('mag')
ax[2].plot([x["data_aug"] for x in log])
plt.savefig(fig_name)
def print_torch_mem(add_info=''):
nb=0
max_size=0
for obj in gc.get_objects():
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
def print_model(model, fig_name='graph/graph'): # does not seem to work for models written in a functional style
x = torch.randn(1,1,28,28, device=device)
dot=make_dot(model(x), params=dict(model.named_parameters()))
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
dot.render(fig_name)
print("Model graph generated !")
def viz_data(fig_name='data_sample'):
features_, labels_ = next(iter(dl_train))
plt.figure(figsize=(10,10))
#for i, (features_, labels_) in enumerate(dl_train):
for i in range(25):
if i==25: break
#print(features_.size(), labels_.size())
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = features_[i,0,:,:]
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(labels_[i].item())
plt.savefig(fig_name)
##########################################
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
device = torch.device('cuda')
run(make_adam_stack(height=1, top=0.001, device=device),
"Augmented_MNIST",
epochs=100,
cnn=True,
device = device)
print()

View file

@ -1,73 +0,0 @@
import numpy as np
import tensorflow as tf
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def LeNet(images, num_classes):
# tunable hyperparameters for nn architecture
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
#print(images.shape)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
#print(h_conv1_tf.shape)
#print(h_pool1_tf.shape)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
#print(h_pool2_tf.shape)
# 4.layer: fully connected
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
print(h_fc1_tf.shape)
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
# tf.argmax(y_data_tf, 1),
# name = 'y_pred_correct_tf')
logits = z_pred_tf
return logits #y_pred_proba_tf
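# Hypothetical usage sketch (not part of the original file): feeding a CIFAR-10 sized
# placeholder through LeNet to obtain class logits. The batch size and shapes are illustrative.
def _lenet_usage_sketch():
    images = tf.placeholder(tf.float32, [128, 32, 32, 3])  # NHWC batch of CIFAR-10 images
    logits = LeNet(images, num_classes=10)                 # -> Tensor of shape (128, 10)
    return logits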

View file

@ -1,353 +0,0 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""PBA & AutoAugment Train/Eval module.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import time
import numpy as np
import tensorflow as tf
import autoaugment.custom_ops as ops
from autoaugment.shake_drop import build_shake_drop_model
from autoaugment.shake_shake import build_shake_shake_model
import pba.data_utils as data_utils
import pba.helper_utils as helper_utils
from pba.wrn import build_wrn_model
from pba.resnet import build_resnet_model
from pba.LeNet import LeNet
arg_scope = tf.contrib.framework.arg_scope
def setup_arg_scopes(is_training):
"""Sets up the argscopes that will be used when building an image model.
Args:
is_training: Is the model training or not.
Returns:
Arg scopes to be put around the model being constructed.
"""
batch_norm_decay = 0.9
batch_norm_epsilon = 1e-5
batch_norm_params = {
# Decay for the moving averages.
'decay': batch_norm_decay,
# epsilon to prevent 0s in variance.
'epsilon': batch_norm_epsilon,
'scale': True,
# collection containing the moving mean and moving variance.
'is_training': is_training,
}
scopes = []
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
return scopes
def build_model(inputs, num_classes, is_training, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model build built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
if len(scopes) != 1:
raise ValueError('Nested scopes deprecated in py3.')
with scopes[0]:
if hparams.model_name == 'pyramid_net':
logits = build_shake_drop_model(inputs, num_classes, is_training)
elif hparams.model_name == 'wrn':
logits = build_wrn_model(inputs, num_classes, hparams.wrn_size)
elif hparams.model_name == 'shake_shake':
logits = build_shake_shake_model(inputs, num_classes, hparams,
is_training)
elif hparams.model_name == 'resnet':
logits = build_resnet_model(inputs, num_classes, hparams,
is_training)
elif hparams.model_name == 'LeNet':
logits = LeNet(inputs, num_classes)
else:
raise ValueError("Unknown model name.")
return logits
class Model(object):
"""Builds an model."""
def __init__(self, hparams, num_classes, image_size):
self.hparams = hparams
self.num_classes = num_classes
self.image_size = image_size
def build(self, mode):
"""Construct the model."""
assert mode in ['train', 'eval']
self.mode = mode
self._setup_misc(mode)
self._setup_images_and_labels(self.hparams.dataset)
self._build_graph(self.images, self.labels, mode)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _setup_misc(self, mode):
"""Sets up miscellaneous in the model constructor."""
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
self.reuse = None if (mode == 'train') else True
self.batch_size = self.hparams.batch_size
if mode == 'eval':
self.batch_size = self.hparams.test_batch_size
def _setup_images_and_labels(self, dataset):
"""Sets up image and label placeholders for the model."""
if dataset == 'cifar10' or dataset == 'cifar100' or self.mode == 'train':
self.images = tf.placeholder(tf.float32,
[self.batch_size, self.image_size, self.image_size, 3])
self.labels = tf.placeholder(tf.float32,
[self.batch_size, self.num_classes])
else:
self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3])
self.labels = tf.placeholder(tf.float32, [None, self.num_classes])
def assign_epoch(self, session, epoch_value):
session.run(
self._epoch_update, feed_dict={self._new_epoch: epoch_value})
def _build_graph(self, images, labels, mode):
"""Constructs the TF graph for the model.
Args:
images: A 4-D image Tensor
labels: A 2-D labels Tensor.
mode: string indicating training mode ( e.g., 'train', 'valid', 'test').
"""
is_training = 'train' in mode
if is_training:
self.global_step = tf.train.get_or_create_global_step()
logits = build_model(images, self.num_classes, is_training,
self.hparams)
self.predictions, self.cost = helper_utils.setup_loss(logits, labels)
self._calc_num_trainable_params()
# Adds L2 weight decay to the cost
self.cost = helper_utils.decay_weights(self.cost,
self.hparams.weight_decay_rate)
if is_training:
self._build_train_op()
# Setup checkpointing for this child model
# Keep 2 or more checkpoints around during training.
with tf.device('/cpu:0'):
self.saver = tf.train.Saver(max_to_keep=10)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _calc_num_trainable_params(self):
self.num_trainable_params = np.sum([
np.prod(var.get_shape().as_list())
for var in tf.trainable_variables()
])
tf.logging.info('number of trainable params: {}'.format(
self.num_trainable_params))
def _build_train_op(self):
"""Builds the train op for the model."""
hparams = self.hparams
tvars = tf.trainable_variables()
grads = tf.gradients(self.cost, tvars)
if hparams.gradient_clipping_by_global_norm > 0.0:
grads, norm = tf.clip_by_global_norm(
grads, hparams.gradient_clipping_by_global_norm)
tf.summary.scalar('grad_norm', norm)
# Setup the initial learning rate
initial_lr = self.lr_rate_ph
optimizer = tf.train.MomentumOptimizer(
initial_lr, 0.9, use_nesterov=True)
self.optimizer = optimizer
apply_op = optimizer.apply_gradients(
zip(grads, tvars), global_step=self.global_step, name='train_step')
train_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies([apply_op]):
self.train_op = tf.group(*train_ops)
class ModelTrainer(object):
"""Trains an instance of the Model class."""
def __init__(self, hparams):
self._session = None
self.hparams = hparams
# Set the random seed to be sure the same validation set
# is used for each model
np.random.seed(0)
self.data_loader = data_utils.DataSet(hparams)
np.random.seed() # Put the random seed back to random
self.data_loader.reset()
# extra stuff for ray
self._build_models()
self._new_session()
self._session.__enter__()
def save_model(self, checkpoint_dir, step=None):
"""Dumps model into the backup_dir.
Args:
step: If provided, creates a checkpoint with the given step
number, instead of overwriting the existing checkpoints.
"""
model_save_name = os.path.join(checkpoint_dir,
'model.ckpt') + '-' + str(step)
save_path = self.saver.save(self.session, model_save_name)
tf.logging.info('Saved child model')
return model_save_name
def extract_model_spec(self, checkpoint_path):
"""Loads a checkpoint with the architecture structure stored in the name."""
self.saver.restore(self.session, checkpoint_path)
tf.logging.warning(
'Loaded child model checkpoint from {}'.format(checkpoint_path))
def eval_child_model(self, model, data_loader, mode):
"""Evaluate the child model.
Args:
model: image model that will be evaluated.
data_loader: dataset object to extract eval data from.
mode: will the model be evalled on train, val or test.
Returns:
Accuracy of the model on the specified dataset.
"""
tf.logging.info('Evaluating child model in mode {}'.format(mode))
while True:
try:
accuracy = helper_utils.eval_child_model(
self.session, model, data_loader, mode)
tf.logging.info(
'Eval child model accuracy: {}'.format(accuracy))
# If epoch trained without raising the below errors, break
# from loop.
break
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
tf.logging.info(
'Retryable error caught: {}. Retrying.'.format(e))
return accuracy
def _new_session(self):
"""Creates a new session for model m."""
# Create a new session for this model, initialize
# variables, and save / restore from checkpoint.
sess_cfg = tf.ConfigProto(
allow_soft_placement=True, log_device_placement=False)
sess_cfg.gpu_options.allow_growth = True
self._session = tf.Session('', config=sess_cfg)
self._session.run([self.m.init, self.meval.init])
return self._session
def _build_models(self):
"""Builds the image models for train and eval."""
# Determine if we should build the train and eval model. When using
# distributed training we only want to build one or the other and not both.
with tf.variable_scope('model', use_resource=False):
m = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
m.build('train')
self._num_trainable_params = m.num_trainable_params
self._saver = m.saver
with tf.variable_scope('model', reuse=True, use_resource=False):
meval = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
meval.build('eval')
self.m = m
self.meval = meval
def _run_training_loop(self, curr_epoch):
"""Trains the model `m` for one epoch."""
start_time = time.time()
while True:
try:
train_accuracy = helper_utils.run_epoch_training(
self.session, self.m, self.data_loader, curr_epoch)
break
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
tf.logging.info(
'Retryable error caught: {}. Retrying.'.format(e))
tf.logging.info('Finished epoch: {}'.format(curr_epoch))
tf.logging.info('Epoch time(min): {}'.format(
(time.time() - start_time) / 60.0))
return train_accuracy
def _compute_final_accuracies(self, iteration):
"""Run once training is finished to compute final test accuracy."""
if (iteration >= self.hparams.num_epochs - 1):
test_accuracy = self.eval_child_model(self.meval, self.data_loader,
'test')
else:
test_accuracy = 0
tf.logging.info('Test Accuracy: {}'.format(test_accuracy))
return test_accuracy
def run_model(self, epoch):
"""Trains and evalutes the image model."""
valid_accuracy = 0.
training_accuracy = self._run_training_loop(epoch)
if self.hparams.validation_size > 0:
valid_accuracy = self.eval_child_model(self.meval,
self.data_loader, 'val')
tf.logging.info('Train Acc: {}, Valid Acc: {}'.format(
training_accuracy, valid_accuracy))
return training_accuracy, valid_accuracy
def reset_config(self, new_hparams):
self.hparams = new_hparams
self.data_loader.reset_policy(new_hparams)
return
@property
def saver(self):
return self._saver
@property
def session(self):
return self._session
@property
def num_trainable_params(self):
return self._num_trainable_params
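# Hypothetical driver sketch (not part of the original file) showing how the search/training
# code is expected to use ModelTrainer; `hparams` is assumed to come from pba.setup.create_hparams.
def _trainer_loop_sketch(hparams):
    trainer = ModelTrainer(hparams)          # builds the train/eval graphs and opens a session
    for epoch in range(hparams.num_epochs):
        train_acc, valid_acc = trainer.run_model(epoch)
    test_acc = trainer._compute_final_accuracies(hparams.num_epochs - 1)
    return train_acc, valid_acc, test_acc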

View file

@ -1,59 +0,0 @@
#!/bin/bash
export PYTHONPATH="$(pwd)"
cifar10_LeNet_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
python pba/search.py \
--local_dir "$local_dir" \
--model_name LeNet \
--data_path "$data_path" --dataset cifar10 \
--train_size 4000 --val_size 46000 \
--checkpoint_freq 0 \
--name "cifar10_search" --gpu 0.15 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 150 \
--explore cifar10 --aug_policy cifar10 \
--lr 0.1 --wd 0.0005
}
cifar10_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
python pba/search.py \
--local_dir "$local_dir" \
--model_name wrn_40_2 \
--data_path "$data_path" --dataset cifar10 \
--train_size 4000 --val_size 46000 \
--checkpoint_freq 0 \
--name "cifar10_search" --gpu 0.15 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 200 \
--explore cifar10 --aug_policy cifar10 \
--lr 0.1 --wd 0.0005
}
svhn_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/"
python pba/search.py \
--local_dir "$local_dir" --data_path "$data_path" \
--model_name wrn_40_2 --dataset svhn \
--train_size 1000 --val_size 7325 \
--checkpoint_freq 0 \
--name "svhn_search" --gpu 0.19 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 160 \
--explore cifar10 --aug_policy cifar10 --no_cutout \
--lr 0.1 --wd 0.005
}
if [ "$1" = "rcifar10" ]; then
cifar10_search
elif [ "$1" = "rsvhn" ]; then
svhn_search
elif [ "$1" = "LeNet" ]; then
cifar10_LeNet_search
else
echo "invalid args"
fi

View file

@ -1,210 +0,0 @@
"""Parse flags and set up hyperparameters."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import random
import tensorflow as tf
from pba.augmentation_transforms_hp import NUM_HP_TRANSFORM
def create_parser(state):
"""Create arg parser for flags."""
parser = argparse.ArgumentParser()
parser.add_argument(
'--model_name',
default='wrn',
choices=('wrn_28_10', 'wrn_40_2', 'shake_shake_32', 'shake_shake_96',
'shake_shake_112', 'pyramid_net', 'resnet', 'LeNet'))
parser.add_argument(
'--data_path',
default='/tmp/datasets/',
help='Directory where dataset is located.')
parser.add_argument(
'--dataset',
default='cifar10',
choices=('cifar10', 'cifar100', 'svhn', 'svhn-full', 'test'))
parser.add_argument(
'--recompute_dset_stats',
action='store_true',
help='Instead of using hardcoded mean/std, recompute from dataset.')
parser.add_argument('--local_dir', type=str, default='/tmp/ray_results/', help='Ray directory.')
parser.add_argument('--restore', type=str, default=None, help='If specified, tries to restore from given path.')
parser.add_argument('--train_size', type=int, default=5000, help='Number of training examples.')
parser.add_argument('--val_size', type=int, default=45000, help='Number of validation examples.')
parser.add_argument('--checkpoint_freq', type=int, default=50, help='Checkpoint frequency.')
parser.add_argument(
'--cpu', type=float, default=4, help='Allocated by Ray')
parser.add_argument(
'--gpu', type=float, default=1, help='Allocated by Ray')
parser.add_argument(
'--aug_policy',
type=str,
default='cifar10',
help=
'which augmentation policy to use (in augmentation_transforms_hp.py)')
# search-use only
parser.add_argument(
'--explore',
type=str,
default='cifar10',
help='which explore function to use')
parser.add_argument(
'--epochs',
type=int,
default=0,
help='Number of epochs, or <=0 for default')
parser.add_argument(
'--no_cutout', action='store_true', help='turn off cutout')
parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
parser.add_argument('--bs', type=int, default=128, help='batch size')
parser.add_argument('--test_bs', type=int, default=25, help='test batch size')
parser.add_argument('--num_samples', type=int, default=1, help='Number of Ray samples')
if state == 'train':
parser.add_argument(
'--use_hp_policy',
action='store_true',
help='otherwise use autoaug policy')
parser.add_argument(
'--hp_policy',
type=str,
default=None,
help='either a comma separated list of values or a file')
parser.add_argument(
'--hp_policy_epochs',
type=int,
default=200,
help='number of epochs/iterations policy trained for')
parser.add_argument(
'--no_aug',
action='store_true',
help=
'no additional augmentation at all (besides cutout if not toggled)'
)
parser.add_argument(
'--flatten',
action='store_true',
help='randomly select aug policy from schedule')
parser.add_argument('--name', type=str, default='autoaug')
elif state == 'search':
parser.add_argument('--perturbation_interval', type=int, default=10)
parser.add_argument('--name', type=str, default='autoaug_pbt')
else:
raise ValueError('unknown state')
args = parser.parse_args()
tf.logging.info(str(args))
return args
def create_hparams(state, FLAGS): # pylint: disable=invalid-name
"""Creates hyperparameters to pass into Ray config.
Different options depending on search or eval mode.
Args:
state: a string, 'train' or 'search'.
FLAGS: parsed command line flags.
Returns:
tf.hparams object.
"""
epochs = 0
tf.logging.info('data path: {}'.format(FLAGS.data_path))
hparams = tf.contrib.training.HParams(
train_size=FLAGS.train_size,
validation_size=FLAGS.val_size,
dataset=FLAGS.dataset,
data_path=FLAGS.data_path,
batch_size=FLAGS.bs,
gradient_clipping_by_global_norm=5.0,
explore=FLAGS.explore,
aug_policy=FLAGS.aug_policy,
no_cutout=FLAGS.no_cutout,
recompute_dset_stats=FLAGS.recompute_dset_stats,
lr=FLAGS.lr,
weight_decay_rate=FLAGS.wd,
test_batch_size=FLAGS.test_bs)
if state == 'train':
hparams.add_hparam('no_aug', FLAGS.no_aug)
hparams.add_hparam('use_hp_policy', FLAGS.use_hp_policy)
if FLAGS.use_hp_policy:
if FLAGS.hp_policy == 'random':
tf.logging.info('RANDOM SEARCH')
parsed_policy = []
for i in range(NUM_HP_TRANSFORM * 4):
if i % 2 == 0:
parsed_policy.append(random.randint(0, 10))
else:
parsed_policy.append(random.randint(0, 9))
elif FLAGS.hp_policy.endswith('.txt') or FLAGS.hp_policy.endswith(
'.p'):
# will be loaded in in data_utils
parsed_policy = FLAGS.hp_policy
else:
# parse input into a fixed augmentation policy
parsed_policy = FLAGS.hp_policy.split(', ')
parsed_policy = [int(p) for p in parsed_policy]
hparams.add_hparam('hp_policy', parsed_policy)
hparams.add_hparam('hp_policy_epochs', FLAGS.hp_policy_epochs)
hparams.add_hparam('flatten', FLAGS.flatten)
elif state == 'search':
hparams.add_hparam('no_aug', False)
hparams.add_hparam('use_hp_policy', True)
# default start value of 0
hparams.add_hparam('hp_policy',
[0 for _ in range(4 * NUM_HP_TRANSFORM)])
else:
raise ValueError('unknown state')
if FLAGS.model_name == 'wrn_40_2':
hparams.add_hparam('model_name', 'wrn')
epochs = 200
hparams.add_hparam('wrn_size', 32)
hparams.add_hparam('wrn_depth', 40)
elif FLAGS.model_name == 'wrn_28_10':
hparams.add_hparam('model_name', 'wrn')
epochs = 200
hparams.add_hparam('wrn_size', 160)
hparams.add_hparam('wrn_depth', 28)
elif FLAGS.model_name == 'resnet':
hparams.add_hparam('model_name', 'resnet')
epochs = 200
hparams.add_hparam('resnet_size', 20)
hparams.add_hparam('num_filters', 32)
elif FLAGS.model_name == 'shake_shake_32':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 2)
elif FLAGS.model_name == 'shake_shake_96':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 6)
elif FLAGS.model_name == 'shake_shake_112':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 7)
elif FLAGS.model_name == 'pyramid_net':
hparams.add_hparam('model_name', 'pyramid_net')
epochs = 1800
hparams.set_hparam('batch_size', 64)
elif FLAGS.model_name == 'LeNet':
hparams.add_hparam('model_name', 'LeNet')
epochs = 200
else:
raise ValueError('Invalid model name: %s' % FLAGS.model_name)
if FLAGS.epochs > 0:
tf.logging.info('overwriting with custom epochs')
epochs = FLAGS.epochs
hparams.add_hparam('num_epochs', epochs)
tf.logging.info('epochs: {}, lr: {}, wd: {}'.format(
hparams.num_epochs, hparams.lr, hparams.weight_decay_rate))
return hparams
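# Hypothetical usage sketch (not part of the original file): parsing the command-line flags
# and building the Ray config in search mode, mirroring how pba/search.py is expected to
# call into this module.
def _setup_sketch():
    FLAGS = create_parser('search')            # argparse flags for search mode
    hparams = create_hparams('search', FLAGS)  # tf.contrib.training.HParams passed to Ray
    return hparams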

View file

@ -1,41 +0,0 @@
#!/bin/bash
export PYTHONPATH="$(pwd)"
# args: [model name] [lr] [wd] #Learning rate / weight decay
eval_cifar10() {
hp_policy="$PWD/schedules/rcifar10_16_wrn.txt"
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
size=50000
dataset="cifar10"
name="eval_cifar10_$1" # has 8 cutout size
python pba/train.py \
--local_dir "$local_dir" --data_path "$data_path" \
--model_name "$1" --dataset "$dataset" \
--train_size "$size" --val_size 0 \
--checkpoint_freq 25 --gpu 1 --cpu 4 \
--use_hp_policy --hp_policy "$hp_policy" \
--hp_policy_epochs 200 \
--aug_policy cifar10 --name "$name" \
--lr "$2" --wd "$3"
}
if [ "$@" = "wrn_28_10" ]; then
eval_cifar10 wrn_28_10 0.1 0.0005
elif [ "$@" = "ss_32" ]; then
eval_cifar10 shake_shake_32 0.01 0.001
elif [ "$@" = "ss_96" ]; then
eval_cifar10 shake_shake_96 0.01 0.001
elif [ "$@" = "ss_112" ]; then
eval_cifar10 shake_shake_112 0.01 0.001
elif [ "$@" = "pyramid_net" ]; then
eval_cifar10 pyramid_net 0.05 0.00005
elif [ "$@" = "LeNet" ]; then
eval_cifar10 LeNet 0.05 0.0
else
echo "invalid args"
fi

View file

@ -1,73 +0,0 @@
import numpy as np
import tensorflow as tf
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def LeNet(images, num_classes):
# tunable hyperparameters for nn architecture
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
#print(images.shape)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
#print(h_conv1_tf.shape)
#print(h_pool1_tf.shape)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
#print(h_pool2_tf.shape)
# 4.layer: fully connected
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
print(h_fc1_tf.shape)
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
# tf.argmax(y_data_tf, 1),
# name = 'y_pred_correct_tf')
logits = z_pred_tf
return logits #y_pred_proba_tf

View file

@ -1,620 +0,0 @@
# coding=utf-8
# Copyright 2019 The Google UDA Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""UDA on CIFAR-10 and SVHN.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import time
import json
import numpy as np
from absl import flags
import absl.logging as _logging # pylint: disable=unused-import
import tensorflow as tf
from randaugment import custom_ops as ops
import data
import utils
from randaugment.wrn import build_wrn_model
from randaugment.shake_drop import build_shake_drop_model
from randaugment.shake_shake import build_shake_shake_model
from randaugment.LeNet import LeNet
# TPU related
flags.DEFINE_string(
"master", default=None,
help="the TPU address. This should be set when using Cloud TPU")
flags.DEFINE_string(
"tpu", default=None,
help="The Cloud TPU to use for training. This should be either the name "
"used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.")
flags.DEFINE_string(
"gcp_project", default=None,
help="Project name for the Cloud TPU-enabled project. If not specified, "
"we will attempt to automatically detect the GCE project from metadata.")
flags.DEFINE_string(
"tpu_zone", default=None,
help="GCE zone where the Cloud TPU is located in. If not specified, we "
"will attempt to automatically detect the GCE project from metadata.")
flags.DEFINE_bool(
"use_tpu", default=False,
help="Use TPUs rather than GPU/CPU.")
flags.DEFINE_enum(
"task_name", "cifar10",
enum_values=["cifar10", "svhn"],
help="The task to use")
# UDA config:
flags.DEFINE_integer(
"sup_size", default=4000,
help="Number of supervised pairs to use. "
"-1: all training samples. 4000: 4000 supervised examples.")
flags.DEFINE_integer(
"aug_copy", default=0,
help="Number of different augmented data generated.")
flags.DEFINE_integer(
"unsup_ratio", default=0,
help="The ratio between batch size of unlabeled data and labeled data, "
"i.e., unsup_ratio * train_batch_size is the batch_size for unlabeled data."
"Do not use the unsupervised objective if set to 0.")
flags.DEFINE_enum(
"tsa", "",
enum_values=["", "linear_schedule", "log_schedule", "exp_schedule"],
help="anneal schedule of training signal annealing. "
"tsa='' means not using TSA. See the paper for other schedules.")
flags.DEFINE_float(
"uda_confidence_thresh", default=-1,
help="The threshold on predicted probability on unsupervised data. If set,"
"UDA loss will only be calculated on unlabeled examples whose largest"
"probability is larger than the threshold")
flags.DEFINE_float(
"uda_softmax_temp", -1,
help="The temperature of the Softmax when making prediction on unlabeled"
"examples. -1 means to use normal Softmax")
flags.DEFINE_float(
"ent_min_coeff", default=0,
help="")
flags.DEFINE_integer(
"unsup_coeff", default=1,
help="The coefficient on the UDA loss. "
"setting unsup_coeff to 1 works for most settings. "
"When you have extermely few samples, consider increasing unsup_coeff")
# Experiment (data/checkpoint/directory) config
flags.DEFINE_string(
"data_dir", default=None,
help="Path to data directory containing `*.tfrecords`.")
flags.DEFINE_string(
"model_dir", default=None,
help="model dir of the saved checkpoints.")
flags.DEFINE_bool(
"do_train", default=True,
help="Whether to run training.")
flags.DEFINE_bool(
"do_eval", default=False,
help="Whether to run eval on the test set.")
flags.DEFINE_integer(
"dev_size", default=-1,
help="dev set size.")
flags.DEFINE_bool(
"verbose", default=False,
help="Whether to print additional information.")
# Training config
flags.DEFINE_integer(
"train_batch_size", default=32,
help="Size of train batch.")
flags.DEFINE_integer(
"eval_batch_size", default=8,
help="Size of evalation batch.")
flags.DEFINE_integer(
"train_steps", default=100000,
help="Total number of training steps.")
flags.DEFINE_integer(
"iterations", default=10000,
help="Number of iterations per repeat loop.")
flags.DEFINE_integer(
"save_steps", default=10000,
help="number of steps for model checkpointing.")
flags.DEFINE_integer(
"max_save", default=10,
help="Maximum number of checkpoints to save.")
# Model config
flags.DEFINE_enum(
"model_name", default="wrn",
enum_values=["wrn", "shake_shake_32", "shake_shake_96", "shake_shake_112", "pyramid_net", "LeNet"],
help="Name of the model")
flags.DEFINE_integer(
"num_classes", default=10,
help="Number of categories for classification.")
flags.DEFINE_integer(
"wrn_size", default=32,
help="The size of WideResNet. It should be set to 32 for WRN-28-2"
"and should be set to 160 for WRN-28-10")
# Optimization config
flags.DEFINE_float(
"learning_rate", default=0.03,
help="Maximum learning rate.")
flags.DEFINE_float(
"weight_decay_rate", default=5e-4,
help="Weight decay rate.")
flags.DEFINE_float(
"min_lr_ratio", default=0.004,
help="Minimum ratio learning rate.")
flags.DEFINE_integer(
"warmup_steps", default=20000,
help="Number of steps for linear lr warmup.")
FLAGS = tf.flags.FLAGS
arg_scope = tf.contrib.framework.arg_scope
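# Training signal annealing (TSA): with t = global_step / num_train_steps, the schedules below
# compute coeff = t (linear), exp(5 * (t - 1)) (exp) or 1 - exp(-5 * t) (log), and the returned
# threshold is start + coeff * (end - start), i.e. it grows from ~start to end over training.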
def get_tsa_threshold(schedule, global_step, num_train_steps, start, end):
step_ratio = tf.to_float(global_step) / tf.to_float(num_train_steps)
if schedule == "linear_schedule":
coeff = step_ratio
elif schedule == "exp_schedule":
scale = 5
# [exp(-5), exp(0)] = [1e-2, 1]
coeff = tf.exp((step_ratio - 1) * scale)
elif schedule == "log_schedule":
scale = 5
# [1 - exp(0), 1 - exp(-5)] = [0, 0.99]
coeff = 1 - tf.exp((-step_ratio) * scale)
return coeff * (end - start) + start
def setup_arg_scopes(is_training):
"""Sets up the argscopes that will be used when building an image model.
Args:
is_training: Is the model training or not.
Returns:
Arg scopes to be put around the model being constructed.
"""
batch_norm_decay = 0.9
batch_norm_epsilon = 1e-5
batch_norm_params = {
# Decay for the moving averages.
"decay": batch_norm_decay,
# epsilon to prevent 0s in variance.
"epsilon": batch_norm_epsilon,
"scale": True,
# collection containing the moving mean and moving variance.
"is_training": is_training,
}
scopes = []
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
return scopes
def build_model(inputs, num_classes, is_training, update_bn, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model being built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
update_bn: whether to update the batch norm moving statistics.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
try:
from contextlib import nested
except ImportError:
from contextlib import ExitStack, contextmanager
@contextmanager
def nested(*contexts):
with ExitStack() as stack:
for ctx in contexts:
stack.enter_context(ctx)
yield contexts
with nested(*scopes):
if hparams.model_name == "pyramid_net":
logits = build_shake_drop_model(
inputs, num_classes, is_training)
elif hparams.model_name == "wrn":
logits = build_wrn_model(
inputs, num_classes, hparams.wrn_size, update_bn)
elif hparams.model_name == "shake_shake":
logits = build_shake_shake_model(
inputs, num_classes, hparams, is_training)
elif hparams.model_name == "LeNet":
logits = LeNet(inputs, num_classes)
return logits
def _kl_divergence_with_logits(p_logits, q_logits):
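# Per-example KL divergence KL(p || q) = sum_i p_i * (log p_i - log q_i), reduced over the class axis.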
p = tf.nn.softmax(p_logits)
log_p = tf.nn.log_softmax(p_logits)
log_q = tf.nn.log_softmax(q_logits)
kl = tf.reduce_sum(p * (log_p - log_q), -1)
return kl
def anneal_sup_loss(sup_logits, sup_labels, sup_loss, global_step, metric_dict):
tsa_start = 1. / FLAGS.num_classes
eff_train_prob_threshold = get_tsa_threshold(
FLAGS.tsa, global_step, FLAGS.train_steps,
tsa_start, end=1)
one_hot_labels = tf.one_hot(
sup_labels, depth=FLAGS.num_classes, dtype=tf.float32)
sup_probs = tf.nn.softmax(sup_logits, axis=-1)
correct_label_probs = tf.reduce_sum(
one_hot_labels * sup_probs, axis=-1)
larger_than_threshold = tf.greater(
correct_label_probs, eff_train_prob_threshold)
loss_mask = 1 - tf.cast(larger_than_threshold, tf.float32)
loss_mask = tf.stop_gradient(loss_mask)
sup_loss = sup_loss * loss_mask
avg_sup_loss = (tf.reduce_sum(sup_loss) /
tf.maximum(tf.reduce_sum(loss_mask), 1))
metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask)
metric_dict["sup/eff_train_prob_threshold"] = eff_train_prob_threshold
return sup_loss, avg_sup_loss
def get_ent(logits, return_mean=True):
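# Shannon entropy H(p) = -sum_i p_i * log p_i of the predicted distribution,
# optionally averaged over the batch.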
log_prob = tf.nn.log_softmax(logits, axis=-1)
prob = tf.exp(log_prob)
ent = tf.reduce_sum(-prob * log_prob, axis=-1)
if return_mean:
ent = tf.reduce_mean(ent)
return ent
def get_model_fn(hparams):
def model_fn(features, labels, mode, params):
sup_labels = tf.reshape(features["label"], [-1])
#### Configuring the optimizer
global_step = tf.train.get_global_step()
metric_dict = {}
is_training = (mode == tf.estimator.ModeKeys.TRAIN)
if FLAGS.unsup_ratio > 0 and is_training:
all_images = tf.concat([features["image"],
features["ori_image"],
features["aug_image"]], 0)
else:
all_images = features["image"]
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
all_logits = build_model(
inputs=all_images,
num_classes=FLAGS.num_classes,
is_training=is_training,
update_bn=True and is_training,
hparams=hparams,
)
sup_bsz = tf.shape(features["image"])[0]
sup_logits = all_logits[:sup_bsz]
sup_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=sup_labels,
logits=sup_logits)
sup_prob = tf.nn.softmax(sup_logits, axis=-1)
metric_dict["sup/pred_prob"] = tf.reduce_mean(
tf.reduce_max(sup_prob, axis=-1))
if FLAGS.tsa:
sup_loss, avg_sup_loss = anneal_sup_loss(sup_logits, sup_labels, sup_loss,
global_step, metric_dict)
else:
avg_sup_loss = tf.reduce_mean(sup_loss)
total_loss = avg_sup_loss
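# Unsupervised consistency branch (UDA): the target distribution comes from the original
# (unaugmented) images, optionally sharpened by uda_softmax_temp and masked by
# uda_confidence_thresh, and the loss is KL(stop_grad(p_ori) || p_aug) added to the
# supervised loss with weight unsup_coeff.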
if FLAGS.unsup_ratio > 0 and is_training:
aug_bsz = tf.shape(features["ori_image"])[0]
ori_logits = all_logits[sup_bsz : sup_bsz + aug_bsz]
aug_logits = all_logits[sup_bsz + aug_bsz:]
if FLAGS.uda_softmax_temp != -1:
ori_logits_tgt = ori_logits / FLAGS.uda_softmax_temp
else:
ori_logits_tgt = ori_logits
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
aug_prob = tf.nn.softmax(aug_logits, axis=-1)
metric_dict["unsup/ori_prob"] = tf.reduce_mean(
tf.reduce_max(ori_prob, axis=-1))
metric_dict["unsup/aug_prob"] = tf.reduce_mean(
tf.reduce_max(aug_prob, axis=-1))
aug_loss = _kl_divergence_with_logits(
p_logits=tf.stop_gradient(ori_logits_tgt),
q_logits=aug_logits)
if FLAGS.uda_confidence_thresh != -1:
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
largest_prob = tf.reduce_max(ori_prob, axis=-1)
loss_mask = tf.cast(tf.greater(
largest_prob, FLAGS.uda_confidence_thresh), tf.float32)
metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(loss_mask)
loss_mask = tf.stop_gradient(loss_mask)
aug_loss = aug_loss * loss_mask
metric_dict["unsup/high_prob_loss"] = tf.reduce_mean(aug_loss)
if FLAGS.ent_min_coeff > 0:
ent_min_coeff = FLAGS.ent_min_coeff
metric_dict["unsup/ent_min_coeff"] = ent_min_coeff
per_example_ent = get_ent(ori_logits)
ent_min_loss = tf.reduce_mean(per_example_ent)
total_loss = total_loss + ent_min_coeff * ent_min_loss
avg_unsup_loss = tf.reduce_mean(aug_loss)
total_loss += FLAGS.unsup_coeff * avg_unsup_loss
metric_dict["unsup/loss"] = avg_unsup_loss
total_loss = utils.decay_weights(
total_loss,
FLAGS.weight_decay_rate)
#### Check model parameters
num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
tf.logging.info("#params: {}".format(num_params))
if FLAGS.verbose:
format_str = "{{:<{0}s}}\t{{}}".format(
max([len(v.name) for v in tf.trainable_variables()]))
for v in tf.trainable_variables():
tf.logging.info(format_str.format(v.name, v.get_shape()))
#### Evaluation mode
if mode == tf.estimator.ModeKeys.EVAL:
#### Metric function for classification
def metric_fn(per_example_loss, label_ids, logits):
# classification loss & accuracy
loss = tf.metrics.mean(per_example_loss)
predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
accuracy = tf.metrics.accuracy(label_ids, predictions)
ret_dict = {
"eval/classify_loss": loss,
"eval/classify_accuracy": accuracy
}
return ret_dict
eval_metrics = (metric_fn, [sup_loss, sup_labels, sup_logits])
#### Constructing evaluation TPUEstimatorSpec.
eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
loss=total_loss,
eval_metrics=eval_metrics)
return eval_spec
# increase the learning rate linearly
if FLAGS.warmup_steps > 0:
warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
* FLAGS.learning_rate
else:
warmup_lr = 0.0
# decay the learning rate using the cosine schedule
decay_lr = tf.train.cosine_decay(
FLAGS.learning_rate,
global_step=global_step-FLAGS.warmup_steps,
decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
alpha=FLAGS.min_lr_ratio)
learning_rate = tf.where(global_step < FLAGS.warmup_steps,
warmup_lr, decay_lr)
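# Resulting schedule: the learning rate ramps linearly from 0 to learning_rate over
# warmup_steps, then follows a cosine decay down to min_lr_ratio * learning_rate at train_steps.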
optimizer = tf.train.MomentumOptimizer(
learning_rate=learning_rate,
momentum=0.9,
use_nesterov=True)
if FLAGS.use_tpu:
optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
grads_and_vars = optimizer.compute_gradients(total_loss)
gradients, variables = zip(*grads_and_vars)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.apply_gradients(
zip(gradients, variables), global_step=tf.train.get_global_step())
#### Creating training logging hook
# compute accuracy
sup_pred = tf.argmax(sup_logits, axis=-1, output_type=sup_labels.dtype)
is_correct = tf.to_float(tf.equal(sup_pred, sup_labels))
acc = tf.reduce_mean(is_correct)
metric_dict["sup/sup_loss"] = avg_sup_loss
metric_dict["training/loss"] = total_loss
metric_dict["sup/acc"] = acc
metric_dict["training/lr"] = learning_rate
metric_dict["training/step"] = global_step
if not FLAGS.use_tpu:
log_info = ("step [{training/step}] lr {training/lr:.6f} "
"loss {training/loss:.4f} "
"sup/acc {sup/acc:.4f} sup/loss {sup/sup_loss:.6f} ")
if FLAGS.unsup_ratio > 0:
log_info += "unsup/loss {unsup/loss:.6f} "
formatter = lambda kwargs: log_info.format(**kwargs)
logging_hook = tf.train.LoggingTensorHook(
tensors=metric_dict,
every_n_iter=FLAGS.iterations,
formatter=formatter)
training_hooks = [logging_hook]
#### Constructing training TPUEstimatorSpec.
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op,
training_hooks=training_hooks)
else:
#### Constructing training TPUEstimatorSpec.
host_call = utils.construct_scalar_host_call(
metric_dict=metric_dict,
model_dir=params["model_dir"],
prefix="",
reduce_fn=tf.reduce_mean)
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op,
host_call=host_call)
return train_spec
return model_fn
def train(hparams):
##### Create input function
if FLAGS.unsup_ratio == 0:
FLAGS.aug_copy = 0
if FLAGS.dev_size != -1:
FLAGS.do_train = True
FLAGS.do_eval = True
if FLAGS.do_train:
train_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="train",
task_name=FLAGS.task_name,
sup_size=FLAGS.sup_size,
unsup_ratio=FLAGS.unsup_ratio,
aug_copy=FLAGS.aug_copy,
)
if FLAGS.do_eval:
if FLAGS.dev_size != -1:
eval_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="dev",
task_name=FLAGS.task_name,
sup_size=FLAGS.dev_size,
unsup_ratio=0,
aug_copy=0)
eval_size = FLAGS.dev_size
else:
eval_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="test",
task_name=FLAGS.task_name,
sup_size=-1,
unsup_ratio=0,
aug_copy=0)
if FLAGS.task_name == "cifar10":
eval_size = 10000
elif FLAGS.task_name == "svhn":
eval_size = 26032
else:
assert False, "You need to specify the size of your test set."
eval_steps = eval_size // FLAGS.eval_batch_size
##### Get model function
model_fn = get_model_fn(hparams)
estimator = utils.get_TPU_estimator(FLAGS, model_fn)
#### Training
if FLAGS.dev_size != -1:
tf.logging.info("***** Running training and validation *****")
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
tf.logging.info(" Unsupervised batch size = %d",
FLAGS.train_batch_size * FLAGS.unsup_ratio)
tf.logging.info(" Num train steps = %d", FLAGS.train_steps)
curr_step = 0
while True:
if curr_step >= FLAGS.train_steps:
break
tf.logging.info("Current step {}".format(curr_step))
train_step = min(FLAGS.save_steps, FLAGS.train_steps - curr_step)
estimator.train(input_fn=train_input_fn, steps=train_step)
estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
curr_step += FLAGS.save_steps
else:
if FLAGS.do_train:
tf.logging.info("***** Running training *****")
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
tf.logging.info(" Unsupervised batch size = %d",
FLAGS.train_batch_size * FLAGS.unsup_ratio)
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
if FLAGS.do_eval:
tf.logging.info("***** Running evaluation *****")
results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
tf.logging.info(">> Results:")
for key in results.keys():
tf.logging.info(" %s = %s", key, str(results[key]))
results[key] = results[key].item()
acc = results["eval/classify_accuracy"]
with tf.gfile.Open("{}/results.txt".format(FLAGS.model_dir), "w") as ouf:
ouf.write(str(acc))
def main(_):
if FLAGS.do_train:
tf.gfile.MakeDirs(FLAGS.model_dir)
flags_dict = tf.app.flags.FLAGS.flag_values_dict()
with tf.gfile.Open(os.path.join(FLAGS.model_dir, "FLAGS.json"), "w") as ouf:
json.dump(flags_dict, ouf)
hparams = tf.contrib.training.HParams()
if FLAGS.model_name == "wrn":
hparams.add_hparam("model_name", "wrn")
hparams.add_hparam("wrn_size", FLAGS.wrn_size)
elif FLAGS.model_name == "shake_shake_32":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 2)
elif FLAGS.model_name == "shake_shake_96":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 6)
elif FLAGS.model_name == "shake_shake_112":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 7)
elif FLAGS.model_name == "pyramid_net":
hparams.add_hparam("model_name", "pyramid_net")
elif FLAGS.model_name == "LeNet":
hparams.add_hparam("model_name", "LeNet")
else:
raise ValueError("Not Valid Model Name: %s" % FLAGS.model_name)
train(hparams)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run()

View file

@@ -1,31 +0,0 @@
# coding=utf-8
# Copyright 2019 The Google UDA Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/bin/bash
task_name=cifar10
python main.py \
--model_name="LeNet"\
--use_tpu=False \
--do_train=True \
--do_eval=True \
--task_name=${task_name} \
--sup_size=4000 \
--unsup_ratio=5 \
--train_batch_size=32 \
--data_dir=data/proc_data/${task_name} \
--model_dir=ckpt/cifar10_gpu \
--train_steps=400000 \
$@

View file

@@ -1,271 +0,0 @@
# code in this file is adapted from rpmcruz/autoaugment
# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
import random
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
import numpy as np
import torch
from PIL import Image
def ShearX(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0))
def ShearY(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0))
def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[0]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateXabs(img, v): # translation in absolute pixels, e.g. [0, 100]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[1]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def TranslateYabs(img, v): # translation in absolute pixels, e.g. [0, 100]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def Rotate(img, v): # [-30, 30]
assert -30 <= v <= 30
if random.random() > 0.5:
v = -v
return img.rotate(v)
def AutoContrast(img, _):
return PIL.ImageOps.autocontrast(img)
def Invert(img, _):
return PIL.ImageOps.invert(img)
def Equalize(img, _):
return PIL.ImageOps.equalize(img)
def Flip(img, _): # not from the paper
return PIL.ImageOps.mirror(img)
def FlipLR(img, v):
return img.transpose(Image.FLIP_LEFT_RIGHT)
def FlipUD(img, v):
return img.transpose(Image.FLIP_TOP_BOTTOM)
def Solarize(img, v): # [0, 256]
assert 0 <= v <= 256
return PIL.ImageOps.solarize(img, v)
def SolarizeAdd(img, addition=0, threshold=128):
img_np = np.array(img).astype(int)  # np.int is deprecated; plain int keeps the same behaviour
img_np = img_np + addition
img_np = np.clip(img_np, 0, 255)
img_np = img_np.astype(np.uint8)
img = Image.fromarray(img_np)
return PIL.ImageOps.solarize(img, threshold)
def Posterize(img, v): # [4, 8]
v = int(v)
v = max(1, v)
return PIL.ImageOps.posterize(img, v)
def Contrast(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Contrast(img).enhance(v)
def Color(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Color(img).enhance(v)
def Brightness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Brightness(img).enhance(v)
def Sharpness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Sharpness(img).enhance(v)
def Cutout(img, v): # [0, 60] => percentage: [0, 0.2]
assert 0.0 <= v <= 0.2
if v <= 0.:
return img
v = v * img.size[0]
return CutoutAbs(img, v)
def CutoutAbs(img, v): # v: cutout size in absolute pixels
# assert 0 <= v <= 20
if v < 0:
return img
w, h = img.size
x0 = np.random.uniform(w)
y0 = np.random.uniform(h)
x0 = int(max(0, x0 - v / 2.))
y0 = int(max(0, y0 - v / 2.))
x1 = min(w, x0 + v)
y1 = min(h, y0 + v)
xy = (x0, y0, x1, y1)
color = (125, 123, 114)
# color = (0, 0, 0)
img = img.copy()
PIL.ImageDraw.Draw(img).rectangle(xy, color)
return img
def SamplePairing(imgs): # [0, 0.4]
def f(img1, v):
i = np.random.choice(len(imgs))
img2 = PIL.Image.fromarray(imgs[i])
return PIL.Image.blend(img1, img2, v)
return f
def Identity(img, v):
return img
def augment_list(): # augmentation operations and their ranges
# https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
l = [
(Identity, 0., 1.0),
(FlipUD, 0., 1.0),
(FlipLR, 0., 1.0),
(Rotate, 0, 30), # 4
(TranslateX, 0., 0.33), # 2
(TranslateY, 0., 0.33), # 3
(ShearX, 0., 0.3), # 0
(ShearY, 0., 0.3), # 1
#(AutoContrast, 0, 1), # 5
#(Invert, 0, 1), # 6
#(Equalize, 0, 1), # 7
(Contrast, 0.1, 1.9), # 10
(Color, 0.1, 1.9), # 11
(Brightness, 0.1, 1.9), # 12
(Sharpness, 0.1, 1.9), # 13
(Posterize, 4, 8), # 9
(Solarize, 1, 256), # 8
# (Cutout, 0, 0.2), # 14
# (SamplePairing(imgs), 0, 0.4), # 15
]
# https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
#l = [
# (AutoContrast, 0, 1),
# (Equalize, 0, 1),
# (Invert, 0, 1),
# (Rotate, 0, 30),
# (Posterize, 0, 4),
# (Solarize, 0, 256),
# (SolarizeAdd, 0, 110),
# (Color, 0.1, 1.9),
# (Contrast, 0.1, 1.9),
# (Brightness, 0.1, 1.9),
# (Sharpness, 0.1, 1.9),
# (ShearX, 0., 0.3),
# (ShearY, 0., 0.3),
# (CutoutAbs, 0, 40),
# (TranslateXabs, 0., 100),
# (TranslateYabs, 0., 100),
#]
return l
class Lighting(object):
"""Lighting noise(AlexNet - style PCA - based noise)"""
def __init__(self, alphastd, eigval, eigvec):
self.alphastd = alphastd
self.eigval = torch.Tensor(eigval)
self.eigvec = torch.Tensor(eigvec)
def __call__(self, img):
if self.alphastd == 0:
return img
alpha = img.new().resize_(3).normal_(0, self.alphastd)
rgb = self.eigvec.type_as(img).clone() \
.mul(alpha.view(1, 3).expand(3, 3)) \
.mul(self.eigval.view(1, 3).expand(3, 3)) \
.sum(1).squeeze()
return img.add(rgb.view(3, 1, 1).expand_as(img))
class CutoutDefault(object):
"""
Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
"""
def __init__(self, length):
self.length = length
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
PARAMETER_MAX = 1
class RandAugment:
def __init__(self, n, m):
self.n = n
self.m = m # [0, PARAMETER_MAX]
self.augment_list = augment_list()
def __call__(self, img):
ops = random.choices(self.augment_list, k=self.n)
for op, minval, maxval in ops:
val = (float(self.m) / PARAMETER_MAX) * float(maxval - minval) + minval
img = op(img, val)
return img
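# With PARAMETER_MAX = 1, each sampled op receives val = m * (maxval - minval) + minval,
# so m acts as a global magnitude knob in [0, 1] and n ops are applied per image.
# Hypothetical usage sketch (names are illustrative, not from this file):
#   rand_aug = RandAugment(n=2, m=0.5)
#   augmented = rand_aug(pil_img)  # pil_img: a PIL.Image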

View file

@@ -1,98 +0,0 @@
import torch
import numpy as np
import torchvision
from PIL import Image
from torch import topk
import torch.nn.functional as F
from torch import topk
import cv2
from torchvision import transforms
import os
class SaveFeatures():
features=None
def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
def hook_fn(self, module, input, output): self.features = ((output.cpu()).data).numpy()
def remove(self): self.hook.remove()
def getCAM(feature_conv, weight_fc, class_idx):
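# Class activation map: CAM_c = sum_k w_{c,k} * A_k, i.e. the FC weights of class `class_idx`
# applied to the final conv feature maps, then shifted and rescaled to [0, 1].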
_, nc, h, w = feature_conv.shape
cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h*w)))
cam = cam.reshape(h, w)
cam = cam - np.min(cam)
cam_img = cam / np.max(cam)
# cam_img = np.uint8(255 * cam_img)
return cam_img
def main(cam):
device = 'cuda:0'
model_name = 'resnet50'
root = 'NEW_SS'
os.makedirs(os.path.join(root + '_CAM', 'OK'), exist_ok=True)
os.makedirs(os.path.join(root + '_CAM', 'NOK'), exist_ok=True)
train_transform = transforms.Compose([
transforms.ToTensor(),
])
dataset = torchvision.datasets.ImageFolder(
root=root, transform=train_transform,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
model = torchvision.models.__dict__[model_name](pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, 2)
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
model.eval()
weight_softmax_params = list(model._modules.get('fc').parameters())
weight_softmax = np.squeeze(weight_softmax_params[0].cpu().data.numpy())
final_layer = model._modules.get('layer4')
activated_features = SaveFeatures(final_layer)
for i, (img, target ) in enumerate(loader):
img = img.to(device)
prediction = model(img)
pred_probabilities = F.softmax(prediction, dim=1).data.squeeze()
class_idx = topk(pred_probabilities,1)[1].int()
# if target.item() != class_idx:
# print(dataset.imgs[i][0])
if cam:
overlay = getCAM(activated_features.features, weight_softmax, class_idx )
# import ipdb; ipdb.set_trace()  # debug breakpoint, disabled
import PIL
from torchvision.transforms import ToPILImage
img = ToPILImage()(overlay).resize(size=(1280, 1024), resample=PIL.Image.BILINEAR)
img.save('heat-pil.jpg')
img = cv2.imread(dataset.imgs[i][0])
height, width, _ = img.shape
overlay = cv2.resize(overlay, (width, height))
heatmap = cv2.applyColorMap(np.uint8(255 * overlay), cv2.COLORMAP_JET)  # applyColorMap expects uint8 input
cv2.imwrite('heat-cv2.jpg', heatmap)
img = cv2.imread(dataset.imgs[i][0])
height, width, _ = img.shape
overlay = cv2.resize(overlay, (width, height))
heatmap = cv2.applyColorMap(np.uint8(255 * overlay), cv2.COLORMAP_JET)  # applyColorMap expects uint8 input
result = heatmap * 0.3 + img * 0.5
clss = dataset.imgs[i][0].split(os.sep)[1]
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
cv2.imwrite(os.path.join(root+"_CAM", clss, name + '.jpg'), result)
print(f'{os.path.join(root+"_CAM", clss, name + ".jpg")} saved')
activated_features.remove()
if __name__ == "__main__":
main(cam=True)

Binary file not shown.

File diff suppressed because it is too large

View file

@@ -1,314 +0,0 @@
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc
from torchviz import make_dot
import torch
import torch.nn.functional as F
import time
class timer():
def __init__(self):
self._start_time=time.time()
def exec_time(self):
end = time.time()
res = end-self._start_time
self._start_time=end
return res
def print_graph(PyTorch_obj, fig_name='graph'):
graph=make_dot(PyTorch_obj) # passing the loss gives the whole graph
graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
graph.render(fig_name)
def plot_res(log, fig_name='res', param_names=None):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0].legend()
ax[1].set_title('Acc')
ax[1].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].set_title('Mag')
ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
ax[2].legend()
else :
ax[2].set_title('Prob')
#for idx, _ in enumerate(log[0]["param"]):
#ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[2].stackplot(epochs, proba, labels=param_names)
ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name)
plt.close()
def plot_resV2(log, fig_name='res', param_names=None):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))
ax[0, 0].set_title('Loss')
ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0, 0].legend()
ax[1, 0].set_title('Acc')
ax[1, 0].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
#proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[0, 1].set_title('Prob =f(epoch)')
ax[0, 1].stackplot(epochs, proba, labels=param_names)
#ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 1].set_title('Prob =f(TF)')
mean = np.mean(proba, axis=1)
std = np.std(proba, axis=1)
ax[1, 1].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 1]), plt.xticks(rotation=90)
ax[0, 2].set_title('Mag =f(epoch)')
ax[0, 2].stackplot(epochs, mag, labels=param_names)
ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 2].set_title('Mag =f(TF)')
mean = np.mean(mag, axis=1)
std = np.std(mag, axis=1)
ax[1, 2].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 2]), plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_compare(filenames, fig_name='res'):
all_data=[]
legend=""
for idx, file in enumerate(filenames):
legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
for data_idx, log in enumerate(all_data):
log=log['Log']
epochs = [x["epoch"] for x in log]
ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
#ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
else :
for idx, _ in enumerate(log[0]["param"]):
ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
fig.suptitle(legend)
ax[0].set_title('Loss')
ax[1].set_title('Acc')
ax[2].set_title('Param')
for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_res_compare(filenames, fig_name='res'):
all_data=[]
#legend=""
for idx, file in enumerate(filenames):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
n_tf = [len(x["Param_names"]) for x in all_data]
acc = [x["Accuracy"] for x in all_data]
time = [x["Time"][0] for x in all_data]
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
ax[0].plot(n_tf, acc)
ax[1].plot(n_tf, time)
ax[0].set_title('Acc')
ax[1].set_title('Time')
#for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_TF_res(log, tf_names, fig_name='res'):
mean = np.mean([x["param"] for x in log], axis=0)
std = np.std([x["param"] for x in log], axis=0)
fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
ax.bar(tf_names, mean, yerr=std)
#ax.bar(tf_names, log[-1]["param"])
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def viz_sample_data(imgs, labels, fig_name='data_sample'):
sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
plt.xlabel(labels[i].item())
plt.savefig(fig_name)
print("Sample saved :", fig_name)
plt.close()
def model_copy(src,dst, patch_copy=True, copy_grad=True):
#model=copy.deepcopy(fmodel) # not appropriate: we only want the weights/grads, not all of fmodel and its state
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) # copy missing data?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
# Copy the gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
#inner_opt.load_state_dict(diffopt.state_dict()) # need to save the optimizer state (momentum, etc.) => does not copy the state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
def print_torch_mem(add_info=''):
nb=0
max_size=0
for obj in gc.get_objects():
#print(type(obj))
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
#print(add_info, "-Garbage size :",len(gc.garbage))
"""Simple GPU memory report."""
mega_bytes = 1024.0 * 1024.0
string = add_info + ' memory (MB)'
string += ' | allocated: {}'.format(
torch.cuda.memory_allocated() / mega_bytes)
string += ' | max allocated: {}'.format(
torch.cuda.max_memory_allocated() / mega_bytes)
string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
string += ' | max cached: {}'.format(
torch.cuda.max_memory_cached()/ mega_bytes)
print(string)
def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
plt.figure()
mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) # could be interesting to multiply before taking the mean
std = np.std(proba, axis=1)*np.std(mag, axis=1)
plt.bar(param_names, mean, yerr=std)
plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
class loss_monitor(): # see https://github.com/pytorch/ignite
def __init__(self, patience, end_train=1):
self.patience = patience
self.end_train = end_train
self.counter = 0
self.best_score = None
self.reached_limit = 0
def register(self, loss):
if self.best_score is None:
self.best_score = loss
elif loss > self.best_score:
self.counter += 1
#if not self.reached_limit:
print("loss no improve counter", self.counter, self.reached_limit)
else:
self.best_score = loss
self.counter = 0
def limit_reached(self):
if self.counter >= self.patience:
self.counter = 0
self.reached_limit +=1
self.best_score = None
return self.reached_limit
def end_training(self):
if self.limit_reached() >= self.end_train:
return True
else:
return False
def reset(self):
self.__init__(self.patience, self.end_train)
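# Hypothetical usage sketch (illustrative only): stop once the validation loss has failed to
# improve `patience` times in a row, `end_train` times over:
#   monitor = loss_monitor(patience=5)
#   for epoch in range(epochs):
#       # ... train, then compute val_loss ...
#       monitor.register(val_loss)
#       if monitor.end_training():
#           break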

View file

@@ -1,102 +0,0 @@
import torch
import numpy as np
import torchvision
from PIL import Image
from torch import topk
from torch import nn
import torch.nn.functional as F
from torch import topk
import cv2
from torchvision import transforms
import os
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SaveFeatures():
activations, gradients = None, None
def __init__(self, m):
self.forward = m.register_forward_hook(self.forward_hook_fn)
self.backward = m.register_backward_hook(self.backward_hook_fn)
def forward_hook_fn(self, module, input, output):
self.activations = output.cpu().detach()
def backward_hook_fn(self, module, grad_input, grad_output):
self.gradients = grad_output[0].cpu().detach()
def remove(self):
self.forward.remove()
self.backward.remove()
def main(cam):
device = 'cuda:0'
model_name = 'resnet50'
root = '/mnt/md0/data/cifar10/tmp/cifar/train'
_root = 'cifar'
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
train_transform = transforms.Compose([
transforms.ToTensor(),
])
dataset = torchvision.datasets.ImageFolder(
root=root, transform=train_transform,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
model = torchvision.models.__dict__[model_name](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(loader.dataset.classes)))
model = nn.Sequential(body, head)
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
model.eval()
activated_features = SaveFeatures(model[0])
for i, (img, target ) in enumerate(loader):
img = img.to(device)
pred = model(img)
# import ipdb; ipdb.set_trace()  # debug breakpoint, disabled
# get the gradient of the output with respect to the parameters of the model
pred[:, target.item()].backward()
# import ipdb; ipdb.set_trace()
# pull the gradients out of the model
gradients = activated_features.gradients[0]
pooled_gradients = gradients.mean(1).mean(1)
# get the activations of the last convolutional layer
activations = activated_features.activations[0]
heatmap = F.relu(((activations*pooled_gradients[...,None,None])).sum(0))
heatmap /= torch.max(heatmap)
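# Grad-CAM: the channel weights are the spatially averaged gradients of the class score,
# and the heatmap is ReLU(sum_k alpha_k * A_k), rescaled to [0, 1] before colormapping.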
heatmap = heatmap.numpy()
image = cv2.imread(dataset.imgs[i][0])
heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
# superimposed_img = heatmap * 0.3 + image * 0.5
superimposed_img = heatmap
clss = dataset.imgs[i][0].split(os.sep)[1]
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
cv2.imwrite(os.path.join(_root+"_CAM", name + '.jpg'), superimposed_img)
print(f'{os.path.join(_root+"_CAM", name + ".jpg")} saved')
activated_features.remove()
if __name__ == "__main__":
main(cam=True)

View file

@@ -1,382 +0,0 @@
import datetime
import os
import time
import sys
import torch
import torch.utils.data
from torch import nn
import torchvision
from torchvision import transforms
from PIL import ImageEnhance
import random
import utils
from fastprogress import master_bar, progress_bar
import numpy as np
## DATA AUG ##
import higher
from dataug import *
from dataug_utils import *
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
#'Contrast',
#'Color',
#'Brightness',
#'Sharpness',
#'Posterize',
#'Solarize', # => expects images in [0, 1]; not optimized for batches
]
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SubsetSampler(torch.utils.data.SubsetRandomSampler):
def __init__(self, indices):
super().__init__(indices)
def __iter__(self):
return (self.indices[i] for i in range(len(self.indices)))
def __len__(self):
return len(self.indices)
def sharpness(img, factor):
sharpness_factor = random.uniform(1, factor)
sharp = ImageEnhance.Sharpness(img)
sharped = sharp.enhance(sharpness_factor)
return sharped
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar, Kldiv=False):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
image, target = image.to(device), target.to(device)
if not Kldiv :
output = model(image)
#output = F.log_softmax(output, dim=1)
loss = criterion(output, target) # no softmax?
else: # consumes ~2x memory
model.augment(mode=False)
output = model(image)
model.augment(mode=True)
log_sup=F.log_softmax(output, dim=1)
sup_loss = F.cross_entropy(log_sup, target)
aug_output = model(image)
log_aug=F.log_softmax(aug_output, dim=1)
aug_loss=F.cross_entropy(log_aug, target)
#KL div w/ logits - similarity between predictions (distributions)
KL_loss = F.softmax(output, dim=1)*(log_sup-log_aug)
KL_loss = KL_loss.sum(dim=-1)
#KL_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
KL_loss = KL_loss.mean()
unsupp_coeff = 1
loss = sup_loss + (aug_loss + KL_loss) * unsupp_coeff
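# This mirrors the UDA objective of the TensorFlow script above: supervised cross-entropy
# plus a consistency term between clean and augmented predictions, weighted by unsupp_coeff
# (unlike the TF version, the augmented cross-entropy is also kept in the unsupervised term).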
#print(sup_loss.item(), (aug_loss + KL_loss).item())
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
return metric_logger.loss.global_avg, confmat
def evaluate(model, criterion, data_loader, device):
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Test:'
missed = []
with torch.no_grad():
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
if target.item() != output.topk(1)[1].item():
missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
confmat.update(target.flatten(), output.argmax(1).flatten())
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
return metric_logger.loss.global_avg, missed, confmat
def get_train_valid_loader(args, augment, random_seed, valid_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
"""
Utility function for loading and returning train and valid
multi-process iterators over the CIFAR-10 dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Params
------
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- augment: whether to apply the data augmentation scheme
mentioned in the paper. Only applied on the train split.
- random_seed: fix seed for reproducibility.
- valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
error_msg = "[!] valid_size should be in the range [0, 1]."
assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
# normalize = transforms.Normalize(
# mean=[0.4914, 0.4822, 0.4465],
# std=[0.2023, 0.1994, 0.2010],
# )
# define transforms
if augment:
train_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.Lambda(lambda img: sharpness(img, 5)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
else:
train_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
# load the dataset
train_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=train_transform
)
valid_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=valid_transform
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
valid_sampler = SubsetSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=1, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
imgs = np.asarray(train_dataset.imgs)
# print('Train')
# print(imgs[train_idx])
#print('Valid')
#print(imgs[valid_idx])
tgt = [0,0]
for _, targets in train_loader:
for target in targets:
tgt[target]+=1
print("Train targets :", tgt)
tgt = [0,0]
for _, targets in valid_loader:
for target in targets:
tgt[target]+=1
print("Valid targets :", tgt)
return (train_loader, valid_loader)
def main(args):
print(args)
device = torch.device(args.device)
torch.backends.cudnn.benchmark = True
#augment = True if not args.test_only else False
if not args.test_only and args.augment=='flip' : augment = True
else : augment = False
print("Augment", augment)
data_loader, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
num_workers=args.workers, valid_size=0.3, random_seed=999)
print("Creating model")
model = torchvision.models.__dict__[args.model](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
model = nn.Sequential(body, head)
Kldiv=False
if not args.test_only and (args.augment=='Rand' or args.augment=='RandKL'):
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
if args.augment=='RandKL': Kldiv=True
model['data_aug']['mag'].data = model['data_aug']['mag'].data * args.magnitude
print("Augmodel")
# model.fc = nn.Linear(model.fc.in_features, 2)
# import ipdb; ipdb.set_trace()
criterion = nn.CrossEntropyLoss().to(device)
# optimizer = torch.optim.SGD(
# model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
es = utils.EarlyStopping() if not (args.augment=='Rand' or args.augment=='RandKL') else utils.EarlyStopping(augmented_model=True)
if args.test_only:
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
print('TEST')
_, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
print(missed)
print('TRAIN')
_, missed, _ = evaluate(model, criterion, data_loader, device=device)
print(missed)
return
model = model.to(device)
print("Start training")
start_time = time.time()
mb = master_bar(range(args.epochs))
for epoch in mb:
_, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb, Kldiv)
lr_scheduler.step( (epoch+1)*len(data_loader) )
val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(val_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
#print('Valid')
#print(valid_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
def parse_args():
import argparse
parser = argparse.ArgumentParser(description='PyTorch Classification Training')
parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
parser.add_argument('--device', default='cuda:0', help='device')
parser.add_argument('-b', '--batch-size', default=8, type=int)
parser.add_argument('--epochs', default=3, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
help='number of data loading workers (default: 0)')
parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
metavar='W', help='weight decay (default: 4e-5)',
dest='weight_decay')
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument('-a', '--augment', default='None', type=str,
metavar='N', help='Data augment',
dest='augment')
parser.add_argument('-m', '--magnitude', default=1.0, type=float,
metavar='N', help='Augmentation magnitude',
dest='magnitude')
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
main(args)

View file

@@ -1,585 +0,0 @@
import datetime
import os
import time
import sys
import torch
import torch.utils.data
from torch import nn
import torchvision
from torchvision import transforms
from PIL import ImageEnhance
import random
import utils
from fastprogress import master_bar, progress_bar
import numpy as np
## DATA AUG ##
import higher
from dataug import *
from dataug_utils import *
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast',
'Color',
'Brightness',
'Sharpness',
'Posterize',
'Solarize', # => expects images in [0, 1]; not optimized for batches
]
def compute_vaLoss(model, dl_it, dl):
device = next(model.parameters()).device
try:
xs, ys = next(dl_it)
except StopIteration: # end of the validation epoch
dl_it = iter(dl)
xs, ys = next(dl_it)
xs, ys = xs.to(device), ys.to(device)
model.eval() # validation without transformations!
return F.cross_entropy(model(xs), ys)
def model_copy(src,dst, patch_copy=True, copy_grad=True):
#model=copy.deepcopy(fmodel) # not appropriate: we only want the weights/grads, not all of fmodel and its state
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) # copy missing data?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
# Copy the gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
#inner_opt.load_state_dict(diffopt.state_dict()) # need to save the optimizer state (momentum, etc.) => does not copy the state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
#############
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SubsetSampler(torch.utils.data.SubsetRandomSampler):
def __init__(self, indices):
super().__init__(indices)
def __iter__(self):
return (self.indices[i] for i in range(len(self.indices)))
def __len__(self):
return len(self.indices)
def sharpness(img, factor):
sharpness_factor = random.uniform(1, factor)
sharp = ImageEnhance.Sharpness(img)
sharped = sharp.enhance(sharpness_factor)
return sharped
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
return metric_logger.loss.global_avg, confmat
def evaluate(model, criterion, data_loader, device):
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Test:'
missed = []
with torch.no_grad():
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
if target.item() != output.topk(1)[1].item():
missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
confmat.update(target.flatten(), output.argmax(1).flatten())
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
return metric_logger.loss.global_avg, missed, confmat
def get_train_valid_loader(args, augment, random_seed, train_size=0.5, test_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
"""
Utility function for loading and returning train and valid
multi-process iterators over the CIFAR-10 dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Params
------
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- augment: whether to apply the data augmentation scheme
mentioned in the paper. Only applied on the train split.
- random_seed: fix seed for reproducibility.
- valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
error_msg = "[!] test_size should be in the range [0, 1]."
assert ((test_size >= 0) and (test_size <= 1)), error_msg
# normalize = transforms.Normalize(
# mean=[0.4914, 0.4822, 0.4465],
# std=[0.2023, 0.1994, 0.2010],
# )
# define transforms
if augment:
train_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.Lambda(lambda img: sharpness(img, 5)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
else:
train_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
# load the dataset
train_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=train_transform
)
test_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=valid_transform
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(test_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, test_idx = indices[split:], indices[:split]
train_idx, valid_idx = train_idx[:int(len(train_idx)*train_size)], train_idx[int(len(train_idx)*train_size):]
print("\nTrain", len(train_idx), "\nValid", len(valid_idx), "\nTest", len(test_idx))
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) if not args.test_only else SubsetSampler(valid_idx)
test_sampler = SubsetSampler(test_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size=1, sampler=test_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
imgs = np.asarray(train_dataset.imgs)
# print('Train')
# print(imgs[train_idx])
#print('Valid')
#print(imgs[valid_idx])
return (train_loader, valid_loader, test_loader)
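# Usage sketch (illustrative, assuming `args` comes from parse_args() below): with
# test_size=0.2 and train_size=0.99, 20% of the ImageFolder images form the test
# split and the remaining 80% is split 99%/1% between train and validation.
#   train_loader, valid_loader, test_loader = get_train_valid_loader(
#       args, augment=False, random_seed=999, train_size=0.99, test_size=0.2)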
def main(args):
print(args)
device = torch.device(args.device)
torch.backends.cudnn.benchmark = True
#augment = True if not args.test_only else False
augment = False
data_loader, dl_val, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
num_workers=args.workers, train_size=0.99, test_size=0.2, random_seed=999)
print("Creating model")
model = torchvision.models.__dict__[args.model](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
model = nn.Sequential(body, head)
# model.fc = nn.Linear(model.fc.in_features, 2)
# import ipdb; ipdb.set_trace()
criterion = nn.CrossEntropyLoss().to(device)
# optimizer = torch.optim.SGD(
# model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
'''
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
'''
es = utils.EarlyStopping()
if args.test_only:
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
print('TEST')
_, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
print(missed)
print('TRAIN')
_, missed, _ = evaluate(model, criterion, data_loader, device=device)
print(missed)
return
model = model.to(device)
print("Start training")
start_time = time.time()
mb = master_bar(range(args.epochs))
"""
for epoch in mb:
_, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb)
lr_scheduler.step( (epoch+1)*len(data_loader) )
val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(val_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
print('Valid')
print(valid_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
"""
#######
inner_it = args.inner_it
dataug_epoch_start=0
print_freq=1
KLdiv=False
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
#model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
val_loss=torch.tensor(0) #Needed if no meta step is performed during an epoch
dl_val_it = iter(dl_val)
countcopy=0
#if inner_it!=0:
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=args.lr) #lr=1e-2
#inner_opt = torch.optim.SGD(model['model'].parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #lr=1e-2 / momentum=0.9
inner_opt = torch.optim.Adam(model['model'].parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
inner_opt,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
high_grad_track = True
if inner_it == 0:
high_grad_track=False
model.train()
model.augment(mode=False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel,track_higher_grads=high_grad_track)
i=0
for epoch in mb:
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
t0 = time.process_time()
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=mb):
#for i, (xs, ys) in enumerate(dl_train):
#print_torch_mem("it"+str(i))
i+=1
image, target = image.to(device), target.to(device)
if(not KLdiv):
#Uniform method
logits = fmodel(image) # modified `params` can also be passed as a kwarg
output = F.log_softmax(logits, dim=1)
loss = F.cross_entropy(logits, target, reduction='none') # per-sample loss; no need to call loss.backward(), diffopt.step handles it
if fmodel._data_augmentation: #weight the loss of each augmented sample
w_loss = fmodel['data_aug'].loss_weight()#.to(device)
loss = loss * w_loss
loss = loss.mean()
else:
#KL-divergence method
fmodel.augment(mode=False)
sup_logits = fmodel(image)
log_sup=F.log_softmax(sup_logits, dim=1)
fmodel.augment(mode=True)
loss = F.cross_entropy(sup_logits, target)
if fmodel._data_augmentation:
aug_logits = fmodel(image)
log_aug=F.log_softmax(aug_logits, dim=1)
aug_loss=0
if epoch>50: #delayed start?
#KL div w/ logits - similarity of the predicted distributions
aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
aug_loss=aug_loss.sum(dim=-1)
#aug_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
w_loss = fmodel['data_aug'].loss_weight() #weight the loss
aug_loss = (w_loss * aug_loss).mean()
aug_loss += (F.cross_entropy(aug_logits, target, reduction='none') * w_loss).mean()
#print(aug_loss)
unsupp_coeff = 1
loss += aug_loss * unsupp_coeff
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
if(high_grad_track and i%inner_it==0): #Perform Meta step
#print("meta")
#Of little use if high_grad_track = False
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) + fmodel['data_aug'].reg_loss()
#print_graph(val_loss)
val_loss.backward()
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
#if epoch>50:
meta_opt.step()
model['data_aug'].adjust_param(soft=False) #Constraint: sum(proba) = 1
#model['data_aug'].next_TF_set()
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
if(not high_grad_track and (torch.cuda.memory_cached()/1024.0**2)>20000):
countcopy+=1
print_torch_mem("copy")
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
#Needed to reset higher (fast params accumulate even with track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
print_torch_mem("copy")
if(not high_grad_track):
countcopy+=1
print_torch_mem("end copy")
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
#Needed to reset higher (fast params accumulate even with track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
print_torch_mem("end copy")
tf = time.process_time()
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d'%(epoch))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',model['data_aug']['prob'].grad)
print('TF Mag :', model['data_aug']['mag'].data)
#print('Mag grad',model['data_aug']['mag'].grad)
#print('Reg loss:', model['data_aug'].reg_loss().item())
#print('Aug loss', aug_loss.item())
#############
#### Log ####
#print(type(model['data_aug']) is dataug.Data_augV5)
'''
param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": param #if isinstance(model['data_aug'], Data_augV5)
#else [p.item() for p in model['data_aug']['prob']],
}
log.append(data)
'''
#############
train_confmat=confmat
lr_scheduler.step( (epoch+1)*len(data_loader) )
test_loss, _, test_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(test_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
print('Test')
print(test_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
def parse_args():
import argparse
parser = argparse.ArgumentParser(description='PyTorch Classification Training')
parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
parser.add_argument('--device', default='cuda:0', help='device')
parser.add_argument('-b', '--batch-size', default=8, type=int)
parser.add_argument('--epochs', default=3, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
help='number of data loading workers (default: 0)')
parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
metavar='W', help='weight decay (default: 4e-5)',
dest='weight_decay')
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument('--in_it', '--inner_it', default=0, type=int,
metavar='N', help='higher inner_it',
dest='inner_it')
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
main(args)

View file

@@ -1,346 +0,0 @@
import torch
import kornia
import random
### Available TF for Dataug ###
'''
TF_dict={ #Dataugv4
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=torch.tensor([rand_int(mag, maxval=30)for _ in x], device=x.device))),
'TranslateX': (lambda x, mag: translate(x, translation=torch.tensor([[rand_int(mag, maxval=20), 0] for _ in x], device=x.device))),
'TranslateY': (lambda x, mag: translate(x, translation=torch.tensor([[0, rand_int(mag, maxval=20)] for _ in x], device=x.device))),
'ShearX': (lambda x, mag: shear(x, shear=torch.tensor([[rand_float(mag, maxval=0.3), 0] for _ in x], device=x.device))),
'ShearY': (lambda x, mag: shear(x, shear=torch.tensor([[0, rand_float(mag, maxval=0.3)] for _ in x], device=x.device))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Color':(lambda x, mag: color(x, color_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Posterize': (lambda x, mag: posterize(x, bits=torch.tensor([rand_int(mag, minval=4, maxval=8) for _ in x], device=x.device))),
'Solarize': (lambda x, mag: solarize(x, thresholds=torch.tensor([rand_int(mag,minval=1, maxval=256)/256. for _ in x], device=x.device))), #=>Image in [0, 1] #Not optimized for batches
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
'''
'''
TF_dict={ #Dataugv5 #AutoAugment
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
'''
TF_dict={ #Dataugv5
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
#Color TF (Common mag scale)
'+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
'BRotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30*3))),
'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=0))),
'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=1))),
'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=0))),
'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=1))),
'BadTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=0))),
'BadTranslateX_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=0))),
'BadTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=1))),
'BadTranslateY_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=1))),
'BadColor':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadSharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
TF_no_mag={'Identity', 'FlipUD', 'FlipLR'}
TF_ignore_mag= TF_no_mag | {'Solarize', 'Posterize'}
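# Illustrative use (assumes `x` is a float image batch in [0, 1] and `mag` a scalar
# tensor level in [0, PARAMETER_MAX], defined further below):
#   x = torch.rand(8, 3, 32, 32)
#   mag = torch.tensor(0.5)
#   x_rot = TF_dict['Rotate'](x, mag)   # random rotations, here up to +/- 15 degrees
# Transforms in TF_no_mag ignore `mag`; those in TF_ignore_mag use it but do not
# propagate a gradient through it.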
def int_image(float_image): #WARNING: slight loss of information (granularity: 1/256 = 0.0039)
return (float_image*255.).type(torch.uint8)
def float_image(int_image):
return int_image.type(torch.float)/255.
#def rand_inverse(value):
# return value if random.random() < 0.5 else -value
#def rand_int(mag, maxval, minval=None): #[(-maxval,minval), maxval]
# real_max = int_parameter(mag, maxval=maxval)
# if not minval : minval = -real_max
# return random.randint(minval, real_max)
#def rand_float(mag, maxval, minval=None): #[(-maxval,minval), maxval]
# real_max = float_parameter(mag, maxval=maxval)
# if not minval : minval = -real_max
# return random.uniform(minval, real_max)
def rand_floats(size, mag, maxval, minval=None): #[(-maxval,minval), maxval]
real_mag = float_parameter(mag, maxval=maxval)
if not minval : minval = -real_mag
#return random.uniform(minval, real_max)
return minval + (real_mag-minval) * torch.rand(size, device=mag.device) #[min_val, real_mag]
def invScale_rand_floats(size, mag, maxval, minval):
#Mag=[0,PARAMETER_MAX] => [PARAMETER_MAX, 0] = [maxval, minval]
real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval-minval)+minval
return real_mag + (maxval-real_mag) * torch.rand(size, device=mag.device) #[real_mag, max_val]
def zero_stack(tensor, zero_pos):
if zero_pos==0:
return torch.stack((tensor, torch.zeros((tensor.shape[0],), device=tensor.device)), dim=1)
if zero_pos==1:
return torch.stack((torch.zeros((tensor.shape[0],), device=tensor.device), tensor), dim=1)
else:
raise Exception("Invalid zero_pos : ", zero_pos)
#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
PARAMETER_MAX = 1 # What is the max 'level' a transform could be predicted
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
#return float(level) * maxval / PARAMETER_MAX
return (level * maxval / PARAMETER_MAX)#.to(torch.float)
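# Worked example: with PARAMETER_MAX == 1, float_parameter(0.5, maxval=30) returns
# 0.5 * 30 / 1 = 15.0, i.e. the level linearly rescales the maximum magnitude.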
#def int_parameter(level, maxval): #Gradient is lost
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
#return int(level * maxval / PARAMETER_MAX)
# return (level * maxval / PARAMETER_MAX)
def flipLR(x):
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def flipUD(x):
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
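# Note (illustrative): both flips are expressed as 3x3 homographies applied with a
# single kornia.warp_perspective call over the whole batch; flipLR maps column x to
# (w-1) - x and flipUD maps row y to (h-1) - y.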
def rotate(x, angle):
return kornia.rotate(x, angle=angle.type(torch.float)) #Kornia does not support ints
def translate(x, translation):
#print(translation)
return kornia.translate(x, translation=translation.type(torch.float)) #Kornia does not support ints
def shear(x, shear):
return kornia.shear(x, shear=shear)
def contrast(x, contrast_factor):
return kornia.adjust_contrast(x, contrast_factor=contrast_factor) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
(batch_size, channels, h, w) = x.shape
gray_x = kornia.rgb_to_grayscale(x)
gray_x = gray_x.repeat_interleave(channels, dim=1)
return blend(gray_x, x, color_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def brightness(x, brightness_factor):
device = x.device
return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def sharpeness(x, sharpness_factor):
device = x.device
(batch_size, channels, h, w) = x.shape
k = torch.tensor([[[ 1., 1., 1.],
[ 1., 5., 1.],
[ 1., 1., 1.]]], device=device) #Smooth Filter : https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) #The alpha channel may need to be handled separately
return blend(smooth_x, x, sharpness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
bits = bits.type(torch.uint8) #Gradient is lost
x = int_image(x) #Expect image in the range of [0, 1]
mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)
(batch_size, channels, h, w) = x.shape
mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
return float_image(x & mask)
def auto_contrast(x): #NOT OPTIMIZED FOR BATCHES #VERY SLOW
# Optimization: efficient LUT application / per-batch/per-channel histogram computation
print("Warning: not checked yet!")
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
# find lowest/highest samples after preprocessing
for lo in range(256):
if hist[lo]:
break
for hi in range(255, -1, -1):
if hist[hi]:
break
if hi <= lo:
# don't bother
pass
else:
scale = 255.0 / (hi - lo)
offset = -lo * scale
for ix in range(256):
n_ix = int(ix * scale + offset)
if n_ix < 0: n_ix = 0
elif n_ix > 255: n_ix = 255
chan[chan==ix]=n_ix
x[im_idx, chan_idx]=chan
#print('End',x[0])
return float_image(x)
def equalize(x): #NOT OPTIMIZED FOR BATCHES
raise NotImplementedError("equalize is not implemented")
# Optimization: efficient LUT application / per-batch/per-channel histogram computation
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
return float_image(x)
def solarize(x, thresholds):
batch_size, channels, h, w = x.shape
#imgs=[]
#for idx, t in enumerate(thresholds): #Per-image operation
# mask = x[idx] > t #Gradient is lost
#In place
# inv_x = 1-x[idx][mask]
# x[idx][mask]=inv_x
#
#Out of place
# im = x[idx]
# inv_x = 1-im[mask]
# imgs.append(im.masked_scatter(mask,inv_x))
#idxs=torch.tensor(range(x.shape[0]), device=x.device)
#idxs=idxs.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
#x=x.scatter(dim=0, index=idxs, src=torch.stack(imgs))
#
thresholds = thresholds.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
#print(thresholds.grad_fn)
x=torch.where(x>thresholds,1-x, x)
#print(mask.grad_fn)
#x=x.min(thresholds)
#inv_x = 1-x[mask]
#x=x.where(x<thresholds,1-x)
#x[mask]=inv_x
#x=x.masked_scatter(mask, inv_x)
return x
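# e.g. with a threshold of 0.5, a pixel value of 0.8 becomes 1 - 0.8 = 0.2 while 0.3
# is left unchanged; thresholds are broadcast per-sample over channels and spatial
# dimensions before the torch.where above.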
#https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
def blend(x,y,alpha): #out = image1 * (1.0 - alpha) + image2 * alpha
#return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0) #out=src1*alpha+src2*beta+gamma #Does not work with batched alpha
if not isinstance(x, torch.Tensor):
raise TypeError("x should be a tensor. Got {}".format(type(x)))
if not isinstance(y, torch.Tensor):
raise TypeError("y should be a tensor. Got {}".format(type(y)))
(batch_size, channels, h, w) = x.shape
alpha = alpha.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
res = x*(1-alpha) + y*alpha
return res
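# e.g. blend(x, y, alpha) = x*(1-alpha) + y*alpha returns x when alpha == 0 and y
# when alpha == 1; alpha is a per-sample tensor broadcast over channels and spatial
# dimensions, so color() above blends the grayscale image with the original.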

View file

@@ -1,202 +0,0 @@
from __future__ import print_function
from collections import defaultdict, deque
import datetime
import math
import time
import torch
import numpy as np
import os
from fastprogress import progress_bar
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{global_avg:.4f}"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
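# Usage sketch (illustrative):
#   v = SmoothedValue(window_size=20)
#   v.update(0.5); v.update(0.7)
#   v.global_avg  # -> 0.6, average over everything seen so far
#   v.avg         # average over the last 20 values only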
class ConfusionMatrix(object):
def __init__(self, num_classes):
self.num_classes = num_classes
self.mat = None
def update(self, a, b):
n = self.num_classes
if self.mat is None:
self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
with torch.no_grad():
k = (a >= 0) & (a < n)
inds = n * a[k].to(torch.int64) + b[k]
self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)
def reset(self):
self.mat.zero_()
def compute(self):
h = self.mat.float()
acc_global = torch.diag(h).sum() / h.sum()
acc = torch.diag(h) / h.sum(1)
return acc_global, acc
def __str__(self):
acc_global, acc = self.compute()
return (
'global correct: {:.1f}\n'
'average row correct: {}').format(
acc_global.item() * 100,
['{:.1f}'.format(i) for i in (acc * 100).tolist()])
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, parent, header=None, **kwargs):
if not header:
header = ''
log_msg = self.delimiter.join([
'{meters}'
])
progress = progress_bar(iterable, parent=parent, **kwargs)
for idx, obj in enumerate(progress):
yield idx, obj
progress.comment = log_msg.format(
meters=str(self))
print('{header} {meters}'.format(header=header, meters=str(self)))
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target[None])
res = []
for k in topk:
correct_k = correct[:k].flatten().sum(dtype=torch.float32)
res.append(correct_k * (100.0 / batch_size))
return res
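# Usage sketch (illustrative): for a (batch, num_classes) logits tensor,
#   acc1 = accuracy(output, target)[0]                  # top-1 accuracy in %
#   acc1, acc5 = accuracy(output, target, topk=(1, 5))  # needs >= 5 classes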
class EarlyStopping:
"""Early stops the training if validation loss doesn't improve after a given patience."""
def __init__(self, patience=7, verbose=False, delta=0, augmented_model=False):
"""
Args:
patience (int): How long to wait after last time validation loss improved.
Default: 7
verbose (bool): If True, prints a message for each validation loss improvement.
Default: False
delta (float): Minimum change in the monitored quantity to qualify as an improvement.
Default: 0
augmented_model (bool): If True, only the inner 'model' sub-module of an
augmented model is checkpointed.
Default: False
"""
self.patience = patience
self.verbose = verbose
self.counter = 0
self.best_score = None
self.early_stop = False
self.val_loss_min = np.Inf
self.delta = delta
self.augmented_model = augmented_model
def __call__(self, val_loss, model):
score = -val_loss
if self.best_score is None:
self.best_score = score
self.save_checkpoint(val_loss, model)
elif score < self.best_score - self.delta:
self.counter += 1
# print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
# if self.counter >= self.patience:
# self.early_stop = True
else:
self.best_score = score
self.save_checkpoint(val_loss, model)
self.counter = 0
def save_checkpoint(self, val_loss, model):
'''Saves model when validation loss decrease.'''
if self.verbose:
print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
torch.save(model.state_dict(), 'checkpoint.pt') if not self.augmented_model else torch.save(model['model'].state_dict(), 'checkpoint.pt')
self.val_loss_min = val_loss
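# Usage sketch (illustrative): call the object once per epoch with the current
# validation loss; it checkpoints to 'checkpoint.pt' whenever the loss improves.
#   es = EarlyStopping(patience=7, verbose=True)
#   for epoch in range(epochs):
#       val_loss = validate(model)   # hypothetical validation routine
#       es(val_loss, model)          # note: the patience-based early_stop flag is
#                                    # disabled above (counter check commented out)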