Initial Commit

Harle, Antoine (Contracteur) 2019-11-08 11:28:06 -05:00
commit 3ae3e02e59
44 changed files with 4908 additions and 0 deletions

46
.gitignore vendored Normal file

@@ -0,0 +1,46 @@
/higher/data
/Gradient-Descent-The-Ultimate-Optimizer/data
/FAR-HO/data
/__pycache__
*.pyo
*.pyc
*~
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Logs and databases #
######################
*.log
*.sql
*.sqlite
# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

456
FAR-HO/augmentation_transforms.py Executable file

@@ -0,0 +1,456 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Transforms used in the Augmentation Policies."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import inspect
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import
IMAGE_SIZE = 28
# Dataset mean and std of the images in the training set
MEANS = [0.49139968, 0.48215841, 0.44653091]
STDS = [0.24703223, 0.24348513, 0.26158784]
PARAMETER_MAX = 10 # Maximum 'level' a transform can be predicted with
def random_flip(x):
"""Flip the input x horizontally with 50% probability."""
if np.random.rand(1)[0] > 0.5:
return np.fliplr(x)
return x
def zero_pad_and_crop(img, amount=4):
"""Zero pad by `amount` zero pixels on each side then take a random crop.
Args:
img: numpy image that will be zero padded and cropped.
amount: amount of zeros to pad `img` with horizontally and vertically.
Returns:
The cropped zero padded img. The returned numpy array will be of the same
shape as `img`.
"""
padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
img.shape[2]))
padded_img[amount:img.shape[0] + amount, amount:
img.shape[1] + amount, :] = img
top = np.random.randint(low=0, high=2 * amount)
left = np.random.randint(low=0, high=2 * amount)
new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
return new_img
def create_cutout_mask(img_height, img_width, num_channels, size):
"""Creates a zero mask used for cutout of shape `img_height` x `img_width`.
Args:
img_height: Height of image cutout mask will be applied to.
img_width: Width of image cutout mask will be applied to.
num_channels: Number of channels in the image.
size: Size of the zeros mask.
Returns:
A mask of shape `img_height` x `img_width` with all ones except for a
square of zeros of shape `size` x `size`. This mask is meant to be
elementwise multiplied with the original image. Additionally returns
the `upper_coord` and `lower_coord` which specify where the cutout mask
will be applied.
"""
assert img_height == img_width
# Sample center where cutout mask will be applied
height_loc = np.random.randint(low=0, high=img_height)
width_loc = np.random.randint(low=0, high=img_width)
# Determine the upper-left and lower-right corners of the patch
upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
lower_coord = (min(img_height, height_loc + size // 2),
min(img_width, width_loc + size // 2))
mask_height = lower_coord[0] - upper_coord[0]
mask_width = lower_coord[1] - upper_coord[1]
assert mask_height > 0
assert mask_width > 0
mask = np.ones((img_height, img_width, num_channels))
zeros = np.zeros((mask_height, mask_width, num_channels))
mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
zeros)
return mask, upper_coord, lower_coord
def cutout_numpy(img, size=16):
"""Apply cutout with mask of shape `size` x `size` to `img`.
The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
This operation applies a `size`x`size` mask of zeros to a random location
within `img`.
Args:
img: Numpy image that cutout will be applied to.
size: Height/width of the cutout mask that will be applied to `img`.
Returns:
A numpy tensor that is the result of applying the cutout mask to `img`.
"""
img_height, img_width, num_channels = (img.shape[0], img.shape[1],
img.shape[2])
assert len(img.shape) == 3
mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
return img * mask
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / PARAMETER_MAX
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / PARAMETER_MAX)
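# Worked example (illustrative): with PARAMETER_MAX = 10,
# float_parameter(5, 0.3) == 0.15 and int_parameter(5, 30) == 15,
# i.e. a level of 5 applies half of a transform's maximum strength.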
def pil_wrap(img):
"""Convert the `img` numpy tensor to a PIL Image."""
return Image.fromarray(
np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')
def pil_unwrap(pil_img):
"""Converts the PIL img to a numpy array."""
pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
i1, i2 = np.where(pic_array[:, :, 3] == 0)
pic_array = (pic_array[:, :, :3] - MEANS) / STDS
pic_array[i1, i2] = [0, 0, 0]
return pic_array
def apply_policy(policy, img):
"""Apply the `policy` to the numpy `img`.
Args:
policy: A list of tuples with the form (name, probability, level) where
`name` is the name of the augmentation operation to apply, `probability`
is the probability of applying the operation and `level` is what strength
the operation to apply.
img: Numpy image that will have `policy` applied to it.
Returns:
The result of applying `policy` to `img`.
"""
#print('img shape :',img.shape)
#print('Policy len :',len(policy))
pil_img = pil_wrap(img)
for xform in policy:
#print('xform :', len(xform))
assert len(xform) == 3
name, probability, level = xform
#xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
pil_img = xform_fn(pil_img)
return pil_unwrap(pil_img)
class TransformFunction(object):
"""Wraps the Transform function for pretty printing options."""
def __init__(self, func, name):
self.f = func
self.name = name
def __repr__(self):
return '<' + self.name + '>'
def __call__(self, pil_img):
return self.f(pil_img)
class TransformT(object):
"""Each instance of this class represents a specific transform."""
def __init__(self, name, xform_fn):
self.name = name
self.xform = xform_fn
def pil_transformer(self, probability, level):
def return_function(im):
if random.random() < probability:
im = self.xform(im, level)
return im
name = self.name + '({:.1f},{})'.format(probability, level)
return TransformFunction(return_function, name)
def do_transform(self, image, level):
f = self.pil_transformer(PARAMETER_MAX, level)
return pil_unwrap(f(pil_wrap(image)))
################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
'FlipLR',
lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
'FlipUD',
lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
'AutoContrast',
lambda pil_img, level: ImageOps.autocontrast(
pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
'Equalize',
lambda pil_img, level: ImageOps.equalize(
pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
'Invert',
lambda pil_img, level: ImageOps.invert(
pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
'Smooth',
lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))
def _rotate_impl(pil_img, level):
"""Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
degrees = int_parameter(level, 30)
if random.random() > 0.5:
degrees = -degrees
return pil_img.rotate(degrees)
rotate = TransformT('Rotate', _rotate_impl)
def _posterize_impl(pil_img, level):
"""Applies PIL Posterize to `pil_img`."""
level = int_parameter(level, 4)
return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')
posterize = TransformT('Posterize', _posterize_impl)
def _shear_x_impl(pil_img, level):
"""Applies PIL ShearX to `pil_img`.
The ShearX operation shears the image along the horizontal axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearX applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))
shear_x = TransformT('ShearX', _shear_x_impl)
def _shear_y_impl(pil_img, level):
"""Applies PIL ShearY to `pil_img`.
The ShearY operation shears the image along the vertical axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearY applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))
shear_y = TransformT('ShearY', _shear_y_impl)
def _translate_x_impl(pil_img, level):
"""Applies PIL TranslateX to `pil_img`.
Translate the image in the horizontal direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateX applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))
translate_x = TransformT('TranslateX', _translate_x_impl)
def _translate_y_impl(pil_img, level):
"""Applies PIL TranslateY to `pil_img`.
Translate the image in the vertical direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateY applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))
translate_y = TransformT('TranslateY', _translate_y_impl)
def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
"""Applies a crop to `pil_img` with the size depending on the `level`."""
cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
return resized
crop_bilinear = TransformT('CropBilinear', _crop_impl)
def _solarize_impl(pil_img, level):
"""Applies PIL Solarize to `pil_img`.
Inverts all pixel values above a threshold determined by `level`.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had Solarize applied to it.
"""
level = int_parameter(level, 256)
return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')
solarize = TransformT('Solarize', _solarize_impl)
def _cutout_pil_impl(pil_img, level):
"""Apply cutout to pil_img at the specified level."""
size = int_parameter(level, 20)
if size <= 0:
return pil_img
img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
_, upper_coord, lower_coord = (
create_cutout_mask(img_height, img_width, num_channels, size))
pixels = pil_img.load() # create the pixel map
for i in range(upper_coord[0], lower_coord[0]): # for every col:
for j in range(upper_coord[1], lower_coord[1]): # For every row
pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly
return pil_img
cutout = TransformT('Cutout', _cutout_pil_impl)
def _enhancer_impl(enhancer):
"""Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
def impl(pil_img, level):
v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it
return enhancer(pil_img).enhance(v)
return impl
color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))
ALL_TRANSFORMS = [
flip_lr,
flip_ud,
auto_contrast,
equalize,
invert,
rotate,
posterize,
crop_bilinear,
solarize,
color,
contrast,
brightness,
sharpness,
shear_x,
shear_y,
translate_x,
translate_y,
cutout,
blur,
smooth
]
NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
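# Illustrative usage sketch (assuming `img` is a (28, 28, 3) numpy image
# normalized with MEANS/STDS):
#   pil_img = pil_wrap(img)
#   xform = NAME_TO_TRANSFORM['Rotate'].pil_transformer(probability=0.8, level=5)
#   out = pil_unwrap(xform(pil_img))  # numpy image, rotated with probability 0.8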

131
FAR-HO/blue_utils.py Normal file

@@ -0,0 +1,131 @@
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset
import os
import numpy as np
import tensorflow as tf
import augmentation_transforms as augmentation_transforms ##### WARNING: DUPLICATE FILE => HANDLE THIS MORE CLEANLY ####
def viz_data(dataset, fig_name='data_sample',aug_policy=None):
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = dataset.data[i][:,:,0]
if aug_policy :
img = augment_img(img,aug_policy)
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(np.nonzero(dataset.target[i])[0].item())
plt.savefig(fig_name)
def augment_img(data, policy):
#print('Im shape',data.shape)
data = np.stack((data,)*3, axis=-1) # Crude hack just to force 3 channels
#print('Im shape',data.shape)
final_img = augmentation_transforms.apply_policy(policy, data)
#final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
# Apply cutout
#final_img = augmentation_transforms.cutout_numpy(final_img)
im_rgb = np.array(final_img, np.float32)
im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140]) # Just to go back to 1 channel
return im_gray
### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def cnn(x_data_tf,y_data_tf, name='model'):
# tunable hyperparameters for nn architecture
s_f_conv1 = 3; # filter size of first convolution layer (default = 3)
n_f_conv1 = 36; # number of features of first convolution layer (default = 36)
s_f_conv2 = 3; # filter size of second convolution layer (default = 3)
n_f_conv2 = 36; # number of features of second convolution layer (default = 36)
s_f_conv3 = 3; # filter size of third convolution layer (default = 3)
n_f_conv3 = 36; # number of features of third convolution layer (default = 36)
n_n_fc1 = 576; # number of neurons of first fully connected layer (default = 576)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(x_data_tf,
W_conv1_tf) + b_conv1_tf,
name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf,
name = 'h_pool1_tf') # (.,14,14,32)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2,
n_f_conv1, n_f_conv2],
name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf,
W_conv2_tf) + b_conv2_tf,
name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
# 3.layer: convolution + max pooling
W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3,
n_f_conv2, n_f_conv3],
name = 'W_conv3_tf')
b_conv3_tf = bias_variable([n_f_conv3], name = 'b_conv3_tf')
h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf,
W_conv3_tf) + b_conv3_tf,
name = 'h_conv3_tf') #(.,7,7,32)
h_pool3_tf = max_pool_2x2(h_conv3_tf,
name = 'h_pool3_tf') # (.,4,4,32)
# 4.layer: fully connected
W_fc1_tf = weight_variable([4*4*n_f_conv3,n_n_fc1],
name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1,4*4*n_f_conv3],
name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf,
W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, 10], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([10], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf),
b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
tf.argmax(y_data_tf, 1),
name = 'y_pred_correct_tf')
return y_pred_proba_tf
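# Illustrative usage sketch (assuming MNIST placeholders, as in test_cnn.py):
#   x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
#   y = tf.placeholder(tf.float32, shape=[None, 10])
#   y_pred_proba = cnn(x, y)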

166
FAR-HO/far_pba_cifar.py Normal file

@@ -0,0 +1,166 @@
#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
from __future__ import absolute_import, print_function, division
import os
import numpy as np
import tensorflow as tf
#import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
#import pprint
import autoaugment.augmentation_transforms as augmentation_transforms
#import autoaugment.policies as found_policies
from autoaugment.wrn import build_wrn_model
def build_model(inputs, num_classes, is_training, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model being built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
with contextlib.nested(*scopes):
if hparams.model_name == 'pyramid_net':
logits = build_shake_drop_model(
inputs, num_classes, is_training)
elif hparams.model_name == 'wrn':
logits = build_wrn_model(
inputs, num_classes, hparams.wrn_size)
elif hparams.model_name == 'shake_shake':
logits = build_shake_shake_model(
inputs, num_classes, hparams, is_training)
return logits
class CifarModel(object):
"""Builds an image model for Cifar10/Cifar100."""
def __init__(self, hparams):
self.hparams = hparams
def build(self, mode):
"""Construct the cifar model."""
assert mode in ['train', 'eval']
self.mode = mode
self._setup_misc(mode)
self._setup_images_and_labels()
self._build_graph(self.images, self.labels, mode)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _setup_misc(self, mode):
"""Sets up miscellaneous in the cifar model constructor."""
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
self.reuse = None if (mode == 'train') else True
self.batch_size = self.hparams.batch_size
if mode == 'eval':
self.batch_size = 25
def _setup_images_and_labels(self):
"""Sets up image and label placeholders for the cifar model."""
if FLAGS.dataset == 'cifar10':
self.num_classes = 10
else:
self.num_classes = 100
self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
self.labels = tf.placeholder(tf.float32,
[self.batch_size, self.num_classes])
def assign_epoch(self, session, epoch_value):
session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})
def _build_graph(self, images, labels, mode):
"""Constructs the TF graph for the cifar model.
Args:
images: A 4-D image Tensor
labels: A 2-D labels Tensor.
mode: string indicating training mode (e.g., 'train', 'valid', 'test').
"""
is_training = 'train' in mode
if is_training:
self.global_step = tf.train.get_or_create_global_step()
logits = build_model(
images,
self.num_classes,
is_training,
self.hparams)
self.predictions, self.cost = helper_utils.setup_loss(
logits, labels)
self.accuracy, self.eval_op = tf.metrics.accuracy(
tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
self._calc_num_trainable_params()
# Adds L2 weight decay to the cost
self.cost = helper_utils.decay_weights(self.cost,
self.hparams.weight_decay_rate)
#### Warning: differs from the original implementation
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
########################################################
######## PBA ############
# Parallel CIFAR model trainer
tf.flags.DEFINE_string('model_name', 'wrn',
'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
'pyramid_net')
tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
tf.flags.DEFINE_string('data_path', '/tmp/data',
'Directory where dataset is located.')
tf.flags.DEFINE_string('dataset', 'cifar10',
'Dataset to train with. Either cifar10 or cifar100')
tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
## ???
FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'
hparams = tf.contrib.training.HParams(
train_size=50000,
validation_size=0,
eval_test=1,
dataset=FLAGS.dataset,
data_path=FLAGS.data_path,
batch_size=128,
gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
hparams.add_hparam('model_name', 'wrn')
hparams.add_hparam('num_epochs', 200)
hparams.add_hparam('wrn_size', 160)
hparams.add_hparam('lr', 0.1)
hparams.add_hparam('weight_decay_rate', 5e-4)
data_loader = data_utils.DataSet(hparams)
data_loader.reset()
with tf.Graph().as_default(): #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
"""Builds the image models for train and eval."""
# Determine if we should build the train and eval model. When using
# distributed training we only want to build one or the other and not both.
with tf.variable_scope('model', use_resource=False):
m = CifarModel(hparams)
m.build('train')
#self._num_trainable_params = m.num_trainable_params
#self._saver = m.saver
#with tf.variable_scope('model', reuse=True, use_resource=False):
# meval = CifarModel(self.hparams)
# meval.build('eval')
##### FAR-HO ####
for _ in range(n_hyper_iterations):

92
FAR-HO/test.py Normal file

@@ -0,0 +1,92 @@
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt
sess = tf.InteractiveSession()
def get_data():
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
return datasets.train, datasets.validation
def g_logits(x,y):
with tf.variable_scope('model'):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x,y)
train_set, validation_set = get_data()
lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it =10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
T = 20 # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []
for _ in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
inner_obj = L.eval(train_set_supplier())
outer_obj = E.eval(validation_set_supplier())
inner_losses.append(inner_obj)
outer_losses.append(outer_obj)
print('inner:', inner_obj)
print('outer:', outer_obj)
train_acc = accuracy.eval(train_set_supplier())
val_acc = accuracy.eval(validation_set_supplier())
train_accs.append(train_acc)
val_accs.append(val_acc)
print('training accuracy', train_acc)
print('validation accuracy', val_acc)
print('learning rate', lr.eval())
print('norm of examples weight', tf.norm(lambdas).eval())
print('-'*50)
plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)
plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))

126
FAR-HO/test_cnn.py Normal file

@@ -0,0 +1,126 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parameters ####
n_hyper_iterations = 500
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 1.e-3
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.05, .05,))
butil.viz_data(train_set)
print('Data sampled !')
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when the HPs are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))

141
FAR-HO/test_cnn_aug.py Normal file

@@ -0,0 +1,141 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parameters ####
n_hyper_iterations = 10
T = 10 # Number of inner iterations
rev_it =10
hp_lr = 0.02
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.1, .1,))
probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1))
probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
#probX, probY = 0.5, 0.5
#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)]
policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
print('Hyp :',far.utils.hyperparameters(scope=None))
#butil.viz_data(train_set, aug_policy= policy)
#print('Data sampled !')
# Artificially add the transformations to the loss just so they are counted in the graph dynamics
probX_loss = tf.sigmoid(probX)
probY_loss = tf.sigmoid(probY)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(probX_loss*probY_loss*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.AdamOptimizer()
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
#print(train_set.dim_data,validation_set.dim_data)
his_params = []
tf.global_variables_initializer().run()
butil.viz_data(train_set, fig_name= 'Start_sample',aug_policy= policy)
print('Data sampled !')
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=True)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
butil.viz_data(train_set, fig_name= 'Train_sample_{}'.format(hyt),aug_policy= policy)
print('Data sampled !')
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[4])
print('validation accuracy:', res[5])
print('Transformation : ProbX -',res[0],'/ProbY -',res[1])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('ProbX')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('ProbY')
ax[1].plot([e[1] for e in his_params])
ax[2].set_title('Tr. and val. errors')
ax[2].plot([e[2] for e in his_params])
ax[2].plot([e[3] for e in his_params])
ax[3].set_title('Tr. and val. acc')
ax[3].plot([e[4] for e in his_params])
ax[3].plot([e[5] for e in his_params])
plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T))

133
FAR-HO/test_fc.py Normal file

@@ -0,0 +1,133 @@
#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
#import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parameters ####
n_hyper_iterations = 90
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 0.1
epochs =10
batch_size = 256
##########################
#MNIST
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
#CNN : reshape = False
#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
#logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.6, .3,))
#butil.viz_data(train_set)
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when the HPs are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
#inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step())
train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
print('Hyper iterations per epoch',int(train_set.num_examples/batch_size*epochs/T))
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=False)
res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', res[3])
print('outer:', res[4])
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T))
#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T))

View file

@@ -0,0 +1,5 @@
venv/
__pycache__
data/
log/
.vscode/

View file

@@ -0,0 +1,33 @@
# Gradient Descent: The Ultimate Optimizer
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |
This repository contains the paper and the code for the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).
I couldn't find the code (which appears in the appendix at the end of the
paper) anywhere on the web. What I present here is the code from the paper with
instructions on how to set it up.
Getting the code into a runnable state required some fixes on my part, so the
code might differ slightly from that presented in the paper.
## Set up
```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```
When you are done you can exit the virtualenv with
```shell
deactivate
```

View file

@@ -0,0 +1,244 @@
from hyperopt import *
#from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class MNIST_FullyConnected_Augmented(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
self.device = device
#print(self.device)
parameters = {
"w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
"b1": torch.zeros(num_hid, device=self.device).t(),
"w2": torch.zeros(num_hid, num_out, device=self.device).t(),
"b2": torch.zeros(num_out, device=self.device).t(),
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
#print(self.device)
def forward(self, x):
"""Compute a prediction."""
#print("Prob:",self.parameters["prob"].item())
if random.random() < self.parameters["prob"]:
#angle = 45
#x = TF.rotate(x, angle)
#print(self.device)
#x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
x = x + self.parameters["mag"]
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_FC_augmented / " + str(self.optimizer)
class LeNet(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
parameters = {
"w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
"b1": torch.zeros(20, device=self.device),
"w2": torch.zeros(50, 20, 5, 5, device=self.device),
"b2": torch.zeros(50, device=self.device),
"w3": torch.zeros(500,4*4*50, device=self.device),
"b3": torch.zeros(500, device=self.device),
"w4": torch.zeros(10, 500, device=self.device),
"b4": torch.zeros(10, device=self.device),
#Data augmentation
"prob": torch.tensor(1.0, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
if random.random() < self.parameters["prob"]:
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)
class LeNet_v2(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
#self.fc1 = nn.Linear(4*4*50, 500)
self.fc1 = nn.Linear(1250, 500)
self.fc2 = nn.Linear(500, 10)
#print(self.conv1.weight)
parameters = {
"w1": self.conv1.weight,
"b1": self.conv1.bias,
"w2": self.conv2.weight,
"b2": self.conv2.bias,
"w3": self.fc1.weight,
"b3": self.fc1.bias,
"w4": self.fc2.weight,
"b4": self.fc2.bias,
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(1.0, device=self.device),
}
Optimizable.__init__(self, parameters, optimizer)
'''
def forward(self, x): # Saturates the memory ???
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
#x = x.view(-1, 4*4*50)
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
'''
def forward(self, x):
if random.random() < self.parameters["prob"].item():
#print(self.parameters["prob"])
#x = [T.ToTensor()(
# TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [im + self.parameters["mag"]
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] * 180 # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def initialize(self):
self.optimizer.initialize()
def adjust(self):
self.optimizer.adjust(self.parameters)
def adjust_val(self):
self.optimizer.adjust_val(self.parameters)
def eval(self):
self.parameters['prob']=torch.tensor(0.0, device=self.device)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)

View file

@@ -0,0 +1,52 @@
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF
class MNIST_aug(Dataset):
training_file = 'training.pt'
test_file = 'test.pt'
classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
'5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
def __init__(self):
self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
self.set_stage(0) # initial stage
def __getitem__(self, index):
image = self.images[index]
# Just apply your transformations here
image = self.crop(image)
x = TF.to_tensor(image)
return x
def set_stage(self, stage):
if stage == 0:
print('Using (32, 32) crops')
self.crop = transforms.RandomCrop((32, 32))
elif stage == 1:
print('Using (28, 28) crops')
self.crop = transforms.RandomCrop((28, 28))
def __len__(self):
return len(self.images)
dataset = MNIST_aug()
loader = DataLoader(dataset,
batch_size=2,
num_workers=2,
shuffle=True)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))
loader.dataset.set_stage(1)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))

View file

@@ -0,0 +1,150 @@
#from hyperopt import *
from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class LeNet_v3(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet_v3, self).__init__()
self.params = nn.ParameterDict({
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
'b1': nn.Parameter(torch.zeros(20)),
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
'b2': nn.Parameter(torch.zeros(50)),
'w3': nn.Parameter(torch.zeros(500,4*4*50)),
'b3': nn.Parameter(torch.zeros(500)),
'w4': nn.Parameter(torch.zeros(10, 500)),
'b4': nn.Parameter(torch.zeros(10))
})
def initialize(self):
nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))
def forward(self, x):
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.params["w4"], self.params["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "mnist_CNN_augmented / "
class Data_aug(nn.Module):
def __init__(self):
super(Data_aug, self).__init__()
self.data_augmentation = True
self.params = nn.ParameterDict({
"prob": nn.Parameter(torch.tensor(0.5)),
"mag": nn.Parameter(torch.tensor(180.0))
})
#self.params["mag"].register_hook(print)
def forward(self, x):
if self.data_augmentation and self.training and random.random() < self.params["prob"]:
#print('Aug')
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.params["mag"] # in degrees
angle = torch.ones(batch_size, device=x.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=x.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=x.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
return x
def eval(self):
self.params['prob'].data.zero_() # set the augmentation probability to 0 for eval
nn.Module.eval(self)
def data_augmentation(self, mode=True):
self.data_augmentation=mode
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "Data_Augmenter / "
class Augmented_model(nn.Module):
def __init__(self, model, data_augmenter):
#self.model = model
#self.data_aug = data_augmenter
super(Augmented_model, self).__init__()#nn.Module.__init__(self)
#super().__init__()
self.mods = nn.ModuleDict({
'data_aug': data_augmenter,
'model': model
})
#for name, param in self.mods.named_parameters():
# print(name, type(param.data), param.size())
#params = self.mods.named_parameters() #self.parameters()
#parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
#Optimizable.__init__(self, params, optimizer)
def initialize(self):
self.mods['model'].initialize()
def forward(self, x):
return self.mods['model'](self.mods['data_aug'](x))
#def adjust(self):
# self.optimizer.adjust(self) # parameters of the dicts
def data_augmentation(self, mode=True):
self.mods['data_aug'].data_augmentation=mode
def begin(self):
for param in self.parameters():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def print_grad_fn(self):
for n, m in self.mods.items():
m.print_grad_fn()
def __str__(self):
return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer)
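# Illustrative usage sketch (assuming a batch `x` of shape (B, 1, 28, 28) and
# integer labels `y`):
#   aug_model = Augmented_model(LeNet_v3(num_inp=1, num_out=10), Data_aug())
#   aug_model.initialize()
#   out = aug_model(x)          # rotation applied before the CNN
#   loss = F.nll_loss(out, y)   # log_softmax output pairs with nll_loss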

View file

@@ -0,0 +1,5 @@
digraph {
graph [size="12,12"]
node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
94296775052080 [label=NoneType fillcolor=darkolivegreen1]
}

View file

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="75pt" height="30pt"
viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
<!-- 94296775052080 -->
<g id="node1" class="node">
<title>94296775052080</title>
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
</g>
</g>
</svg>


View file

@@ -0,0 +1,345 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Optimizable():#nn.Module):
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Model but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
interface which does not give us enough control about the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
#super(Optimizable, self).__init__()
self.parameters = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = [] #Reinitialize to avoid overloading memory
for name, param in self.parameters.items():
#for param in self.parameters:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
#param = param.to(self.device)
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backward() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for n, p in self.parameters.items():
print(n," - ", p.grad_fn)
def param_grad(self):
return self.all_params_with_gradients
def param(self, param_name):
return self.parameters[param_name].item()
class MNIST_FullyConnected(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer):
parameters = {
"w1": torch.zeros(num_inp, num_hid).t(),
"b1": torch.zeros(num_hid).t(),
"w2": torch.zeros(num_hid, num_out).t(),
"b2": torch.zeros(num_out).t(),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
"""Compute a prediction."""
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist / " + str(self.optimizer)
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def adjust_val(self, params):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
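# --- Shape sketch (illustration only): the training loop further down flattens MNIST
# batches to 28*28 vectors before calling forward(), so the expected shapes of the
# network above are as follows; the batch below is a placeholder.
net_sketch = MNIST_FullyConnected(28 * 28, 128, 10, optimizer=NoOpOptimizer())
net_sketch.initialize()                          # Kaiming init for w1/w2; no-op for the static optimizer
dummy_batch = torch.zeros(32, 28 * 28)           # placeholder batch of flattened MNIST images
dummy_out = net_sketch.forward(dummy_batch)      # shape (32, 10): log-probabilities from log_softmax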
class Adam(Optimizable):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
self.device = device
parameters = {
"alpha": torch.tensor(alpha, device=self.device),
"beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
"beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
"log_eps": torch.tensor(log_eps, device=self.device),
}
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.num_adjustments_val = 0
self.cache = {}
for name, param in parameters.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
def adjust(self, params): #Update the model (learning) parameters
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
#print('Adam update')
t = self.num_adjustments
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name == "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def adjust_val(self, params): #Update the transformation parameters
self.num_adjustments_val += 1
self.optimizer.adjust_val(self.parameters)
#print('Adam update')
t = self.num_adjustments_val
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name != "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
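# --- Usage sketch (illustration only): the 'nominal operation' loop from the Optimizable
# docstring, with the Adam-on-Adam stack that the experiment script in this commit also
# builds. The device is forced to CPU here because the network parameters above are plain
# CPU tensors; the batch is a placeholder.
_cpu = torch.device('cpu')
_opt = Adam(alpha=0.001, optimizer=Adam(device=_cpu), device=_cpu)   # hyperoptimizer stack
_net = MNIST_FullyConnected(28 * 28, 128, 10, _opt)
_net.initialize()
_x, _y = torch.zeros(32, 28 * 28), torch.zeros(32, dtype=torch.long)
_net.begin()                                     # track gradients on weights and hyperparameters
_net.zero_grad()
_loss = F.nll_loss(_net.forward(_x), _y)
_loss.backward(create_graph=True)                # keep the graph so the update stays differentiable
_net.adjust()                                    # Adam updates the weights; the inner Adam updates the outer Adam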
'''
class SGD(Optimizable):
"""
A hyperoptimizable SGD
"""
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
parameters = {"alpha": torch.tensor(alpha)}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters["alpha"]
def __str__(self):
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
class SGDPerParam(Optimizable):
"""
Like above, but can be taught a separate step size for each parameter it
tunes.
"""
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
def __str__(self):
return "sgd(%s) / " % str(
{k: t.item() for k, t in self.parameters.items()}
) + str(self.optimizer)
'''
'''
class AdamBaydin(Optimizable):
""" Same as above, but only optimizes the learning rate, treating the
remaining hyperparameters as constants. """
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
):
parameters = {"alpha": torch.tensor(alpha)}
self.beta1 = beta1
self.beta2 = beta2
self.log_eps = log_eps
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.cache = {}
def adjust(self, params):
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
t = self.num_adjustments
beta1 = self.beta1
beta2 = self.beta2
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape),
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
}
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
params[name] = param.detach() - self.parameters["alpha"] * dparam
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
'''

View file

@ -0,0 +1,296 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
class Optimizable():
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Module but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
interface, which does not give us enough control over the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
self.params = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = nn.ParameterList() #Reinitialize to avoid overloading memory
print("Opti param :", type(self.params))
#for name, param in self.params:
if isinstance(self.params,(dict, nn.ParameterDict)): #Dict-like
for name, param in self.params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
if isinstance(self.params,list): #List
for param in self.params:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backward() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
#print("NoOpt begin")
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def step(self):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
class SGD(Optimizer, nn.Module): #Avoid (inheriting from) Optimizer ?
"""
A hyperoptimizable SGD
"""
def __init__(self, params, lr=0.01, height=0):
self.height=height
#params : the parameters to optimize
#the rest (defaults) are the optimizer's own parameters
print('SGD - H', height)
nn.Module.__init__(self)
optim_keys = ('lr','') #Move into Optimizable ? #'' avoids iterating over the characters of the string...
'''
self_params = {"lr": torch.tensor(lr),
"momentum": 0,
"dampening":0,
"weight_decay":0,
"nesterov": False}
'''
#self_params = dict(lr=torch.tensor(lr),
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
self_params = nn.ParameterDict({
"lr": nn.Parameter(torch.tensor(lr)),
"momentum": nn.Parameter(torch.tensor(0.0)),
"dampening": nn.Parameter(torch.tensor(0.0)),
"weight_decay": nn.Parameter(torch.tensor(0.0)),
})
for k in self_params.keys() & optim_keys:
self_params[k].requires_grad_() # keep gradient information…
self_params[k].retain_grad() # even if not a leaf…
#self_params[k].register_hook(print)
if height==0:
optimizer = NoOpOptimizer()
else:
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) #Devrait mar
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
#optimizer.register_backward_hook(print)
self.optimizer = optimizer
#if(height==0):
# for n,p in params.items():
# print(n,p)
#Optimizable.__init__(self, self_params, optimizer)
#print(type(params))
#for p in params:
# print(type(p))
Optimizer.__init__(self, params, self_params)
for group in self.param_groups:
for p in group['params']:
print(type(p.data), p.size())
print('End SGD-H', height)
def begin(self):
for group in self.param_groups:
for p in group['params']:
#print(type(p.data), p.size())
p.requires_grad_() # keep gradient information…
p.retain_grad() # even if not a leaf…
#p.register_hook(lambda x: print(self.height, x.grad_fn))
self.optimizer.begin()
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for group in self.param_groups:
for i, p in enumerate(group['params']):
print(self.height," - ", i, p.grad_fn)
#def adjust(self, params):
# self.optimizer.adjust(self.params)
# for name, param in params.items():
# g = param.grad.detach()
# params[name] = param.detach() - g * self.params["lr"]
def step(self):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
print('SGD start')
self.optimizer.step()
for group in self.param_groups:
for i, p in enumerate(group['params']):
if p.grad is None:
continue
#d_p = p.grad.data
d_p = p.grad.detach()
#print(group['lr'])
#p.data.add_(-group['lr'].item(), d_p) #.item() would detach lr from the graph, and applying both updates would step twice
#group['params'][i] = p.detach() - d_p * group['lr']
p.data -= group['lr']*d_p #Careful: going through .data loses gradient information
for p in group['params']:
if p.grad is None:
print(p, p.grad)
continue
print("SGD end")
#return loss
def __str__(self):
return "sgd(%f) / " % self.defaults["lr"] + str(self.optimizer)
class Adam(Optimizable, nn.Module):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
#super(Adam, self).__init__()
nn.Module.__init__(self)
self.device = device
params = nn.ParameterDict({
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
})
Optimizable.__init__(self, params, optimizer)
self.num_adjustments = 0
self.cache = {}
for name, param in params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def adjust(self, params, pytorch_mod=False):
self.num_adjustments += 1
self.optimizer.adjust(self.params)
t = self.num_adjustments
beta1 = Adam.clamp(self.params["beta1"])
beta2 = Adam.clamp(self.params["beta2"])
updated_param = {}
if pytorch_mod:
params = dict(params.named_parameters(prefix='')) #Rename this input ?...
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.params["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(nn.Parameter(m)) #Risk of overloading memory => would a dict be better ?
self.all_params_with_gradients.append(nn.Parameter(v))
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
updated_param[name] = param.detach() - self.params["alpha"] * dparam
if pytorch_mod: params.update(updated_param) #Rename this input ?...
else: params = updated_param
def __str__(self):
return "adam(" + str(self.params) + ") / " + str(self.optimizer)
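# --- Usage sketch (illustration only) for the nested SGD defined above, matching the
# commented-out call in the training script (SGD(model.parameters(), lr=0.01, height=1)).
# height=1 means the learning rate is itself handled by an inner SGD; the linear model
# and the random batch are placeholders.
_m = nn.Linear(10, 2)
_sgd = SGD(_m.parameters(), lr=0.01, height=1)
_sgd.begin()                                     # enable gradient tracking down the stack
_x, _y = torch.randn(8, 10), torch.randint(0, 2, (8,))
_loss = F.cross_entropy(_m(_x), _y)
_loss.backward(create_graph=True)                # create_graph=True, as in the training scripts of this commit
_sgd.step()                                      # the inner SGD steps first, then each weight is updated in place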

View file

@ -0,0 +1,182 @@
import numpy as np
import json, math, time, os
from hyperopt import *
import gc
BATCH_SIZE = 300
mnist_train = torchvision.datasets.MNIST(
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
)
mnist_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
dl_train = torch.utils.data.DataLoader(
mnist_train, batch_size=BATCH_SIZE, shuffle=False
)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
def test(model):
for i, (features_, labels_) in enumerate(dl_test):
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
def train(model, epochs=3, height=1):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
loss = F.nll_loss(pred, labels)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
return stats
def run(opt, name="out", usr={}, epochs=3, height=1):
torch.manual_seed(0x42)
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
print("Running...", str(model))
model.initialize()
log = train(model, epochs, height)
acc = test(model)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
return out
def sgd_experiments():
run(SGD(0.01), "sgd", epochs=1)
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
alpha = out["log"][-1]["params"]["alpha"]
print(alpha)
run(SGD(alpha), "sgd-final", epochs=1)
def adam_experiments():
run(Adam(), "adam", epochs=1)
print()
mo = SGDPerParam(
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
)
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
print(mo)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam+sgd-final",
epochs=1,
)
print()
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam2-final",
epochs=1,
)
print()
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
print(mo)
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
print()
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
def surface():
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
for log_alpha in np.linspace(-3, 2, 10):
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
def make_sgd_stack(height, top):
if height == 0:
return SGD(alpha=top)
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
def make_adam_stack(height, top=0.0000001):
if height == 0:
return Adam(alpha=top)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
def stack_test():
for top in np.linspace(-7, 3, 20):
for height in range(6):
print("height =", height, "top =", top)
opt = make_sgd_stack(height, 10 ** top)
run(
opt,
"metasgd3-%d@%+.2f" % (height, top),
{"height": height, "top": top},
epochs=1,
height=height,
)
gc.collect()
def perf_test():
for h in range(51):
print("height:", h)
# opt = make_sgd_stack(h, 0.01)
opt = make_adam_stack(h)
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
gc.collect()
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
surface()
sgd_experiments()
adam_experiments()
stack_test()
perf_test()
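# --- Reading results back (illustration only): each run() call above dumps its
# statistics to log/<name>.json; the "adam" file name comes from adam_experiments().
if os.path.isfile("log/adam.json"):
    with open("log/adam.json") as f:
        _out = json.load(f)
    print("final accuracy:", _out["acc"])
    print("first losses:", [_step["loss"] for _step in _out["log"][:5]])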

View file

@ -0,0 +1,5 @@
numpy==1.17.2
Pillow==6.2.0
six==1.12.0
torch==1.2.0
torchvision==0.4.0

View file

@ -0,0 +1,344 @@
import numpy as np
import json, math, time, os
from data_aug import *
#from data_aug_v2 import *
import gc
import matplotlib.pyplot as plt
from torchviz import make_dot, make_dot_from_trace
from torch.utils.data import SubsetRandomSampler
BATCH_SIZE = 300
#TEST_SIZE = 10000
TEST_SIZE = 300
DATA_LIMIT = 10
'''
data_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
'''
data_train = torchvision.datasets.CIFAR10(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.CIFAR10(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
train_subset_indices=range(int(len(data_train)/2))
val_subset_indices=range(int(len(data_train)/2),len(data_train))
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
def test(model, reshape_in=True, device = torch.device('cuda')):
for i, (features_, labels_) in enumerate(dl_test):
if reshape_in :
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
if train_data: dl = dl_train
else: dl = dl_val
for i, (features_, labels_) in enumerate(dl):
if i > DATA_LIMIT : break
#t0 = time.process_time()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
#optimizer.begin()
#optimizer.zero_grad()
model.begin()
model.zero_grad()
pred = model.forward(features)
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#model.print_grad_fn()
#optimizer.print_grad_fn()
#print('-'*50)
loss.backward(create_graph=True)
#optimizer.step()
if train_data: model.adjust()
else: model.adjust_val()
#tf = time.process_time()
#data = {
# "time": tf - t0,
# "iter": epoch * len(dl_train) + i,
# "loss": loss.item(),
# "params": {
# k: v.item()
# for k, v in model.optimizer.parameters.items()
# if "." not in k
# },
#}
#stats.append(data)
#print_torch_mem(i)
return loss.item()
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
log = []
for epoch in range(epochs):
#dl_train.dataset.transform=torchvision.transforms.Compose([
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
# torchvision.transforms.ToTensor()
#])
viz_data(fig_name='res/data_sample')
t0 = time.process_time()
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
#acc = test(model=model, reshape_in=reshape_in, device=device)
acc = 0
tf = time.process_time()
data = {
"time": tf - t0,
"epoch": epoch,
"loss": loss,
"acc": acc,
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
#for k, v in model.mods.data_aug.params.named_parameters()
if "." not in k
},
}
log.append(data)
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return log
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#print('-'*50)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
print('-'*50)
i=0
for obj in gc.get_objects():
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)) and len(obj.size())>1:
print(i, type(obj), obj.size())
i+=1
except:
pass
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return stats
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
torch.manual_seed(0x42)
if not cnn:
reshape_in = True
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
else:
reshape_in = False
#model = LeNet(1, 10,opt, device)
#model = LeNet_v2(1, 10,opt, device).to(device=device)
model = LeNet_v2(3, 10,opt, device).to(device=device)
optimizer=None
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m,
data_augmenter=a,
optimizer=opt).to(device) #the same optimizer twice ?...
'''
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m, data_augmenter=a).to(device)
#optimizer = SGD(model.parameters())
optimizer = SGD(model.parameters(), lr=0.01, height=1)
'''
#for idx, m in enumerate(model.modules()):
# print(idx, '->', m)
print("Running...", str(model))
model.initialize()
#print_model(model)
#model.data_augmentation(False)
#model.eval()
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
model.eval()
acc = test(model, reshape_in, device=device)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
#plot_res(log, fig_name='res/'+name)
return out
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
#print(height,device)
if height == 0:
return Adam(alpha=top, device=device)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
def plot_res(log, fig_name='res'):
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot([x["loss"] for x in log])
ax[1].set_title('Acc')
ax[1].plot([x["acc"] for x in log])
ax[2].set_title('mag')
ax[2].plot([x["data_aug"] for x in log])
plt.savefig(fig_name)
def print_torch_mem(add_info=''):
nb=0
max_size=0
for obj in gc.get_objects():
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-PyTorch tensor nb:",nb," / Max dim:", max_size)
def print_model(model, fig_name='graph/graph', device=torch.device('cuda')): #Does not seem to work for models written in a functional style
x = torch.randn(1,1,28,28, device=device)
dot=make_dot(model(x), params=dict(model.named_parameters()))
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
dot.render(fig_name)
print("Model graph generated !")
def viz_data(fig_name='data_sample'):
features_, labels_ = next(iter(dl_train))
plt.figure(figsize=(10,10))
#for i, (features_, labels_) in enumerate(dl_train):
for i in range(25):
if i==25: break
#print(features_.size(), labels_.size())
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = features_[i,0,:,:]
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(labels_[i].item())
plt.savefig(fig_name)
##########################################
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
device = torch.device('cuda')
run(make_adam_stack(height=1, top=0.001, device=device),
"Augmented_MNIST",
epochs=100,
cnn=True,
device = device)
print()

583
higher/dataug.py Normal file
View file

@ -0,0 +1,583 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import *
import kornia #needed by the rotation/warp helpers below
import random #needed by Data_aug.forward
import numpy as np #needed by Data_augV3.flip
import copy
import transformations as TF
class Data_aug(nn.Module): #Parameterized rotation
def __init__(self):
super(Data_aug, self).__init__()
self._data_augmentation = True
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.tensor(0.5)),
"mag": nn.Parameter(torch.tensor(1.0))
})
#self.params["mag"].register_hook(print)
def forward(self, x):
if self._data_augmentation and random.random() < self._params["prob"]:
#print('Aug')
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self._params["mag"]*180 # in degrees
angle = torch.ones(batch_size, device=x.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=x.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=x.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
return x
def eval(self):
self.augment(mode=False)
nn.Module.eval(self)
def augment(self, mode=True):
self._data_augmentation=mode
def __getitem__(self, key):
return self._params[key]
def __str__(self):
return "Data_aug(Mag-1 TF)"
class Data_augV2(nn.Module): #Methode exacte
def __init__(self):
super(Data_augV2, self).__init__()
self._data_augmentation = True
self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
#self._fixed_transf=[0.0]
self._nb_tf= len(self._fixed_transf)
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Uniform probability distribution
#"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
})
#print(self._params["prob"], self._params["prob2"])
self.transf_idx=0
def forward(self, x):
if self._data_augmentation:
#print('Aug',self._fixed_transf[self.transf_idx])
device = x.device
batch_size = x.shape[0]
# create transformation (rotation)
#alpha = 180 # in degrees
alpha = self._fixed_transf[self.transf_idx]
angle = torch.ones(batch_size, device=device) * alpha
x = self.rotate(x,angle)
return x
def rotate(self, x, angle):
device = x.device
batch_size = x.shape[0]
# define the rotation center
center = torch.ones(batch_size, 2, device=device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
def adjust_prob(self): #Detach from gradient ?
self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
#print('proba',self._params['prob'])
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
#print('Sum p', sum(self._params['prob']))
def eval(self):
self.augment(mode=False)
nn.Module.eval(self)
def augment(self, mode=True):
self._data_augmentation=mode
def __getitem__(self, key):
return self._params[key]
def __str__(self):
return "Data_augV2(Exact-%d TF)" % self._nb_tf
class Data_augV3(nn.Module): #Uniform / mixed sampling
def __init__(self, mix_dist=0.0):
super(Data_augV3, self).__init__()
self._data_augmentation = True
#self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
self._fixed_transf=[0.0, 1.0, -1.0] #Flips (Identity,Horizontal,Vertical)
#self._fixed_transf=[0.0]
self._nb_tf= len(self._fixed_transf)
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Uniform probability distribution
#"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
})
#print(self._params["prob"], self._params["prob2"])
self._sample = []
self._mix_dist = False
if mix_dist != 0.0:
self._mix_dist = True
self._mix_factor = max(min(mix_dist, 1.0), 0.0)
def forward(self, x):
if self._data_augmentation:
device = x.device
batch_size = x.shape[0]
#good_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1),self._params["prob"], device=device))
#bad_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1), 1-self._params["prob"], device=device))
#transform_dist = Categorical(probs=torch.tensor([self._params["prob"], 1-self._params["prob"]], device=device))
#self._sample = transform_dist._sample(sample_shape=torch.Size([batch_size,1]))
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=0)
if not self._mix_dist:
distrib = uniforme_dist
else:
distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=0) #Mix the learned distribution with the uniform one using mix_factor
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*distrib)
self._sample = cat_distrib.sample()
TF_param = torch.tensor([self._fixed_transf[x] for x in self._sample], device=device) #Marco's approach, possibly faster
#x = self.rotate(x,angle=TF_param)
x = self.flip(x,flip_mat=TF_param)
return x
def rotate(self, x, angle):
device = x.device
batch_size = x.shape[0]
# define the rotation center
center = torch.ones(batch_size, 2, device=device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
def flip(self, x, flip_mat):
#print(flip_mat)
device = x.device
batch_size = x.shape[0]
h, w = x.shape[2], x.shape[3] # destination size
#points_src = torch.ones(batch_size, 4, 2, device=device)
#points_dst = torch.ones(batch_size, 4, 2, device=device)
#Identity
iM=torch.tensor(np.eye(3))
#Horizontal flip
# the source points are the region to crop corners
#points_src = torch.FloatTensor([[
# [w - 1, 0], [0, 0], [0, h - 1], [w - 1, h - 1],
#]])
# the destination points are the image vertexes
#points_dst = torch.FloatTensor([[
# [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
#]])
# compute perspective transform
#hM = kornia.get_perspective_transform(points_src, points_dst)
hM =torch.tensor( [[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]])
#Vertical flip
# the source points are the region to crop corners
#points_src = torch.FloatTensor([[
# [0, h - 1], [w - 1, h - 1], [w - 1, 0], [0, 0],
#]])
# the destination points are the image vertexes
#points_dst = torch.FloatTensor([[
# [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
#]])
# compute perspective transform
#vM = kornia.get_perspective_transform(points_src, points_dst)
vM =torch.tensor( [[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]])
#print(vM)
M=torch.ones(batch_size, 3, 3, device=device)
for i in range(batch_size): # To be optimized
if flip_mat[i]==0.0:
M[i,]=iM
elif flip_mat[i]==1.0:
M[i,]=hM
elif flip_mat[i]==-1.0:
M[i,]=vM
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def adjust_prob(self, soft=False): #Detach from gradient ?
if soft :
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Too 'soft': gets stuck on the uniform distribution if the lr is too small
else:
#self._params['prob'].clamp(min=0.0,max=1.0)
self._params['prob'].data = F.relu(self._params['prob'].data)
#self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
#print('proba',self._params['prob'])
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
#print('Sum p', sum(self._params['prob']))
def loss_weight(self):
#w_loss = [self._params["prob"][x] for x in self._sample]
#print(self._sample.view(-1,1).shape)
#print(self._sample[:10])
w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
w_loss.scatter_(1, self._sample.view(-1,1), 1)
#print(w_loss.shape)
#print(w_loss[:10,:])
w_loss = w_loss * self._params["prob"]
#print(w_loss.shape)
#print(w_loss[:10,:])
w_loss = torch.sum(w_loss,dim=1)
#print(w_loss.shape)
#print(w_loss[:10])
return w_loss
def train(self, mode=None):
if mode is None :
mode=self._data_augmentation
self.augment(mode=mode) #Pointless if mode=None
super(Data_augV3, self).train(mode)
def eval(self):
self.train(mode=False)
#super(Augmented_model, self).eval()
def augment(self, mode=True):
self._data_augmentation=mode
def __getitem__(self, key):
return self._params[key]
def __str__(self):
if not self._mix_dist:
return "Data_augV3(Uniform-%d TF)" % self._nb_tf
else:
return "Data_augV3(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf)
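# --- Usage sketch (illustration only): loss_weight() returns, for each sample, the
# probability of the transformation that was actually drawn. A hedged guess at how it is
# combined with a per-sample loss so that gradients reach the 'prob' parameter (the real
# training loop lives in test_dataug.py); the classifier and the batch are placeholders.
_clf = nn.Sequential(nn.Conv2d(3, 8, 3), nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(8, 10))
_x, _y = torch.rand(8, 3, 32, 32), torch.randint(0, 10, (8,))   # images in [0, 1]
_aug = Data_augV3(mix_dist=0.5)
_per_sample = F.cross_entropy(_clf(_aug(_x)), _y, reduction='none')
_loss = (_per_sample * _aug.loss_weight()).mean()   # reweight each sample by p(sampled TF)
_loss.backward()
_aug.adjust_prob()                                  # clamp and renormalize so the probabilities sum to 1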
class Data_augV4(nn.Module): #Transformations applied through a selection mask
def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0):
super(Data_augV4, self).__init__()
self._data_augmentation = True
#self._TF_matrix={}
#self._input_info={'h':0, 'w':0, 'device':None} #Input associated with TF_matrix
'''
self._mag_fct={ #f(normalized_mag)=real_mag
## Geometric TF ##
'Identity' : (lambda mag: None),
'FlipUD' : (lambda mag: None),
'FlipLR' : (lambda mag: None),
'Rotate': (lambda mag: random.randint(-int_parameter(mag, maxval=30), int_parameter(mag, maxval=30))),
'TranslateX': (lambda mag: [random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20)), 0]),
'TranslateY': (lambda mag: [0, random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20))]),
'ShearX': (lambda mag: [random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3)), 0]),
'ShearY': (lambda mag: [0, random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3))]),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Color':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Brightness':(lambda mag: random.uniform(1., float_parameter(mag, maxval=1.9))),
'Sharpness':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Posterize': (lambda mag: random.randint(4, int_parameter(mag, maxval=8))),
'Solarize': (lambda mag: random.randint(1, int_parameter(mag, maxval=256))/256.), #=>Image in [0,1] #Not efficient for batches
#Not functional
'Auto_Contrast': (lambda mag: None), #Not efficient for batches (very slow)
#'Equalize': (lambda mag: None),
}
'''
self._mag_fct = TF_dict
self._TF=list(self._mag_fct.keys())
self._nb_tf= len(self._TF)
self._fixed_mag=5 #[0, PARAMETER_MAX]
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Uniform probability distribution
})
self._sample = []
self._mix_dist = False
if mix_dist != 0.0:
self._mix_dist = True
self._mix_factor = max(min(mix_dist, 1.0), 0.0)
def forward(self, x):
if self._data_augmentation:
device = x.device
batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
## Sampling ##
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
if not self._mix_dist:
self._distrib = uniforme_dist
else:
self._distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix the learned distribution with the uniform one using mix_factor
#print(self._distrib.shape) #debug (the attribute is self._distrib, not self.distrib)
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib)
self._sample = cat_distrib.sample()
## Transformations ##
#'''
x = copy.deepcopy(x) #Avoid modifying the samples by reference (problematic for parallel use)
smps_x=[]
masks=[]
for tf_idx in range(self._nb_tf):
mask = self._sample==tf_idx #Create selection mask
smp_x = x[mask] #torch.masked_select() ?
if smp_x.shape[0]!=0: #if there's data to TF
magnitude=self._fixed_mag
tf=self._TF[tf_idx]
## Geometric TF ##
if tf=='Identity':
pass
elif tf=='FlipLR':
smp_x = TF.flipLR(smp_x)
elif tf=='FlipUD':
smp_x = TF.flipUD(smp_x)
elif tf=='Rotate':
smp_x = TF.rotate(smp_x, angle=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='TranslateX' or tf=='TranslateY':
smp_x = TF.translate(smp_x, translation=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='ShearX' or tf=='ShearY' :
smp_x = TF.shear(smp_x, shear=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
## Color TF (Expect image in the range of [0, 1]) ##
elif tf=='Contrast':
smp_x = TF.contrast(smp_x, contrast_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='Color':
smp_x = TF.color(smp_x, color_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='Brightness':
smp_x = TF.brightness(smp_x, brightness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='Sharpness':
smp_x = TF.sharpeness(smp_x, sharpness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='Posterize':
smp_x = TF.posterize(smp_x, bits=torch.tensor([1 for _ in smp_x], device=device))
elif tf=='Solarize':
smp_x = TF.solarize(smp_x, thresholds=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
elif tf=='Equalize':
smp_x = TF.equalize(smp_x)
elif tf=='Auto_Contrast':
smp_x = TF.auto_contrast(smp_x)
else:
raise Exception("Invalid TF requested : ", tf)
x[mask]=smp_x # Merge back; avoid x[mask] : in place
#idx= mask.nonzero()
#print('-'*8)
#print(idx[0], tf_idx)
#print(smp_x[0,])
#x=x.view(-1,3*32*32)
#x=x.scatter(dim=0, index=idx, src=smp_x.view(-1,3*32*32)) #The tensors change but it is not visible in the visualization...
#x=x.view(-1,3,32,32)
#print(x[0,])
'''
if len(self._TF_matrix)==0 or self._input_info['h']!=h or self._input_info['w']!=w or self._input_info['device']!=device: #Different device: no need to recompute everything
self.compute_TF_matrix(sample_info={'h': x.shape[2],
'w': x.shape[3],
'device': x.device})
TF_matrix = torch.zeros(batch_size, 3, 3, device=device) #All geom TF
for tf_idx in range(self._nb_tf):
mask = self._sample==tf_idx #Create selection mask
TF_matrix[mask,]=self._TF_matrix[self._TF[tf_idx]]
x=kornia.warp_perspective(x, TF_matrix, dsize=(h, w))
'''
return x
'''
def compute_TF_matrix(self, magnitude=None, sample_info= None):
print('Computing TF_matrix...')
if not magnitude :
magnitude=self._fixed_mag
if sample_info:
self._input_info['h']= sample_info['h']
self._input_info['w']= sample_info['w']
self._input_info['device'] = sample_info['device']
h, w, device= self._input_info['h'], self._input_info['w'], self._input_info['device']
self._TF_matrix={}
for tf in self._TF :
if tf=='Id':
self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device)
elif tf=='Rot':
center = torch.ones(1, 2, device=device)
center[0, 0] = w / 2 # x
center[0, 1] = h / 2 # y
scale = torch.ones(1, device=device)
angle = self._mag_fct[tf](magnitude) * torch.ones(1, device=device)
R = kornia.get_rotation_matrix2d(center, angle, scale) #Rotation matrix (1,2,3)
self._TF_matrix[tf]=torch.cat((R,torch.tensor([[[ 0., 0., 1.]]], device=device)), dim=1) #TF matrix (1,3,3)
elif tf=='FlipLR':
self._TF_matrix[tf]=torch.tensor([[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device)
elif tf=='FlipUD':
self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]], device=device)
else:
raise Exception("Invalid TF requested")
'''
def adjust_prob(self, soft=False): #Detach from gradient ?
if soft :
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Too 'soft': gets stuck on the uniform distribution if the lr is too small
else:
#self._params['prob'].clamp(min=0.0,max=1.0)
self._params['prob'].data = F.relu(self._params['prob'].data)
#self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
def loss_weight(self):
w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
w_loss.scatter_(1, self._sample.view(-1,1), 1)
w_loss = w_loss * self._params["prob"]/self._distrib #Weight by the probabilities (divided by the sampling distribution so the loss is not shrunk)
w_loss = torch.sum(w_loss,dim=1)
return w_loss
def train(self, mode=None):
if mode is None :
mode=self._data_augmentation
self.augment(mode=mode) #Pointless if mode=None
super(Data_augV4, self).train(mode)
def eval(self):
self.train(mode=False)
def augment(self, mode=True):
self._data_augmentation=mode
def __getitem__(self, key):
return self._params[key]
def __str__(self):
if not self._mix_dist:
return "Data_augV4(Uniform-%d TF)" % self._nb_tf
else:
return "Data_augV4(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf)
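# --- Construction sketch (illustration only), assuming transformations.py provides the
# TF_dict imported above. With mix_dist=0.0 the transformations are sampled uniformly and
# 'prob' is the learnable distribution over them; the batch is a placeholder.
_augV4 = Data_augV4(TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0)
print(_augV4)                                    # "Data_augV4(Uniform-<nb_tf> TF)"
_batch = torch.rand(8, 3, 32, 32)                # placeholder batch in [0, 1], as the color TFs expect
_batch_aug = _augV4(_batch)                      # one sampled transformation applied per image
print(_augV4['prob'])                            # learnable probability vector over the transformations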
class Augmented_model(nn.Module):
def __init__(self, data_augmenter, model):
super(Augmented_model, self).__init__()
self._mods = nn.ModuleDict({
'data_aug': data_augmenter,
'model': model
})
self.augment(mode=True)
def initialize(self):
self._mods['model'].initialize()
def forward(self, x):
return self._mods['model'](self._mods['data_aug'](x))
def augment(self, mode=True):
self._data_augmentation=mode
self._mods['data_aug'].augment(mode)
def train(self, mode=None):
if mode is None :
mode=self._data_augmentation
self._mods['data_aug'].augment(mode)
super(Augmented_model, self).train(mode)
def eval(self):
self.train(mode=False)
#super(Augmented_model, self).eval()
def items(self):
"""Return an iterable of the ModuleDict key/value pairs.
"""
return self._mods.items()
def update(self, modules):
self._mods.update(modules)
def is_augmenting(self):
return self._data_augmentation
def TF_names(self):
try:
return self._mods['data_aug']._TF
except:
return None
def __getitem__(self, key):
return self._mods[key]
def __str__(self):
return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")"
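# --- Composition sketch (illustration only): how this file is meant to combine with the
# LeNet defined in model.py, following the imports used by test_dataug.py.
from model import LeNet
_aug_mod = Augmented_model(Data_augV4(TF_dict=TF.TF_dict, N_TF=1), LeNet(3, 10))
_aug_mod.augment(mode=True)                      # enable augmentation for training
_pred = _aug_mod(torch.rand(8, 3, 32, 32))       # forward = model(data_aug(x))
_aug_mod.eval()                                  # train(False); also switches the augmentation off
print(_aug_mod)                                  # "Aug_mod(Data_augV4(Uniform-... TF)-LeNet)"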

51
higher/model.py Normal file
View file

@ -0,0 +1,51 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet, self).__init__()
self._params = nn.ParameterDict({
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
'b1': nn.Parameter(torch.zeros(20)),
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
'b2': nn.Parameter(torch.zeros(50)),
#'w3': nn.Parameter(torch.zeros(500,4*4*50)), #num_imp=1
'w3': nn.Parameter(torch.zeros(500,5*5*50)), #num_imp=3
'b3': nn.Parameter(torch.zeros(500)),
'w4': nn.Parameter(torch.zeros(num_out, 500)),
'b4': nn.Parameter(torch.zeros(num_out))
})
self.initialize()
def initialize(self):
nn.init.kaiming_uniform_(self._params["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w4"], a=math.sqrt(5))
def forward(self, x):
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self._params["w1"], bias=self._params["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self._params["w2"], bias=self._params["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self._params["w3"], self._params["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self._params["w4"], self._params["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def __getitem__(self, key):
return self._params[key]
def __str__(self):
return "LeNet"
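# --- Shape check (illustration only), assuming CIFAR-10-sized inputs; the w3 comment
# above already fixes the flattened size for num_inp=3.
_net = LeNet(3, 10)
_out = _net(torch.zeros(4, 3, 32, 32))           # 32 -> 28 -> 14 -> 10 -> 5 spatially, then 5*5*50 features
print(_out.shape)                                # torch.Size([4, 10]); log-probabilities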

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

764
higher/test_dataug.py Normal file
View file

@ -0,0 +1,764 @@
from torch.utils.data import SubsetRandomSampler
import torch.optim as optim
import torchvision
import higher
from model import *
from dataug import *
from utils import *
BATCH_SIZE = 300
#TEST_SIZE = 300
TEST_SIZE = 10000
#WARNING : Dataug (Kornia) expects images in the range [0, 1]
transform = torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
#torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
])
'''
data_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
'''
data_train = torchvision.datasets.CIFAR10(
"./data", train=True, download=True, transform=transform
)
data_test = torchvision.datasets.CIFAR10(
"./data", train=False, download=True, transform=transform
)
#'''
train_subset_indices=range(int(len(data_train)/2))
#train_subset_indices=range(BATCH_SIZE*10)
val_subset_indices=range(int(len(data_train)/2),len(data_train))
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
device = torch.device('cuda')
if device == torch.device('cpu'):
device_name = 'CPU'
else:
device_name = torch.cuda.get_device_name(device)
def test(model):
model.eval()
for i, (features, labels) in enumerate(dl_test):
features,labels = features.to(device), labels.to(device)
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def compute_vaLoss(model, dl_val_it):
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
try:
model.augment(mode=False) #Validation without transformations !
except:
pass
return F.cross_entropy(model(xs_val), ys_val)
def train_classic(model, epochs=1):
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
model.train()
dl_val_it = iter(dl_val)
log = []
for epoch in range(epochs):
print_torch_mem("Start epoch")
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
#print_torch_mem("Start iter")
features,labels = features.to(device), labels.to(device)
optim.zero_grad()
pred = model.forward(features)
loss = F.cross_entropy(pred,labels)
loss.backward()
optim.step()
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy=test(model)
model.train()
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
return log
def train_classic_higher(model, epochs=1):
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
model.train()
dl_val_it = iter(dl_val)
log = []
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, diffopt):
for epoch in range(epochs):
print_torch_mem("Start epoch "+str(epoch))
print("Fast param ",len(fmodel._fast_params))
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
#print_torch_mem("Start iter")
features,labels = features.to(device), labels.to(device)
#optim.zero_grad()
pred = fmodel.forward(features)
loss = F.cross_entropy(pred,labels)
#.backward()
#optim.step()
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
model_copy(src=fmodel, dst=model, patch_copy=False)
optim_copy(dopt=diffopt, opt=optim)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy=test(model)
model.train()
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
return log
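# --- Alternative sketch (illustration only): the commented-out context-manager form of the
# same inner loop, using higher.innerloop_ctx instead of building the patched model manually.
# with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, diffopt):
#     for features, labels in dl_train:
#         features, labels = features.to(device), labels.to(device)
#         loss = F.cross_entropy(fmodel(features), labels)
#         diffopt.step(loss)      # wraps zero_grad + backward + step on the patched copy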
def train_classic_tests(model, epochs=1):
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
countcopy=0
model.train()
dl_val_it = iter(dl_val)
log = []
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
doptim = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)
for epoch in range(epochs):
print_torch_mem("Start epoch")
print(len(fmodel._fast_params))
t0 = time.process_time()
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=True) as (fmodel, doptim):
#fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
#doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
for i, (features, labels) in enumerate(dl_train):
features,labels = features.to(device), labels.to(device)
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, doptim):
#optim.zero_grad()
pred = fmodel.forward(features)
loss = F.cross_entropy(pred,labels)
doptim.step(loss) #(opt.zero_grad, loss.backward, opt.step)
#loss.backward()
#new_params = doptim.step(loss, params=fmodel.parameters())
#fmodel.update_params(new_params)
#print('Fast param',len(fmodel._fast_params))
#print('opt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][2]['momentum_buffer'].shape)
if False or (len(fmodel._fast_params)>1):
print("fmodel fast param",len(fmodel._fast_params))
'''
#val_loss = F.cross_entropy(fmodel(features), labels)
#print_graph(val_loss)
#val_loss.backward()
#print('bip')
tmp = fmodel.parameters()
#print(list(tmp)[1])
tmp = [higher.utils._copy_tensor(t,safe_copy=True) if isinstance(t, torch.Tensor) else t for t in tmp]
#print(len(tmp))
#fmodel._fast_params.clear()
del fmodel._fast_params
fmodel._fast_params=None
fmodel.fast_params=tmp # Overloads memory
#fmodel.update_params(tmp) #Better perf / Overloads memory with track_higher_grads
#optim._fmodel=fmodel
'''
countcopy+=1
model_copy(src=fmodel, dst=model, patch_copy=False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
#doptim.detach_dyn()
#tmp = doptim.state
#tmp = doptim.state_dict()
#for k, v in tmp['state'].items():
# print('dict',k, type(v))
a = optim.param_groups[0]['params'][0]
state = optim.state[a]
#state['momentum_buffer'] = None
#print('opt state', type(optim.state[a]), len(optim.state[a]))
#optim.load_state_dict(tmp)
for group_idx, group in enumerate(optim.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
optim.state[p]=doptim.state[group_idx][p_idx]
#print('opt state', type(optim.state[a]['momentum_buffer']), optim.state[a]['momentum_buffer'][0:10])
#print('dopt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][0]['momentum_buffer'][0:10])
'''
for a in tmp:
#print(type(a), len(a))
for nb, b in a.items():
#print(nb, type(b), len(b))
for n, state in b.items():
#print(n, type(states))
#print(state.grad_fn)
state = torch.tensor(state.data).requires_grad_()
#print(state.grad_fn)
'''
doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
#doptim.state = tmp
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=doptim, opt=optim)
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy=test(model)
model.train()
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
#countcopy+=1
#model_copy(src=fmodel, dst=model, patch_copy=False)
#optim.load_state_dict(doptim.state_dict()) #Need to save the optimizer state?
print("Copy ", countcopy)
return log
def run_simple_dataug(inner_it, epochs=1):
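# Bi-level training of Augmented_model(Data_aug, LeNet): the inner loop trains the classifier through higher's differentiable optimizer, then a meta step updates the augmentation magnitude ('mag') from the validation loss.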
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
#aug_model = nn.Sequential(
# Data_aug(),
# LeNet(1,10),
# )
aug_model = Augmented_model(Data_aug(), LeNet(1,10)).to(device)
print(str(aug_model))
meta_opt = torch.optim.Adam(aug_model['data_aug'].parameters(), lr=1e-2)
inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)
log = []
t0 = time.process_time()
epoch = 0
while epoch < epochs:
meta_opt.zero_grad()
aug_model.train()
with higher.innerloop_ctx(aug_model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effect of copy_initial_weights is unclear...
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #End of the training epoch
tf = time.process_time()
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
accuracy=test(aug_model)
aug_model.train()
#### Print ####
print('-'*9)
print('Epoch %d/%d'%(epoch,epochs))
print('train loss',loss.item(), '/ val loss', val_loss.item())
print('acc', accuracy)
print('mag', aug_model['data_aug']['mag'].item())
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": aug_model['data_aug']['mag'].item(),
}
log.append(data)
t0 = time.process_time()
xs, ys = xs.to(device), ys.to(device)
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('mag', fmodel['data_aug']['mag'].grad)
diffopt.step(loss) # note that `step` must take `loss` as an argument!
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
# these new parameters, as an alternative to getting them from
# `fmodel.fast_params` or `fmodel.parameters()` after calling
# `diffopt.step`.
# At this point, or at any point in the iteration, you can take the
# gradient of `fmodel.parameters()` (or equivalently
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
# `grad_fn` as an attribute, and be part of the gradient tape.
# At the end of your inner loop you can obtain these e.g. ...
#grad_of_grads = torch.autograd.grad(
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
fmodel.augment(mode=False)
val_logits = fmodel(xs_val) #Validation without transformations!
val_loss = F.cross_entropy(val_logits, ys_val)
#print('val_loss',val_loss.item())
val_loss.backward()
#print('mag', fmodel['data_aug']['mag'], '/', fmodel['data_aug']['mag'].grad)
#model=copy.deepcopy(fmodel)
aug_model.load_state_dict(fmodel.state_dict()) #Do not copy gradient !
#Copy the gradients
for paramName, paramValue, in fmodel.named_parameters():
for netCopyName, netCopyValue, in aug_model.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#print('mag', aug_model['data_aug']['mag'], '/', aug_model['data_aug']['mag'].grad)
meta_opt.step()
plot_res(log, fig_name="res/{}-{} epochs- {} in_it".format(str(aug_model),epochs,inner_it))
print('-'*9)
times = [x["time"] for x in log]
print(str(aug_model),": acc", max([x["acc"] for x in log]), "in (ms):", np.mean(times), "+/-", np.std(times))
def run_dist_dataug(model, epochs=1, inner_it=1, dataug_epoch_start=0):
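# Bi-level training where the meta parameters are the transformation probabilities of the data augmentation module: every n_inner_iter training batches go through higher's differentiable optimizer, then a meta step updates the probabilities from the validation loss.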
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-3)
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)
high_grad_track = True
if dataug_epoch_start>0:
model.augment(mode=False)
high_grad_track = False
model.train()
log = []
t0 = time.process_time()
countcopy=0
val_loss=torch.tensor(0)
opt_param=None
epoch = 0
while epoch < epochs:
meta_opt.zero_grad()
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt): #effect of copy_initial_weights is unclear...
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #End of the training epoch
tf = time.process_time()
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
#viz_sample_data(imgs=aug_model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch))
accuracy=test(model)
model.train()
#### Print ####
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', accuracy)
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',aug_model['data_aug']['prob'].grad)
#############
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": [p for p in model['data_aug']['prob']],
}
log.append(data)
#############
if epoch == dataug_epoch_start:
print('Starting Data Augmentation...')
model.augment(mode=True)
high_grad_track = True
t0 = time.process_time()
xs, ys = xs.to(device), ys.to(device)
'''
#Exact method
final_loss = 0
for tf_idx in range(fmodel['data_aug']._nb_tf):
fmodel['data_aug'].transf_idx=tf_idx
logits = fmodel(xs)
loss = F.cross_entropy(logits, ys)
#loss.backward(retain_graph=True)
#print('idx', tf_idx)
#print(fmodel['data_aug']['prob'][tf_idx], fmodel['data_aug']['prob'][tf_idx].grad)
final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
loss = final_loss
'''
#Uniform method
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys, reduction='none') # no need to call loss.backwards()
if fmodel._data_augmentation: #Weight loss
w_loss = fmodel['data_aug'].loss_weight().to(device)
loss = loss * w_loss
loss = loss.mean()
#'''
#to visualize computational graph
#print_graph(loss)
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('prob grad', fmodel['data_aug']['prob'].grad)
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
fmodel.augment(mode=False) #Validation without transformations!
val_loss = F.cross_entropy(fmodel(xs_val), ys_val)
#print_graph(val_loss)
val_loss.backward()
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
meta_opt.step()
model['data_aug'].adjust_prob() #Constraint: sum(proba)=1
print("Copy ", countcopy)
return log
def run_dist_dataugV2(model, epochs=1, inner_it=0, dataug_epoch_start=0, print_freq=1, loss_patience=None):
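# Variant of run_dist_dataug: the meta step runs every inner_it training batches (inner_it=0 disables it), and augmentation can be delayed to dataug_epoch_start or triggered by a validation-loss plateau via loss_patience.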
log = []
countcopy=0
val_loss=torch.tensor(0) #Needed if no meta-step occurs during an epoch
dl_val_it = iter(dl_val)
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-2)
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)
high_grad_track = True
if inner_it == 0:
high_grad_track=False
if dataug_epoch_start!=0:
model.augment(mode=False)
high_grad_track = False
val_loss_monitor= None
if loss_patience != None :
if dataug_epoch_start==-1: val_loss_monitor = loss_monitor(patience=loss_patience, end_train=2) #1st limit = dataug start
else: val_loss_monitor = loss_monitor(patience=loss_patience) #Val loss monitor
model.train()
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel,track_higher_grads=high_grad_track)
for epoch in range(1, epochs+1):
#print_torch_mem("Start epoch "+str(epoch))
#print(high_grad_track, fmodel._data_augmentation, len(fmodel._fast_params))
t0 = time.process_time()
#with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt):
for i, (xs, ys) in enumerate(dl_train):
xs, ys = xs.to(device), ys.to(device)
'''
#Exact method
final_loss = 0
for tf_idx in range(fmodel['data_aug']._nb_tf):
fmodel['data_aug'].transf_idx=tf_idx
logits = fmodel(xs)
loss = F.cross_entropy(logits, ys)
#loss.backward(retain_graph=True)
#print('idx', tf_idx)
#print(fmodel['data_aug']['prob'][tf_idx], fmodel['data_aug']['prob'][tf_idx].grad)
final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
loss = final_loss
'''
#Uniform method
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys, reduction='none') # no need to call loss.backwards()
#LOSS NOT WEIGHTED FOR DIST MIX
if fmodel._data_augmentation: # and not fmodel['data_aug']._mix_dist: #Weight loss
w_loss = fmodel['data_aug'].loss_weight().to(device)
loss = loss * w_loss
loss = loss.mean()
#'''
#to visualize computational graph
#print_graph(loss)
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('prob grad', fmodel['data_aug']['prob'].grad)
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
if(high_grad_track and i%inner_it==0): #Perform Meta step
#print("meta")
#Of little use if high_grad_track = False
val_loss = compute_vaLoss(model=fmodel, dl_val_it=dl_val_it)
#print_graph(val_loss)
val_loss.backward()
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
meta_opt.step()
model['data_aug'].adjust_prob(soft=False) #Constraint: sum(proba)=1
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
tf = time.process_time()
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
#viz_sample_data(imgs=aug_model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch))
if(not high_grad_track):
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_val_it=dl_val_it)
#Needed to reset higher (fast_params accumulate even with track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
accuracy=test(model)
model.train()
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Time : %.2f s'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', accuracy)
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',aug_model['data_aug']['prob'].grad)
#############
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": [p.item() for p in model['data_aug']['prob']],
}
log.append(data)
#############
if val_loss_monitor :
val_loss_monitor.register(val_loss.item())
if val_loss_monitor.end_training(): break #Stop training
if not model.is_augmenting() and (epoch == dataug_epoch_start or (val_loss_monitor and val_loss_monitor.limit_reached()==1)):
print('Starting Data Augmentation...')
dataug_epoch_start = epoch
model.augment(mode=True)
if inner_it != 0: high_grad_track = True
print("Copy ", countcopy)
return log
##########################################
if __name__ == "__main__":
n_inner_iter = 0
epochs = 2
dataug_epoch_start=0
#### Classic ####
'''
model = LeNet(3,10).to(device)
#model = torchvision.models.resnet18()
#model = Augmented_model(Data_augV3(mix_dist=0.0), LeNet(3,10)).to(device)
#model.augment(mode=False)
print(str(model), 'on', device_name)
log= train_classic_higher(model=model, epochs=epochs)
####
plot_res(log, fig_name="res/{}-{} epochs".format(str(model),epochs))
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times)), "Device": device_name, "Log": log}
print(str(model),": acc", out["Accuracy"], "in (ms):", out["Time"][0], "+/-", out["Time"][1])
with open("res/log/%s.json" % "{}-{} epochs".format(str(model),epochs), "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
print('-'*9)
'''
#### Augmented Model ####
#'''
aug_model = Augmented_model(Data_augV4(TF_dict=TF.TF_dict, mix_dist=0.0), LeNet(3,10)).to(device)
print(str(aug_model), 'on', device_name)
#run_simple_dataug(inner_it=n_inner_iter, epochs=epochs)
log= run_dist_dataugV2(model=aug_model, epochs=epochs, inner_it=n_inner_iter, dataug_epoch_start=dataug_epoch_start, print_freq=10, loss_patience=10)
####
plot_res(log, fig_name="res/{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter))
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times)), "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
print(str(aug_model),": acc", out["Accuracy"], "in (ms):", out["Time"][0], "+/-", out["Time"][1])
with open("res/log/%s.json" % "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter), "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
print('-'*9)
#'''
#### Comparison ####
'''
files=[
#"res/log/LeNet-100 epochs.json",
#"res/log/Aug_mod(Data_augV4(Uniform-4 TF)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
#"res/log/Aug_mod(Data_augV4(Uniform-4 TF)-LeNet)-100 epochs (dataug:50)- 0 in_it.json",
#"res/log/Aug_mod(Data_augV4(Uniform-3 TF)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
#"res/log/Aug_mod(Data_augV3(Uniform-3 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
#"res/log/Aug_mod(Data_augV4(Mix 0,5-3 TF)-LeNet)-100 epochs (dataug:0)- 1 in_it.json",
#"res/log/Aug_mod(Data_augV4(Mix 0.5-3 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
#"res/log/Aug_mod(Data_augV4(Uniform-3 TF)-LeNet)-100 epochs (dataug:0)- 10 in_it.json",
"res/log/Aug_mod(Data_augV4(Uniform-10 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
"res/log/Aug_mod(Data_augV4(Uniform-10 TF)-LeNet)-100 epochs (dataug:50)- 0 in_it.json",
]
plot_compare(filenames=files, fig_name="res/compare")
'''

150
higher/test_lr.py Normal file
View file

@ -0,0 +1,150 @@
import numpy as np
import json, math, time, os
from torch.utils.data import SubsetRandomSampler
import torch.optim as optim
import higher
from model import *
import copy
BATCH_SIZE = 300
TEST_SIZE = 300
mnist_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
mnist_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
#train_subset_indices=range(int(len(mnist_train)/2))
train_subset_indices=range(BATCH_SIZE)
val_subset_indices=range(int(len(mnist_train)/2),len(mnist_train))
dl_train = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=TEST_SIZE, shuffle=False)
def test(model):
model.eval()
for i, (features, labels) in enumerate(dl_test):
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def train_classic(model, optim, epochs=1):
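# Reference (non-meta) training loop used as a timing/accuracy baseline for the learned learning-rate experiment below.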
model.train()
log = []
for epoch in range(epochs):
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
optim.zero_grad()
pred = model.forward(features)
loss = F.cross_entropy(pred,labels)
loss.backward()
optim.step()
#### Log ####
tf = time.process_time()
data={
"time": tf - t0,
}
log.append(data)
times = [x["time"] for x in log]
print("Vanilla : acc", test(model), "in (ms):", np.mean(times), "+/-", np.std(times))
##########################################
if __name__ == "__main__":
device = torch.device('cpu')
model = LeNet(1,10)
opt_param = {
"lr": torch.tensor(1e-2).requires_grad_(),
"momentum": torch.tensor(0.9).requires_grad_()
}
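# Learning rate and momentum are kept as tensors with requires_grad so the meta optimizer can tune them through higher's differentiable inner optimizer below.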
n_inner_iter = 1
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
epoch = 0
epochs = 10
####
train_classic(model=model, optim=torch.optim.Adam(model.parameters(), lr=0.001), epochs=epochs)
model = LeNet(1,10)
meta_opt = torch.optim.Adam(opt_param.values(), lr=1e-2)
inner_opt = torch.optim.SGD(model.parameters(), lr=opt_param['lr'], momentum=opt_param['momentum'])
#for xs_val, ys_val in dl_val:
while epoch < epochs:
#print(data_aug.params["mag"], data_aug.params["mag"].grad)
meta_opt.zero_grad()
model.train()
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effect of copy_initial_weights is unclear...
for param_group in diffopt.param_groups:
param_group['lr'] = opt_param['lr']
param_group['momentum'] = opt_param['momentum']
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #End of the training epoch
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
print('Epoch', epoch)
print('train loss',loss.item(), '/ val loss', val_loss.item())
print('acc', test(model))
print('opt : lr', opt_param['lr'].item(), 'momentum', opt_param['momentum'].item())
print('-'*9)
model.train()
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
#print('loss',loss.item())
diffopt.step(loss) # note that `step` must take `loss` as an argument!
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
# these new parameters, as an alternative to getting them from
# `fmodel.fast_params` or `fmodel.parameters()` after calling
# `diffopt.step`.
# At this point, or at any point in the iteration, you can take the
# gradient of `fmodel.parameters()` (or equivalently
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
# `grad_fn` as an attribute, and be part of the gradient tape.
# At the end of your inner loop you can obtain these e.g. ...
#grad_of_grads = torch.autograd.grad(
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #End of the validation epoch
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
val_logits = fmodel(xs_val)
val_loss = F.cross_entropy(val_logits, ys_val)
#print('val_loss',val_loss.item())
val_loss.backward()
#meta_grads = torch.autograd.grad(val_loss, opt_lr, allow_unused=True)
#print(meta_grads)
for param_group in diffopt.param_groups:
print(param_group['lr'], '/',param_group['lr'].grad)
print(param_group['momentum'], '/',param_group['momentum'].grad)
#model=copy.deepcopy(fmodel)
model.load_state_dict(fmodel.state_dict())
meta_opt.step()

205
higher/transformations.py Normal file
View file

@ -0,0 +1,205 @@
import torch
import kornia
import random
### Available TF for Dataug ###
TF_dict={ #f(normalized_mag)=real_mag
## Geometric TF ##
'Identity' : (lambda mag: None),
'FlipUD' : (lambda mag: None),
'FlipLR' : (lambda mag: None),
'Rotate': (lambda mag: random.randint(-int_parameter(mag, maxval=30), int_parameter(mag, maxval=30))),
'TranslateX': (lambda mag: [random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20)), 0]),
'TranslateY': (lambda mag: [0, random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20))]),
'ShearX': (lambda mag: [random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3)), 0]),
'ShearY': (lambda mag: [0, random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3))]),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Color':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Brightness':(lambda mag: random.uniform(1., float_parameter(mag, maxval=1.9))),
'Sharpness':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
'Posterize': (lambda mag: random.randint(4, int_parameter(mag, maxval=8))),
'Solarize': (lambda mag: random.randint(1, int_parameter(mag, maxval=256))/256.), #=>Image in [0,1] #Not optimized for batches
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
def int_image(float_image): #WARNING: slight loss of information (granularity: 1/256 = 0.0039)
return (float_image*255.).type(torch.uint8)
def float_image(int_image):
return int_image.type(torch.float)/255.
def rand_inverse(value):
return value if random.random() < 0.5 else -value
#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / PARAMETER_MAX
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / PARAMETER_MAX)
def flipLR(x):
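# Horizontal flip of a (B, C, H, W) batch, implemented as a kornia perspective warp so it applies to whole batches on the current device.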
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def flipUD(x):
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def rotate(x, angle):
return kornia.rotate(x, angle=angle.type(torch.float32)) #Kornia does not support int types
def translate(x, translation):
return kornia.translate(x, translation=translation.type(torch.float32)) #Kornia does not support int types
def shear(x, shear):
return kornia.shear(x, shear=shear)
def contrast(x, contrast_factor):
return kornia.adjust_contrast(x, contrast_factor=contrast_factor) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
(batch_size, channels, h, w) = x.shape
gray_x = kornia.rgb_to_grayscale(x)
gray_x = gray_x.repeat_interleave(channels, dim=1)
return blend(gray_x, x, color_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def brightness(x, brightness_factor):
device = x.device
return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def sharpeness(x, sharpness_factor):
device = x.device
(batch_size, channels, h, w) = x.shape
k = torch.tensor([[[ 1., 1., 1.],
[ 1., 5., 1.],
[ 1., 1., 1.]]], device=device) #Smooth Filter : https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) #May need to handle the alpha channel differently
return blend(smooth_x, x, sharpness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
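# Reduces each channel to `bits` bits by masking out the low-order bits of the uint8 representation; `bits` appears to be expected as a tensor with one value per image in the batch.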
x = int_image(x) #Expect image in the range of [0, 1]
mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)
(batch_size, channels, h, w) = x.shape
mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way...
return float_image(x & mask)
def auto_contrast(x): #NOT OPTIMIZED FOR BATCHES #EXTRA SLOW
# Optimization : efficient LUT application / per-batch/channel histogram computation
print("Warning : not checked yet !")
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
# find lowest/highest samples after preprocessing
for lo in range(256):
if hist[lo]:
break
for hi in range(255, -1, -1):
if hist[hi]:
break
if hi <= lo:
# don't bother
pass
else:
scale = 255.0 / (hi - lo)
offset = -lo * scale
for ix in range(256):
n_ix = int(ix * scale + offset)
if n_ix < 0: n_ix = 0
elif n_ix > 255: n_ix = 255
chan[chan==ix]=n_ix
x[im_idx, chan_idx]=chan
#print('End',x[0])
return float_image(x)
def equalize(x): #NOT OPTIMIZED FOR BATCHES
raise NotImplementedError("equalize is not implemented")
# Optimization : efficient LUT application / per-batch/channel histogram computation
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
return float_image(x)
def solarize(x, thresholds): #NOT OPTIMIZED FOR BATCHES
# Optimization : mask all the data at once (Mask = (B,C,H,W) > (B))
for idx, t in enumerate(thresholds): #Per-image operation
mask = x[idx] > t.item()
inv_x = 1-x[idx][mask]
x[idx][mask]=inv_x
return x
#https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
def blend(x,y,alpha): #out = image1 * (1.0 - alpha) + image2 * alpha
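# Linear interpolation between two image batches with a per-image alpha, mirroring PIL's blend formula given above.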
#return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0) #out=src1*alpha+src2*beta+gamma #Does not work with batched alpha
if not isinstance(x, torch.Tensor):
raise TypeError("x should be a tensor. Got {}".format(type(x)))
if not isinstance(y, torch.Tensor):
raise TypeError("y should be a tensor. Got {}".format(type(y)))
(batch_size, channels, h, w) = x.shape
alpha = alpha.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way...
res = x*(1-alpha) + y*alpha
return res
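# Illustrative smoke test (added sketch, not part of the original training pipeline): it only
# exercises the batched transforms defined above on random data, and assumes torch and kornia
# are installed as imported at the top of this file.
if __name__ == "__main__":
    batch = torch.rand(4, 3, 32, 32)  # (B, C, H, W) images in [0, 1]
    flipped = flipLR(batch)
    rotated = rotate(batch, angle=torch.full((4,), 30.0))
    blended = blend(flipped, rotated, alpha=torch.full((4,), 0.5))
    print(flipped.shape, rotated.shape, blended.shape)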

184
higher/utils.py Normal file
View file

@ -0,0 +1,184 @@
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc
from torchviz import make_dot
import torch
import torch.nn.functional as F
def print_graph(PyTorch_obj, fig_name='graph'):
graph=make_dot(PyTorch_obj) #Passing the loss yields the whole graph
graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
graph.render(fig_name)
def plot_res(log, fig_name='res'):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0].legend()
ax[1].set_title('Acc')
ax[1].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].set_title('Mag')
ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
ax[2].legend()
else :
ax[2].set_title('Prob')
for idx, _ in enumerate(log[0]["param"]):
ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
ax[2].legend()
#ax[2].legend(('P-0', 'P-45', 'P-180'))
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name)
def plot_compare(filenames, fig_name='res'):
all_data=[]
legend=""
for idx, file in enumerate(filenames):
legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
for data_idx, log in enumerate(all_data):
log=log['Log']
epochs = [x["epoch"] for x in log]
ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
#ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
else :
for idx, _ in enumerate(log[0]["param"]):
ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
fig.suptitle(legend)
ax[0].set_title('Loss')
ax[1].set_title('Acc')
ax[2].set_title('Param')
for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
def viz_sample_data(imgs, labels, fig_name='data_sample'):
sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(sample[i,], cmap=plt.cm.binary)
plt.xlabel(labels[i].item())
plt.savefig(fig_name)
def model_copy(src,dst, patch_copy=True, copy_grad=True):
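# Copies weights (state_dict) and, optionally, gradients from a higher patched model back into the regular model; patch_copy also syncs the wrapped 'model' and 'data_aug' sub-modules.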
#model=copy.deepcopy(fmodel) #Not appropriate: we only want the weights/grads (not the whole fmodel and its state)
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) #Copies missing data?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
#Copy the gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
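# Copies the per-parameter state (e.g. momentum buffers) of a higher differentiable optimizer back into the corresponding torch.optim optimizer.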
#inner_opt.load_state_dict(diffopt.state_dict()) #Need to save the optimizer state (momentum, etc.) => does not copy the state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
def print_torch_mem(add_info=''):
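# Rough memory diagnostic: counts the tensors currently tracked by the garbage collector and reports the largest one (by the sum of its dimensions).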
nb=0
max_size=0
for obj in gc.get_objects():
#print(type(obj))
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
#print(add_info, "-Garbage size :",len(gc.garbage))
class loss_monitor(): #See https://github.com/pytorch/ignite
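# Early-stopping style helper: counts consecutive registrations without loss improvement and reports when the patience limit is reached (used to trigger augmentation and/or stop training).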
def __init__(self, patience, end_train=1):
self.patience = patience
self.end_train = end_train
self.counter = 0
self.best_score = None
self.reached_limit = 0
def register(self, loss):
if self.best_score is None:
self.best_score = loss
elif loss > self.best_score:
self.counter += 1
#if not self.reached_limit:
print("loss no improve counter", self.counter, self.reached_limit)
else:
self.best_score = loss
self.counter = 0
def limit_reached(self):
if self.counter >= self.patience:
self.counter = 0
self.reached_limit +=1
self.best_score = None
return self.reached_limit
def end_training(self):
if self.limit_reached() >= self.end_train:
return True
else:
return False
def reset(self):
self.__init__(self.patience, self.end_train)