Initial Commit
.gitignore (vendored, new file, +46)
@@ -0,0 +1,46 @@
/higher/data
/Gradient-Descent-The-Ultimate-Optimizer/data
/FAR-HO/data
/__pycache__

*.pyo
*.pyc
*~

# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so

# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite

# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
FAR-HO/augmentation_transforms.py (new executable file, +456)
@@ -0,0 +1,456 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Transforms used in the Augmentation Policies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import inspect
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import


IMAGE_SIZE = 28
# Dataset mean and std of the images in the training set
MEANS = [0.49139968, 0.48215841, 0.44653091]
STDS = [0.24703223, 0.24348513, 0.26158784]
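# NOTE: MEANS and STDS appear to be the standard CIFAR-10 per-channel statistics,
# while IMAGE_SIZE is set to 28 (MNIST-sized inputs).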
PARAMETER_MAX = 10  # Maximum 'level' a transform can be predicted to have


def random_flip(x):
  """Flip the input x horizontally with 50% probability."""
  if np.random.rand(1)[0] > 0.5:
    return np.fliplr(x)
  return x


def zero_pad_and_crop(img, amount=4):
  """Zero pad by `amount` zero pixels on each side then take a random crop.

  Args:
    img: numpy image that will be zero padded and cropped.
    amount: amount of zeros to pad `img` with horizontally and vertically.

  Returns:
    The cropped zero padded img. The returned numpy array will be of the same
    shape as `img`.
  """
  padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
                         img.shape[2]))
  padded_img[amount:img.shape[0] + amount, amount:
             img.shape[1] + amount, :] = img
  top = np.random.randint(low=0, high=2 * amount)
  left = np.random.randint(low=0, high=2 * amount)
  new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
  return new_img


def create_cutout_mask(img_height, img_width, num_channels, size):
  """Creates a zero mask used for cutout of shape `img_height` x `img_width`.

  Args:
    img_height: Height of image cutout mask will be applied to.
    img_width: Width of image cutout mask will be applied to.
    num_channels: Number of channels in the image.
    size: Size of the zeros mask.

  Returns:
    A mask of shape `img_height` x `img_width` with all ones except for a
    square of zeros of shape `size` x `size`. This mask is meant to be
    elementwise multiplied with the original image. Additionally returns
    the `upper_coord` and `lower_coord` which specify where the cutout mask
    will be applied.
  """
  assert img_height == img_width

  # Sample center where cutout mask will be applied
  height_loc = np.random.randint(low=0, high=img_height)
  width_loc = np.random.randint(low=0, high=img_width)

  # Determine upper left and lower right corners of patch
  upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
  lower_coord = (min(img_height, height_loc + size // 2),
                 min(img_width, width_loc + size // 2))
  mask_height = lower_coord[0] - upper_coord[0]
  mask_width = lower_coord[1] - upper_coord[1]
  assert mask_height > 0
  assert mask_width > 0

  mask = np.ones((img_height, img_width, num_channels))
  zeros = np.zeros((mask_height, mask_width, num_channels))
  mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
      zeros)
  return mask, upper_coord, lower_coord


def cutout_numpy(img, size=16):
  """Apply cutout with mask of shape `size` x `size` to `img`.

  The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
  This operation applies a `size`x`size` mask of zeros to a random location
  within `img`.

  Args:
    img: Numpy image that cutout will be applied to.
    size: Height/width of the cutout mask that will be applied to `img`.

  Returns:
    A numpy tensor that is the result of applying the cutout mask to `img`.
  """
  img_height, img_width, num_channels = (img.shape[0], img.shape[1],
                                         img.shape[2])
  assert len(img.shape) == 3
  mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
  return img * mask


def float_parameter(level, maxval):
  """Helper function to scale `level` between 0 and `maxval`.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    A float that results from scaling `maxval` according to `level`.
  """
  return float(level) * maxval / PARAMETER_MAX


def int_parameter(level, maxval):
  """Helper function to scale `level` between 0 and `maxval`.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    An int that results from scaling `maxval` according to `level`.
  """
  return int(level * maxval / PARAMETER_MAX)


def pil_wrap(img):
  """Convert the `img` numpy tensor to a PIL Image."""
  return Image.fromarray(
      np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')


def pil_unwrap(pil_img):
  """Converts the PIL img to a numpy array."""
  pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
  i1, i2 = np.where(pic_array[:, :, 3] == 0)
  pic_array = (pic_array[:, :, :3] - MEANS) / STDS
  pic_array[i1, i2] = [0, 0, 0]
  return pic_array


def apply_policy(policy, img):
  """Apply the `policy` to the numpy `img`.

  Args:
    policy: A list of tuples with the form (name, probability, level) where
      `name` is the name of the augmentation operation to apply, `probability`
      is the probability of applying the operation and `level` is the strength
      with which to apply the operation.
    img: Numpy image that will have `policy` applied to it.

  Returns:
    The result of applying `policy` to `img`.
  """
  #print('img shape :', img.shape)
  #print('Policy len :', len(policy))
  pil_img = pil_wrap(img)
  for xform in policy:
    #print('xform :', len(xform))
    assert len(xform) == 3
    name, probability, level = xform
    #xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
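    # `probability` is expected to be a tensor-like hyperparameter here (e.g. a
    # FAR-HO hyperparameter); .eval() reads its current value from the default session.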
    xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
    pil_img = xform_fn(pil_img)
  return pil_unwrap(pil_img)


class TransformFunction(object):
  """Wraps the Transform function for pretty printing options."""

  def __init__(self, func, name):
    self.f = func
    self.name = name

  def __repr__(self):
    return '<' + self.name + '>'

  def __call__(self, pil_img):
    return self.f(pil_img)


class TransformT(object):
  """Each instance of this class represents a specific transform."""

  def __init__(self, name, xform_fn):
    self.name = name
    self.xform = xform_fn

  def pil_transformer(self, probability, level):

    def return_function(im):
      if random.random() < probability:
        im = self.xform(im, level)
      return im

    name = self.name + '({:.1f},{})'.format(probability, level)
    return TransformFunction(return_function, name)

  def do_transform(self, image, level):
    f = self.pil_transformer(PARAMETER_MAX, level)
    return pil_unwrap(f(pil_wrap(image)))


################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
    'FlipLR',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
    'FlipUD',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
    'AutoContrast',
    lambda pil_img, level: ImageOps.autocontrast(
        pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
    'Equalize',
    lambda pil_img, level: ImageOps.equalize(
        pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
    'Invert',
    lambda pil_img, level: ImageOps.invert(
        pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
    'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
    'Smooth',
    lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))


def _rotate_impl(pil_img, level):
  """Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
  degrees = int_parameter(level, 30)
  if random.random() > 0.5:
    degrees = -degrees
  return pil_img.rotate(degrees)


rotate = TransformT('Rotate', _rotate_impl)


def _posterize_impl(pil_img, level):
  """Applies PIL Posterize to `pil_img`."""
  level = int_parameter(level, 4)
  return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')


posterize = TransformT('Posterize', _posterize_impl)


def _shear_x_impl(pil_img, level):
  """Applies PIL ShearX to `pil_img`.

  The ShearX operation shears the image along the horizontal axis with `level`
  magnitude.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had ShearX applied to it.
  """
  level = float_parameter(level, 0.3)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))


shear_x = TransformT('ShearX', _shear_x_impl)


def _shear_y_impl(pil_img, level):
  """Applies PIL ShearY to `pil_img`.

  The ShearY operation shears the image along the vertical axis with `level`
  magnitude.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had ShearY applied to it.
  """
  level = float_parameter(level, 0.3)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))


shear_y = TransformT('ShearY', _shear_y_impl)


def _translate_x_impl(pil_img, level):
  """Applies PIL TranslateX to `pil_img`.

  Translate the image in the horizontal direction by `level`
  number of pixels.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had TranslateX applied to it.
  """
  level = int_parameter(level, 10)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))


translate_x = TransformT('TranslateX', _translate_x_impl)


def _translate_y_impl(pil_img, level):
  """Applies PIL TranslateY to `pil_img`.

  Translate the image in the vertical direction by `level`
  number of pixels.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had TranslateY applied to it.
  """
  level = int_parameter(level, 10)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))


translate_y = TransformT('TranslateY', _translate_y_impl)


def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
  """Applies a crop to `pil_img` with the size depending on the `level`."""
  cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
  resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
  return resized


crop_bilinear = TransformT('CropBilinear', _crop_impl)


def _solarize_impl(pil_img, level):
  """Applies PIL Solarize to `pil_img`.

  Inverts all pixel values above a threshold that decreases as `level`
  increases.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had Solarize applied to it.
  """
  level = int_parameter(level, 256)
  return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')


solarize = TransformT('Solarize', _solarize_impl)


def _cutout_pil_impl(pil_img, level):
  """Apply cutout to pil_img at the specified level."""
  size = int_parameter(level, 20)
  if size <= 0:
    return pil_img
  img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
  _, upper_coord, lower_coord = (
      create_cutout_mask(img_height, img_width, num_channels, size))
  pixels = pil_img.load()  # create the pixel map
  for i in range(upper_coord[0], lower_coord[0]):  # for every col:
    for j in range(upper_coord[1], lower_coord[1]):  # for every row
      pixels[i, j] = (125, 122, 113, 0)  # set the colour accordingly
  return pil_img

cutout = TransformT('Cutout', _cutout_pil_impl)


def _enhancer_impl(enhancer):
  """Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
  def impl(pil_img, level):
    v = float_parameter(level, 1.8) + .1  # going to 0 just destroys it
    return enhancer(pil_img).enhance(v)
  return impl


color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
    ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))

ALL_TRANSFORMS = [
    flip_lr,
    flip_ud,
    auto_contrast,
    equalize,
    invert,
    rotate,
    posterize,
    crop_bilinear,
    solarize,
    color,
    contrast,
    brightness,
    sharpness,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
    cutout,
    blur,
    smooth
]

NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
FAR-HO/blue_utils.py (new file, +131)
@@ -0,0 +1,131 @@
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset

import os
import numpy as np
import tensorflow as tf

import augmentation_transforms as augmentation_transforms ##### WARNING: DUPLICATE FILE => SHOULD BE HANDLED MORE CLEANLY ####


def viz_data(dataset, fig_name='data_sample', aug_policy=None):

    plt.figure(figsize=(10,10))
    for i in range(25):
        plt.subplot(5,5,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        img = dataset.data[i][:,:,0]
        if aug_policy:
            img = augment_img(img, aug_policy)
        #print('im shape', img.shape)
        plt.imshow(img, cmap=plt.cm.binary)
        plt.xlabel(np.nonzero(dataset.target[i])[0].item())

    plt.savefig(fig_name)


def augment_img(data, policy):

    #print('Im shape', data.shape)
    data = np.stack((data,)*3, axis=-1)  # quick hack just to force 3 channels
    #print('Im shape', data.shape)
    final_img = augmentation_transforms.apply_policy(policy, data)
    #final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
    # Apply cutout
    #final_img = augmentation_transforms.cutout_numpy(final_img)

    im_rgb = np.array(final_img, np.float32)
    im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140])  # just to go back to a single channel
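    # 0.2989 / 0.5870 / 0.1140 are the ITU-R BT.601 luma coefficients used to
    # collapse the RGB result back to one channel.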

    return im_gray


### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name=None):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

# bias initialization
def bias_variable(shape, name=None):
    initial = tf.constant(0.1, shape=shape)  # positive bias
    return tf.Variable(initial, name=name)

# 2D convolution
def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

# max pooling
def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

def cnn(x_data_tf, y_data_tf, name='model'):
    # tunable hyperparameters for nn architecture
    s_f_conv1 = 3   # filter size of first convolution layer (default = 3)
    n_f_conv1 = 36  # number of features of first convolution layer (default = 36)
    s_f_conv2 = 3   # filter size of second convolution layer (default = 3)
    n_f_conv2 = 36  # number of features of second convolution layer (default = 36)
    s_f_conv3 = 3   # filter size of third convolution layer (default = 3)
    n_f_conv3 = 36  # number of features of third convolution layer (default = 36)
    n_n_fc1 = 576   # number of neurons of first fully connected layer (default = 576)

    # 1st layer: convolution + max pooling
    W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name='W_conv1_tf')  # (5,5,1,32)
    b_conv1_tf = bias_variable([n_f_conv1], name='b_conv1_tf')  # (32)
    h_conv1_tf = tf.nn.relu(conv2d(x_data_tf,
                                   W_conv1_tf) + b_conv1_tf,
                            name='h_conv1_tf')  # (.,28,28,32)
    h_pool1_tf = max_pool_2x2(h_conv1_tf,
                              name='h_pool1_tf')  # (.,14,14,32)

    # 2nd layer: convolution + max pooling
    W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2,
                                  n_f_conv1, n_f_conv2],
                                 name='W_conv2_tf')
    b_conv2_tf = bias_variable([n_f_conv2], name='b_conv2_tf')
    h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf,
                                   W_conv2_tf) + b_conv2_tf,
                            name='h_conv2_tf')  # (.,14,14,32)
    h_pool2_tf = max_pool_2x2(h_conv2_tf, name='h_pool2_tf')  # (.,7,7,32)

    # 3rd layer: convolution + max pooling
    W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3,
                                  n_f_conv2, n_f_conv3],
                                 name='W_conv3_tf')
    b_conv3_tf = bias_variable([n_f_conv3], name='b_conv3_tf')
    h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf,
                                   W_conv3_tf) + b_conv3_tf,
                            name='h_conv3_tf')  # (.,7,7,32)
    h_pool3_tf = max_pool_2x2(h_conv3_tf,
                              name='h_pool3_tf')  # (.,4,4,32)

    # 4th layer: fully connected
    W_fc1_tf = weight_variable([4*4*n_f_conv3, n_n_fc1],
                               name='W_fc1_tf')  # (4*4*32, 1024)
    b_fc1_tf = bias_variable([n_n_fc1], name='b_fc1_tf')  # (1024)
    h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1, 4*4*n_f_conv3],
                                 name='h_pool3_flat_tf')  # (.,1024)
    h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf,
                                    W_fc1_tf) + b_fc1_tf,
                          name='h_fc1_tf')  # (.,1024)

    # add dropout
    #keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
    #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')

    # 5th layer: fully connected
    W_fc2_tf = weight_variable([n_n_fc1, 10], name='W_fc2_tf')
    b_fc2_tf = bias_variable([10], name='b_fc2_tf')
    z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf),
                       b_fc2_tf, name='z_pred_tf')  # => (.,10)
    # predicted probabilities in one-hot encoding
    y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')

    # tensor of correct predictions
    y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
                                 tf.argmax(y_data_tf, 1),
                                 name='y_pred_correct_tf')
    return y_pred_proba_tf
FAR-HO/far_pba_cifar.py (new file, +166)
@@ -0,0 +1,166 @@
#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
from __future__ import absolute_import, print_function, division

import contextlib
import os
import numpy as np
import tensorflow as tf
#import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
#import pprint

import autoaugment.augmentation_transforms as augmentation_transforms
#import autoaugment.policies as found_policies
from autoaugment.wrn import build_wrn_model


def build_model(inputs, num_classes, is_training, hparams):
    """Constructs the vision model being trained/evaled.

    Args:
      inputs: input features/images being fed to the image model being built.
      num_classes: number of output classes being predicted.
      is_training: is the model training or not.
      hparams: additional hyperparameters associated with the image model.

    Returns:
      The logits of the image model.
    """
    scopes = setup_arg_scopes(is_training)
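    # NOTE: setup_arg_scopes, build_shake_drop_model and build_shake_shake_model
    # (like helper_utils and data_utils used further down) are not defined or
    # imported in this file; they presumably come from the upstream PBA repository
    # this script is adapted from. contextlib.nested also exists only in Python 2.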
    with contextlib.nested(*scopes):
        if hparams.model_name == 'pyramid_net':
            logits = build_shake_drop_model(
                inputs, num_classes, is_training)
        elif hparams.model_name == 'wrn':
            logits = build_wrn_model(
                inputs, num_classes, hparams.wrn_size)
        elif hparams.model_name == 'shake_shake':
            logits = build_shake_shake_model(
                inputs, num_classes, hparams, is_training)
    return logits


class CifarModel(object):
    """Builds an image model for Cifar10/Cifar100."""

    def __init__(self, hparams):
        self.hparams = hparams

    def build(self, mode):
        """Construct the cifar model."""
        assert mode in ['train', 'eval']
        self.mode = mode
        self._setup_misc(mode)
        self._setup_images_and_labels()
        self._build_graph(self.images, self.labels, mode)

        self.init = tf.group(tf.global_variables_initializer(),
                             tf.local_variables_initializer())

    def _setup_misc(self, mode):
        """Sets up miscellaneous in the cifar model constructor."""
        self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
        self.reuse = None if (mode == 'train') else True
        self.batch_size = self.hparams.batch_size
        if mode == 'eval':
            self.batch_size = 25

    def _setup_images_and_labels(self):
        """Sets up image and label placeholders for the cifar model."""
        if FLAGS.dataset == 'cifar10':
            self.num_classes = 10
        else:
            self.num_classes = 100
        self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
        self.labels = tf.placeholder(tf.float32,
                                     [self.batch_size, self.num_classes])

    def assign_epoch(self, session, epoch_value):
        session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})

    def _build_graph(self, images, labels, mode):
        """Constructs the TF graph for the cifar model.

        Args:
          images: A 4-D image Tensor.
          labels: A 2-D labels Tensor.
          mode: string indicating training mode (e.g., 'train', 'valid', 'test').
        """
        is_training = 'train' in mode
        if is_training:
            self.global_step = tf.train.get_or_create_global_step()

        logits = build_model(
            images,
            self.num_classes,
            is_training,
            self.hparams)
        self.predictions, self.cost = helper_utils.setup_loss(
            logits, labels)
        self.accuracy, self.eval_op = tf.metrics.accuracy(
            tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
        self._calc_num_trainable_params()

        # Adds L2 weight decay to the cost
        self.cost = helper_utils.decay_weights(self.cost,
                                               self.hparams.weight_decay_rate)
        #### Warning: differs from the original implementation

        self.init = tf.group(tf.global_variables_initializer(),
                             tf.local_variables_initializer())


########################################################

######## PBA ############

# Parallel Cifar model trainer
tf.flags.DEFINE_string('model_name', 'wrn',
                       'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
                       'pyramid_net')
tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
tf.flags.DEFINE_string('data_path', '/tmp/data',
                       'Directory where dataset is located.')
tf.flags.DEFINE_string('dataset', 'cifar10',
                       'Dataset to train with. Either cifar10 or cifar100')
tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
## ???

FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'

hparams = tf.contrib.training.HParams(
    train_size=50000,
    validation_size=0,
    eval_test=1,
    dataset=FLAGS.dataset,
    data_path=FLAGS.data_path,
    batch_size=128,
    gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
    hparams.add_hparam('model_name', 'wrn')
    hparams.add_hparam('num_epochs', 200)
    hparams.add_hparam('wrn_size', 160)
    hparams.add_hparam('lr', 0.1)
    hparams.add_hparam('weight_decay_rate', 5e-4)

data_loader = data_utils.DataSet(hparams)
data_loader.reset()

with tf.Graph().as_default():  #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
    """Builds the image models for train and eval."""
    # Determine if we should build the train and eval model. When using
    # distributed training we only want to build one or the other and not both.
    with tf.variable_scope('model', use_resource=False):
        m = CifarModel(hparams)
        m.build('train')
        #self._num_trainable_params = m.num_trainable_params
        #self._saver = m.saver
    #with tf.variable_scope('model', reuse=True, use_resource=False):
    #    meval = CifarModel(hparams)
    #    meval.build('eval')


##### FAR-HO ####
for _ in range(n_hyper_iterations):
    pass  # TODO: hyper-iteration body missing in this file
FAR-HO/test.py (new file, +92)
@@ -0,0 +1,92 @@
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt

sess = tf.InteractiveSession()


def get_data():
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
    return datasets.train, datasets.validation


def g_logits(x, y):
    with tf.variable_scope('model'):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits


x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
train_set, validation_set = get_data()

lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)
E = tf.reduce_mean(ce)
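# Bilevel setup: L is the inner (training) objective, with each example's
# cross-entropy weighted by sigmoid(lambdas); E is the outer (validation)
# objective that drives the updates of the hyperparameters `lambdas` and `lr`.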

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it = 10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

T = 20  # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()

print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []

for _ in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)

    inner_obj = L.eval(train_set_supplier())
    outer_obj = E.eval(validation_set_supplier())
    inner_losses.append(inner_obj)
    outer_losses.append(outer_obj)
    print('inner:', inner_obj)
    print('outer:', outer_obj)

    train_acc = accuracy.eval(train_set_supplier())
    val_acc = accuracy.eval(validation_set_supplier())
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    print('training accuracy', train_acc)
    print('validation accuracy', val_acc)

    print('learning rate', lr.eval())
    print('norm of examples weight', tf.norm(lambdas).eval())
    print('-'*50)

plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)

plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))
FAR-HO/test_cnn.py (new file, +126)
@@ -0,0 +1,126 @@
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
import blue_utils as butil

#Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

#Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 500
T = 20  # Number of inner iterations
rev_it = 10
hp_lr = 1.e-3
##########################

#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)

#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')

logits = butil.cnn(x, y)

train_set, validation_set, test_set = get_data(data_split=(.05, .05,))

butil.viz_data(train_set)
print('Data sampled !')

# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))
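# NOTE: these constraints clip each hyperparameter to a single value (min == max),
# effectively freezing lr, mu and rho at their initial values; the commented-out
# lines above are the unconstrained-range versions.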

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()])  # drop the second term of the loss when the HPs are unused
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

train_set_supplier = train_set.create_supplier(x, y, batch_size=256)  # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

his_params = []

tf.global_variables_initializer().run()

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)
    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]
    his_params.append(res)

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', L.eval(train_set_supplier()))
    print('outer:', E.eval(validation_set_supplier()))
    print('training accuracy:', res[5])
    print('validation accuracy:', res[6])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])

#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])

ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])

plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))
FAR-HO/test_cnn_aug.py (new file, +141)
@@ -0,0 +1,141 @@
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
import blue_utils as butil

#Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

#Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 10
T = 10  # Number of inner iterations
rev_it = 10
hp_lr = 0.02
##########################

#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)

#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')

logits = butil.cnn(x, y)

train_set, validation_set, test_set = get_data(data_split=(.1, .1,))

probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9))
probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9))

#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))

#probX, probY = 0.5, 0.5
#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)]
policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
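# probX and probY are FAR-HO hyperparameter tensors used as the `probability`
# field of the policy tuples; augmentation_transforms.apply_policy() reads them
# with probability.eval(), which is presumably how the augmentation probabilities
# end up being tuned by the outer optimization.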
print('Hyp :', far.utils.hyperparameters(scope=None))

#butil.viz_data(train_set, aug_policy= policy)
#print('Data sampled !')

# Artificially add the transformations to the loss just so that they are taken into account in the dynamics of the graph
probX_loss = tf.sigmoid(probX)
probY_loss = tf.sigmoid(probY)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(probX_loss*probY_loss*ce)
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.AdamOptimizer()
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy)  # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

#print(train_set.dim_data, validation_set.dim_data)

his_params = []

tf.global_variables_initializer().run()

butil.viz_data(train_set, fig_name='Start_sample', aug_policy=policy)
print('Data sampled !')

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier,
               _skip_hyper_ts=True)
    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]
    his_params.append(res)

    butil.viz_data(train_set, fig_name='Train_sample_{}'.format(hyt), aug_policy=policy)
    print('Data sampled !')

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', L.eval(train_set_supplier()))
    print('outer:', E.eval(validation_set_supplier()))
    print('training accuracy:', res[4])
    print('validation accuracy:', res[5])
    print('Transformation : ProbX -', res[0], '/ProbY -', res[1])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('ProbX')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('ProbY')
ax[1].plot([e[1] for e in his_params])

ax[2].set_title('Tr. and val. errors')
ax[2].plot([e[2] for e in his_params])
ax[2].plot([e[3] for e in his_params])

ax[3].set_title('Tr. and val. acc')
ax[3].plot([e[4] for e in his_params])
ax[3].plot([e[5] for e in his_params])

plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T))
FAR-HO/test_fc.py (new file, +133)
@@ -0,0 +1,133 @@
#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
#import blue_utils as butil

#Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
    print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

#Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 90
T = 20  # Number of inner iterations
rev_it = 10
hp_lr = 0.1
epochs = 10
batch_size = 256
##########################

#MNIST
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)

#CNN : reshape = False
#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')

#logits = butil.cnn(x,y)

train_set, validation_set, test_set = get_data(data_split=(.6, .3,))

#butil.viz_data(train_set)

# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))


ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce)  #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()])  # drop the second term of the loss when the HPs are unused
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.MomentumOptimizer(lr, mu)
#inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step())

train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)


print('Hyper iterations per epochs', int(train_set.num_examples/batch_size*epochs/T))

his_params = []

tf.global_variables_initializer().run()

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier,
               _skip_hyper_ts=False)
    res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]

    his_params.append(res)

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', res[3])
    print('outer:', res[4])
    print('training accuracy:', res[5])
    print('validation accuracy:', res[6])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])

#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])

ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])

plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T))
#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T))
Gradient-Descent-The-Ultimate-Optimizer/.gitignore (vendored, new file, +5)
@@ -0,0 +1,5 @@
venv/
__pycache__
data/
log/
.vscode/
Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf (new binary file)
Gradient-Descent-The-Ultimate-Optimizer/README.md (new file, +33)
@@ -0,0 +1,33 @@
# Gradient Descent: The Ultimate Optimizer

[Code style: black](https://github.com/ambv/black)

| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |

This repository contains the paper and the code for the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).

I couldn't find the code (which appears in the appendix at the end of the
paper) anywhere on the web. What I present here is the code of the paper with
instructions on how to set it up.

Getting the code into a runnable state required some fixes on my part, so the
code may differ slightly from what is presented in the paper.

## Set up

```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```

When you are done you can exit the virtualenv with

```shell
deactivate
```
Gradient-Descent-The-Ultimate-Optimizer/data_aug.py (new file, +244)
@@ -0,0 +1,244 @@
from hyperopt import *
#from hyperopt_v2 import *

import torchvision.transforms.functional as TF
import torchvision.transforms as T

#from scipy import ndimage
import kornia

import random
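# NOTE: torch, nn, F and math are not imported explicitly here; they are
# expected to be re-exported by the star import from `hyperopt` above.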


class MNIST_FullyConnected_Augmented(Optimizable):
    """
    A fully-connected NN for the MNIST task. This is Optimizable but not itself
    an optimizer.
    """

    def __init__(self, num_inp, num_hid, num_out, optimizer, device=torch.device('cuda')):
        self.device = device
        #print(self.device)
        parameters = {
            "w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
            "b1": torch.zeros(num_hid, device=self.device).t(),
            "w2": torch.zeros(num_hid, num_out, device=self.device).t(),
            "b2": torch.zeros(num_out, device=self.device).t(),

            #Data augmentation
            "prob": torch.tensor(0.5, device=self.device),
            "mag": torch.tensor(180.0, device=self.device),
        }
        super().__init__(parameters, optimizer)

    def initialize(self):
        nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
        self.optimizer.initialize()
        #print(self.device)

    def forward(self, x):
        """Compute a prediction."""
        #print("Prob:", self.parameters["prob"].item())
        if random.random() < self.parameters["prob"]:
            #angle = 45
            #x = TF.rotate(x, angle)
            #print(self.device)
            #x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
            x = x + self.parameters["mag"]
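            # This appears to stand in for a real augmentation: unlike the
            # commented-out TF.rotate call, an additive intensity shift by `mag`
            # stays differentiable w.r.t. the augmentation hyperparameter.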

        x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
        x = torch.tanh(x)
        x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
        x = torch.tanh(x)
        x = F.log_softmax(x, dim=1)
        return x

    def adjust(self):
        self.optimizer.adjust(self.parameters)

    def __str__(self):
        return "mnist_FC_augmented / " + str(self.optimizer)


class LeNet(Optimizable, nn.Module):
    def __init__(self, num_inp, num_out, optimizer, device=torch.device('cuda')):
        nn.Module.__init__(self)
        self.device = device
        parameters = {
            "w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
            "b1": torch.zeros(20, device=self.device),
            "w2": torch.zeros(50, 20, 5, 5, device=self.device),
            "b2": torch.zeros(50, device=self.device),
            "w3": torch.zeros(500, 4*4*50, device=self.device),
            "b3": torch.zeros(500, device=self.device),
            "w4": torch.zeros(10, 500, device=self.device),
            "b4": torch.zeros(10, device=self.device),

            #Data augmentation
            "prob": torch.tensor(1.0, device=self.device),
            "mag": torch.tensor(180.0, device=self.device),
        }
        super().__init__(parameters, optimizer)

    def initialize(self):
        nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
        self.optimizer.initialize()

    def forward(self, x):

        if random.random() < self.parameters["prob"]:

            batch_size = x.shape[0]
            # create transformation (rotation)
            alpha = self.parameters["mag"]  # in degrees
            angle = torch.ones(batch_size, device=self.device) * alpha

            # define the rotation center
            center = torch.ones(batch_size, 2, device=self.device)
            center[..., 0] = x.shape[3] / 2  # x
            center[..., 1] = x.shape[2] / 2  # y

            #print(x.shape, center)
            # define the scale factor
            scale = torch.ones(batch_size, device=self.device)

            # compute the transformation matrix
            M = kornia.get_rotation_matrix2d(center, angle, scale)

            # apply the transformation to original image
            x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3]))  # dsize=(h, w)
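            # kornia's warp_affine is differentiable, so gradients can flow back
            # through the rotation to the `mag` hyperparameter.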
|
||||
|
||||
#print("Start Shape ", x.shape)
|
||||
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
|
||||
#print("Shape ", out.shape)
|
||||
return F.log_softmax(out, dim=1)
|
||||
|
||||
def adjust(self):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist_CNN_augmented / " + str(self.optimizer)
|
||||
|
||||
class LeNet_v2(Optimizable, nn.Module):
    def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):

        nn.Module.__init__(self)
        self.device = device
        self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        #self.fc1 = nn.Linear(4*4*50, 500)
        self.fc1 = nn.Linear(1250, 500)
        self.fc2 = nn.Linear(500, 10)

        #print(self.conv1.weight)
        parameters = {
            "w1": self.conv1.weight,
            "b1": self.conv1.bias,
            "w2": self.conv2.weight,
            "b2": self.conv2.bias,
            "w3": self.fc1.weight,
            "b3": self.fc1.bias,
            "w4": self.fc2.weight,
            "b4": self.fc2.bias,

            #Data augmentation
            "prob": torch.tensor(0.5, device=self.device),
            "mag": torch.tensor(1.0, device=self.device),
        }
        Optimizable.__init__(self, parameters, optimizer)

    '''
    def forward(self, x): #Saturates the memory ???
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        #x = x.view(-1, 4*4*50)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    '''
    def forward(self, x):

        if random.random() < self.parameters["prob"].item():
            #print(self.parameters["prob"])
            #x = [T.ToTensor()(
            #    TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
            #    for im in torch.unbind(x,dim=0)]
            #x = torch.stack(x,dim=0)

            #x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
            #    for im in torch.unbind(x,dim=0)]
            #x = torch.stack(x,dim=0)

            #x = [im + self.parameters["mag"]
            #    for im in torch.unbind(x,dim=0)]
            #x = torch.stack(x,dim=0)

            batch_size = x.shape[0]
            # create transformation (rotation)
            alpha = self.parameters["mag"] * 180 # in degrees
            angle = torch.ones(batch_size, device=self.device) * alpha

            # define the rotation center
            center = torch.ones(batch_size, 2, device=self.device)
            center[..., 0] = x.shape[3] / 2 # x
            center[..., 1] = x.shape[2] / 2 # y

            #print(x.shape, center)
            # define the scale factor
            scale = torch.ones(batch_size, device=self.device)

            # compute the transformation matrix
            M = kornia.get_rotation_matrix2d(center, angle, scale)

            # apply the transformation to original image
            x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)

        #print("Start Shape ", x.shape)
        out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
        #print("Shape ", out.shape)
        out = F.max_pool2d(out, 2)
        #print("Shape ", out.shape)
        out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
        #print("Shape ", out.shape)
        out = F.max_pool2d(out, 2)
        #print("Shape ", out.shape)
        out = out.view(out.size(0), -1)
        #print("Shape ", out.shape)
        out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
        #print("Shape ", out.shape)
        out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
        #print("Shape ", out.shape)
        return F.log_softmax(out, dim=1)

    def initialize(self):
        self.optimizer.initialize()

    def adjust(self):
        self.optimizer.adjust(self.parameters)

    def adjust_val(self):
        self.optimizer.adjust_val(self.parameters)

    def eval(self):
        self.parameters['prob'] = torch.tensor(0.0, device=self.device)

    def __str__(self):
        return "mnist_CNN_augmented / " + str(self.optimizer)

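# Added illustrative sketch (not part of the original file): the kornia-based rotation
# used in the forward() methods above, shown in isolation. It assumes kornia is imported
# at the top of this module, as the calls above already require; `dummy_batch` and
# `degrees` are placeholder names introduced only for this sketch.
def _rotation_sketch(dummy_batch, degrees=30.0):
    batch_size = dummy_batch.shape[0]
    angle = torch.ones(batch_size, device=dummy_batch.device) * degrees  # one angle per image
    center = torch.ones(batch_size, 2, device=dummy_batch.device)        # rotate around the image center
    center[..., 0] = dummy_batch.shape[3] / 2  # x
    center[..., 1] = dummy_batch.shape[2] / 2  # y
    scale = torch.ones(batch_size, device=dummy_batch.device)            # keep the original scale
    M = kornia.get_rotation_matrix2d(center, angle, scale)               # batch of 2x3 affine matrices
    return kornia.warp_affine(dummy_batch, M, dsize=(dummy_batch.shape[2], dummy_batch.shape[3]))
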
52
Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py
Normal file
@ -0,0 +1,52 @@
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF

class MNIST_aug(Dataset):

    training_file = 'training.pt'
    test_file = 'test.pt'
    classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
               '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']

    def __init__(self):
        self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
        self.set_stage(0) # initial stage

    def __getitem__(self, index):
        image = self.images[index]

        # Just apply your transformations here
        image = self.crop(image)
        x = TF.to_tensor(image)
        return x

    def set_stage(self, stage):
        if stage == 0:
            print('Using (32, 32) crops')
            self.crop = transforms.RandomCrop((32, 32))
        elif stage == 1:
            print('Using (28, 28) crops')
            self.crop = transforms.RandomCrop((28, 28))

    def __len__(self):
        return len(self.images)


dataset = MNIST_aug() #was MyData(), which is not defined in this file
loader = DataLoader(dataset,
                    batch_size=2,
                    num_workers=2,
                    shuffle=True)

for batch_idx, data in enumerate(loader):
    print('Batch idx {}, data shape {}'.format(
        batch_idx, data.shape))

loader.dataset.set_stage(1)

for batch_idx, data in enumerate(loader):
    print('Batch idx {}, data shape {}'.format(
        batch_idx, data.shape))
150
Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py
Normal file
@ -0,0 +1,150 @@
#from hyperopt import *
from hyperopt_v2 import *

import torchvision.transforms.functional as TF
import torchvision.transforms as T

#from scipy import ndimage
import kornia

import random


class LeNet_v3(nn.Module):
    def __init__(self, num_inp, num_out):
        super(LeNet_v3, self).__init__()
        self.params = nn.ParameterDict({
            'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
            'b1': nn.Parameter(torch.zeros(20)),
            'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
            'b2': nn.Parameter(torch.zeros(50)),
            'w3': nn.Parameter(torch.zeros(500, 4*4*50)),
            'b3': nn.Parameter(torch.zeros(500)),
            'w4': nn.Parameter(torch.zeros(10, 500)),
            'b4': nn.Parameter(torch.zeros(10))
        })


    def initialize(self):
        nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))

    def forward(self, x):
        #print("Start Shape ", x.shape)
        out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
        #print("Shape ", out.shape)
        out = F.max_pool2d(out, 2)
        #print("Shape ", out.shape)
        out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
        #print("Shape ", out.shape)
        out = F.max_pool2d(out, 2)
        #print("Shape ", out.shape)
        out = out.view(out.size(0), -1)
        #print("Shape ", out.shape)
        out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
        #print("Shape ", out.shape)
        out = F.linear(out, self.params["w4"], self.params["b4"])
        #print("Shape ", out.shape)
        return F.log_softmax(out, dim=1)


    def print_grad_fn(self):
        for n, p in self.params.items():
            print(n, p.grad_fn)

    def __str__(self):
        return "mnist_CNN_augmented / "

class Data_aug(nn.Module):
    def __init__(self):
        super(Data_aug, self).__init__()
        self.data_augmentation = True
        self.params = nn.ParameterDict({
            "prob": nn.Parameter(torch.tensor(0.5)),
            "mag": nn.Parameter(torch.tensor(180.0))
        })

        #self.params["mag"].register_hook(print)

    def forward(self, x):

        if self.data_augmentation and self.training and random.random() < self.params["prob"]:
            #print('Aug')
            batch_size = x.shape[0]
            # create transformation (rotation)
            alpha = self.params["mag"] # in degrees
            angle = torch.ones(batch_size, device=x.device) * alpha

            # define the rotation center
            center = torch.ones(batch_size, 2, device=x.device)
            center[..., 0] = x.shape[3] / 2 # x
            center[..., 1] = x.shape[2] / 2 # y

            #print(x.shape, center)
            # define the scale factor
            scale = torch.ones(batch_size, device=x.device)

            # compute the transformation matrix
            M = kornia.get_rotation_matrix2d(center, angle, scale)

            # apply the transformation to original image
            x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)

        return x

    def eval(self):
        #was: self.params['prob'] = torch.tensor(0.0, device=self.device); ParameterDict entries
        #must stay Parameters and Data_aug never defines a device attribute
        self.params['prob'].data = torch.tensor(0.0, device=self.params['prob'].device)
        nn.Module.eval(self)

    def data_augmentation(self, mode=True): #NOTE: shadowed by the boolean attribute of the same name set in __init__
        self.data_augmentation = mode

    def print_grad_fn(self):
        for n, p in self.params.items():
            print(n, p.grad_fn)

    def __str__(self):
        return "Data_Augmenter / "

class Augmented_model(nn.Module):
    def __init__(self, model, data_augmenter):
        #self.model = model
        #self.data_aug = data_augmenter
        super(Augmented_model, self).__init__() #nn.Module.__init__(self)
        #super().__init__()
        self.mods = nn.ModuleDict({
            'data_aug': data_augmenter,
            'model': model
        })
        #for name, param in self.mods.named_parameters():
        #    print(name, type(param.data), param.size())

        #params = self.mods.named_parameters() #self.parameters()
        #parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
        #Optimizable.__init__(self, params, optimizer)

    def initialize(self):
        self.mods['model'].initialize()

    def forward(self, x):
        return self.mods['model'](self.mods['data_aug'](x))

    #def adjust(self):
    #    self.optimizer.adjust(self) #parameters of the dicts

    def data_augmentation(self, mode=True):
        self.mods['data_aug'].data_augmentation = mode

    def begin(self):
        for param in self.parameters():
            param.requires_grad_() # keep gradient information…
            param.retain_grad() # even if not a leaf…

    def print_grad_fn(self):
        for n, m in self.mods.items():
            m.print_grad_fn()

    def __str__(self):
        return str(self.mods['data_aug']) + str(self.mods['model']) # + str(self.optimizer)
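
# Added usage sketch (illustrative, not part of the original file): how the classes above
# compose, mirroring the construction sketched in tests.py. The dummy MNIST-shaped batch
# and the CPU tensors are assumptions made only for this sketch.
def _augmented_model_sketch():
    m = LeNet_v3(1, 10)
    a = Data_aug()
    model = Augmented_model(model=m, data_augmenter=a)
    model.initialize()               # Kaiming-init the LeNet_v3 weights
    model.begin()                    # keep gradients on all parameters, including prob/mag
    x = torch.zeros(4, 1, 28, 28)    # dummy batch: 4 grayscale 28x28 images
    out = model(x)                   # Data_aug (possibly) rotates, then LeNet_v3 classifies
    return out.shape                 # torch.Size([4, 10])
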
5
Gradient-Descent-The-Ultimate-Optimizer/graph/graph
Normal file
@ -0,0 +1,5 @@
digraph {
graph [size="12,12"]
node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
94296775052080 [label=NoneType fillcolor=darkolivegreen1]
}
19
Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg
Normal file
@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
 -->
<!-- Title: %3 Pages: 1 -->
<svg width="75pt" height="30pt"
 viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
<!-- 94296775052080 -->
<g id="node1" class="node">
<title>94296775052080</title>
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
</g>
</g>
</svg>
345
Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py
Normal file
@ -0,0 +1,345 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Optimizable():#nn.Module):
    """
    This is the interface for anything that has parameters that need to be
    optimized, somewhat like torch.nn.Module but with the right plumbing for
    hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
    interface which does not give us enough control over the detachments.)
    Nominal operation of an Optimizable at the lowest level is as follows:
        o = MyOptimizable(…)
        o.initialize()
        loop {
            o.begin()
            o.zero_grad()
            loss = –compute loss function from parameters–
            loss.backward()
            o.adjust()
        }
    Optimizables recursively handle updates to their optimiz*ers*.
    """
    #def __init__(self):
    #    super(Optimizable, self).__init__()
    #    self.parameters = nn.Parameter(torch.zeros(()))

    def __init__(self, parameters, optimizer):
        #super(Optimizable, self).__init__()
        self.parameters = parameters # a dict mapping names to tensors
        self.optimizer = optimizer # which must itself be Optimizable!
        self.all_params_with_gradients = []
        #self.device = device

    def initialize(self):
        """Initialize parameters, e.g. with a Kaiming initializer."""
        pass

    def begin(self):
        """Enable gradient tracking on current parameters."""
        self.all_params_with_gradients = [] #reinitialize to avoid overloading the memory
        for name, param in self.parameters.items():
        #for param in self.parameters:
            param.requires_grad_() # keep gradient information…
            param.retain_grad() # even if not a leaf…
            #param.to(self.device)
            #if param.device == torch.device('cuda:0'):
            #    print(name, param.device)
            self.all_params_with_gradients.append(param)
        self.optimizer.begin()

    def zero_grad(self):
        """ Set all gradients to zero. """
        for param in self.all_params_with_gradients:
            #param = param.to(self.device)
            param.grad = torch.zeros(param.shape, device=param.device)
        self.optimizer.zero_grad()

    """ Note: at this point you would probably call .backward() on the loss
    function. """

    def adjust(self):
        """ Update parameters """
        pass


    def print_grad_fn(self):
        self.optimizer.print_grad_fn()
        for n, p in self.parameters.items():
            print(n, " - ", p.grad_fn)

    def param_grad(self):
        return self.all_params_with_gradients

    def param(self, param_name):
        return self.parameters[param_name].item()

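# Added usage sketch (illustrative, not part of the original file): this just writes out
# the "nominal operation" loop from the Optimizable docstring above, using the
# MNIST_FullyConnected and Adam classes defined further down (names are resolved at call
# time). The data-loader argument and the CPU device are assumptions of this sketch;
# main.py contains the project's actual training loop.
def _nominal_loop_sketch(dl_train):
    cpu = torch.device('cpu')
    opt = Adam(device=cpu, optimizer=Adam(device=cpu)) # hyperoptimize Adam with another Adam
    model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
    model.initialize()
    for features_, labels_ in dl_train:
        features = torch.reshape(features_, (features_.shape[0], 28 * 28))
        model.begin()                        # re-enable gradient tracking on the current tensors
        model.zero_grad()
        loss = F.nll_loss(model.forward(features), labels_)
        loss.backward(create_graph=True)     # keep the graph so the hyperparameters also get gradients
        model.adjust()                       # weight step, then (recursively) hyperparameter step
    return model
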
class MNIST_FullyConnected(Optimizable):
    """
    A fully-connected NN for the MNIST task. This is Optimizable but not itself
    an optimizer.
    """

    def __init__(self, num_inp, num_hid, num_out, optimizer):
        parameters = {
            "w1": torch.zeros(num_inp, num_hid).t(),
            "b1": torch.zeros(num_hid).t(),
            "w2": torch.zeros(num_hid, num_out).t(),
            "b2": torch.zeros(num_out).t(),
        }
        super().__init__(parameters, optimizer)

    def initialize(self):
        nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
        self.optimizer.initialize()

    def forward(self, x):
        """Compute a prediction."""
        x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
        x = torch.tanh(x)
        x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
        x = torch.tanh(x)
        x = F.log_softmax(x, dim=1)
        return x

    def adjust(self):
        self.optimizer.adjust(self.parameters)

    def __str__(self):
        return "mnist / " + str(self.optimizer)

class NoOpOptimizer(Optimizable):#, nn.Module):
    """
    NoOpOptimizer sits on top of a stack, and does not affect what lies below.
    """

    def __init__(self):
        #super(Optimizable, self).__init__()
        pass

    def initialize(self):
        pass

    def begin(self):
        pass

    def zero_grad(self):
        pass

    def adjust(self, params):
        pass

    def adjust_val(self, params):
        pass

    def print_grad_fn(self):
        pass

    def __str__(self):
        return "static"

class Adam(Optimizable):
    """
    A fully hyperoptimizable Adam optimizer
    """

    def clamp(x):
        return (x.tanh() + 1.0) / 2.0

    def unclamp(y):
        z = y * 2.0 - 1.0
        return ((1.0 + z) / (1.0 - z)).log() / 2.0

    def __init__(
        self,
        alpha=0.001,
        beta1=0.9,
        beta2=0.999,
        log_eps=-8.0,
        optimizer=NoOpOptimizer(),
        device = torch.device('cuda')
    ):
        self.device = device
        parameters = {
            "alpha": torch.tensor(alpha, device=self.device),
            "beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
            "beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
            "log_eps": torch.tensor(log_eps, device=self.device),
        }
        super().__init__(parameters, optimizer)
        self.num_adjustments = 0
        self.num_adjustments_val = 0
        self.cache = {}

        for name, param in parameters.items():
            param.requires_grad_() # keep gradient information…
            param.retain_grad() # even if not a leaf…
            #param.to(self.device)
            #if param.device == torch.device('cuda:0'):
            #    print(name, param.device)

    def adjust(self, params): #update the learning (model) parameters
        self.num_adjustments += 1
        self.optimizer.adjust(self.parameters)
        #print('Adam update')
        t = self.num_adjustments
        beta1 = Adam.clamp(self.parameters["beta1"])
        beta2 = Adam.clamp(self.parameters["beta2"])
        for name, param in params.items():
            if name == "mag": continue
            if name not in self.cache:
                self.cache[name] = {
                    "m": torch.zeros(param.shape, device=self.device),
                    "v": torch.zeros(param.shape, device=self.device)
                    + 10.0 ** self.parameters["log_eps"].data
                    # NOTE that we add a little 'fudge factor' here because sqrt is not
                    # differentiable at exactly zero
                }
            #print(name, param.device)
            g = param.grad.detach()
            self.cache[name]["m"] = m = (
                beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
            )
            self.cache[name]["v"] = v = (
                beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
            )
            self.all_params_with_gradients.append(m)
            self.all_params_with_gradients.append(v)
            m_hat = m / (1.0 - beta1 ** float(t))
            v_hat = v / (1.0 - beta2 ** float(t))
            dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
            params[name] = param.detach() - self.parameters["alpha"] * dparam
            #print(name)

    def adjust_val(self, params): #update the transformation (augmentation) parameters
        self.num_adjustments_val += 1
        self.optimizer.adjust_val(self.parameters)
        #print('Adam update')
        t = self.num_adjustments_val
        beta1 = Adam.clamp(self.parameters["beta1"])
        beta2 = Adam.clamp(self.parameters["beta2"])
        for name, param in params.items():
            if name != "mag": continue
            if name not in self.cache:
                self.cache[name] = {
                    "m": torch.zeros(param.shape, device=self.device),
                    "v": torch.zeros(param.shape, device=self.device)
                    + 10.0 ** self.parameters["log_eps"].data
                    # NOTE that we add a little 'fudge factor' here because sqrt is not
                    # differentiable at exactly zero
                }
            #print(name, param.device)
            g = param.grad.detach()
            self.cache[name]["m"] = m = (
                beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
            )
            self.cache[name]["v"] = v = (
                beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
            )
            self.all_params_with_gradients.append(m)
            self.all_params_with_gradients.append(v)
            m_hat = m / (1.0 - beta1 ** float(t))
            v_hat = v / (1.0 - beta2 ** float(t))
            dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
            params[name] = param.detach() - self.parameters["alpha"] * dparam
            #print(name)

    def __str__(self):
        return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)

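# Added note (illustrative, not part of the original file): clamp maps an unconstrained
# real onto (0, 1), since (tanh(x) + 1) / 2 == sigmoid(2x), and unclamp is its inverse,
# artanh(2y - 1). Adam therefore stores beta1/beta2 unclamped and re-clamps them inside
# adjust()/adjust_val(), so they can be optimized freely while always staying valid
# momentum factors in (0, 1).
def _clamp_round_trip_sketch(beta=0.9):
    """Minimal sanity sketch: unclamp followed by clamp recovers the original value."""
    b = torch.tensor(beta)
    return torch.allclose(Adam.clamp(Adam.unclamp(b)), b)
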
'''
|
||||
class SGD(Optimizable):
|
||||
"""
|
||||
A hyperoptimizable SGD
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
|
||||
parameters = {"alpha": torch.tensor(alpha)}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def adjust(self, params):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
for name, param in params.items():
|
||||
g = param.grad.detach()
|
||||
params[name] = param.detach() - g * self.parameters["alpha"]
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
|
||||
|
||||
class SGDPerParam(Optimizable):
|
||||
"""
|
||||
Like above, but can be taught a separate step size for each parameter it
|
||||
tunes.
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
|
||||
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def adjust(self, params):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
for name, param in params.items():
|
||||
g = param.grad.detach()
|
||||
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%s) / " % str(
|
||||
{k: t.item() for k, t in self.parameters.items()}
|
||||
) + str(self.optimizer)
|
||||
'''
|
||||
'''
|
||||
class AdamBaydin(Optimizable):
|
||||
""" Same as above, but only optimizes the learning rate, treating the
|
||||
remaining hyperparameters as constants. """
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha=0.001,
|
||||
beta1=0.9,
|
||||
beta2=0.999,
|
||||
log_eps=-8.0,
|
||||
optimizer=NoOpOptimizer(),
|
||||
):
|
||||
parameters = {"alpha": torch.tensor(alpha)}
|
||||
self.beta1 = beta1
|
||||
self.beta2 = beta2
|
||||
self.log_eps = log_eps
|
||||
super().__init__(parameters, optimizer)
|
||||
self.num_adjustments = 0
|
||||
self.cache = {}
|
||||
|
||||
def adjust(self, params):
|
||||
self.num_adjustments += 1
|
||||
self.optimizer.adjust(self.parameters)
|
||||
t = self.num_adjustments
|
||||
beta1 = self.beta1
|
||||
beta2 = self.beta2
|
||||
for name, param in params.items():
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape),
|
||||
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
|
||||
}
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(m)
|
||||
self.all_params_with_gradients.append(v)
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
|
||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
||||
|
||||
def __str__(self):
|
||||
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
|
||||
'''
|
296
Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py
Normal file
|
@ -0,0 +1,296 @@
|
|||
import math
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.optim.optimizer import Optimizer
|
||||
|
||||
class Optimizable():
|
||||
"""
|
||||
This is the interface for anything that has parameters that need to be
|
||||
optimized, somewhat like torch.nn.Model but with the right plumbing for
|
||||
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
|
||||
interface which does not give us enough control about the detachments.)
|
||||
Nominal operation of an Optimizable at the lowest level is as follows:
|
||||
o = MyOptimizable(…)
|
||||
o.initialize()
|
||||
loop {
|
||||
o.begin()
|
||||
o.zero_grad()
|
||||
loss = –compute loss function from parameters–
|
||||
loss.backward()
|
||||
o.adjust()
|
||||
}
|
||||
Optimizables recursively handle updates to their optimiz*ers*.
|
||||
"""
|
||||
#def __init__(self):
|
||||
# super(Optimizable, self).__init__()
|
||||
# self.parameters = nn.Parameter(torch.zeros(()))
|
||||
|
||||
def __init__(self, parameters, optimizer):
|
||||
self.params = parameters # a dict mapping names to tensors
|
||||
self.optimizer = optimizer # which must itself be Optimizable!
|
||||
self.all_params_with_gradients = []
|
||||
#self.device = device
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize parameters, e.g. with a Kaiming initializer."""
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
"""Enable gradient tracking on current parameters."""
|
||||
self.all_params_with_gradients = nn.ParameterList() #reinitialize to avoid overloading the memory
|
||||
print("Opti param :", type(self.params))
|
||||
#for name, param in self.params:
|
||||
if isinstance(self.params,dict): #Dict
|
||||
for name, param in self.params.items(): #iterate over (name, param) pairs, not just the keys
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
self.all_params_with_gradients.append(param)
|
||||
if isinstance(self.params,list): #List
|
||||
for param in self.params:
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
self.all_params_with_gradients.append(param)
|
||||
self.optimizer.begin()
|
||||
|
||||
def zero_grad(self):
|
||||
""" Set all gradients to zero. """
|
||||
for param in self.all_params_with_gradients:
|
||||
param.grad = torch.zeros(param.shape, device=param.device)
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
""" Note: at this point you would probably call .backwards() on the loss
|
||||
function. """
|
||||
|
||||
def adjust(self):
|
||||
""" Update parameters """
|
||||
pass
|
||||
|
||||
|
||||
class NoOpOptimizer(Optimizable):#, nn.Module):
|
||||
"""
|
||||
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
#super(Optimizable, self).__init__()
|
||||
pass
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
#print("NoOpt begin")
|
||||
pass
|
||||
|
||||
def zero_grad(self):
|
||||
pass
|
||||
|
||||
def adjust(self, params):
|
||||
pass
|
||||
|
||||
def step(self):
|
||||
pass
|
||||
|
||||
def print_grad_fn(self):
|
||||
pass
|
||||
|
||||
def __str__(self):
|
||||
return "static"
|
||||
|
||||
|
||||
class SGD(Optimizer, nn.Module): #Eviter Optimizer
|
||||
"""
|
||||
A hyperoptimizable SGD
|
||||
"""
|
||||
|
||||
def __init__(self, params, lr=0.01, height=0):
|
||||
self.height=height
|
||||
#params: the parameters to optimize
|
||||
#the remaining (default) values are the optimizer's own hyperparameters
|
||||
print('SGD - H', height)
|
||||
nn.Module.__init__(self)
|
||||
|
||||
optim_keys = ('lr','') #Move into Optimizable? #the '' entry avoids iterating over the characters of a lone string
|
||||
'''
|
||||
self_params = {"lr": torch.tensor(lr),
|
||||
"momentum": 0,
|
||||
"dampening":0,
|
||||
"weight_decay":0,
|
||||
"nesterov": False}
|
||||
'''
|
||||
#self_params = dict(lr=torch.tensor(lr),
|
||||
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
|
||||
|
||||
self_params = nn.ParameterDict({
|
||||
"lr": nn.Parameter(torch.tensor(lr)),
|
||||
"momentum": nn.Parameter(torch.tensor(0.0)),
|
||||
"dampening": nn.Parameter(torch.tensor(0.0)),
|
||||
"weight_decay": nn.Parameter(torch.tensor(0.0)),
|
||||
})
|
||||
|
||||
for k in self_params.keys() & optim_keys:
|
||||
self_params[k].requires_grad_() # keep gradient information…
|
||||
self_params[k].retain_grad() # even if not a leaf…
|
||||
#self_params[k].register_hook(print)
|
||||
|
||||
if height==0:
|
||||
optimizer = NoOpOptimizer()
|
||||
else:
|
||||
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
|
||||
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) #Devrait mar
|
||||
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
|
||||
#optimizer.register_backward_hook(print)
|
||||
|
||||
self.optimizer = optimizer
|
||||
#if(height==0):
|
||||
# for n,p in params.items():
|
||||
# print(n,p)
|
||||
|
||||
#Optimizable.__init__(self, self_params, optimizer)
|
||||
|
||||
#print(type(params))
|
||||
#for p in params:
|
||||
# print(type(p))
|
||||
Optimizer.__init__(self, params, self_params)
|
||||
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
print(type(p.data), p.size())
|
||||
print('End SGD-H', height)
|
||||
|
||||
def begin(self):
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
#print(type(p.data), p.size())
|
||||
p.requires_grad_() # keep gradient information…
|
||||
p.retain_grad() # even if not a leaf…
|
||||
#p.register_hook(lambda x: print(self.height, x.grad_fn))
|
||||
|
||||
self.optimizer.begin()
|
||||
|
||||
def print_grad_fn(self):
|
||||
self.optimizer.print_grad_fn()
|
||||
for group in self.param_groups:
|
||||
for i, p in enumerate(group['params']):
|
||||
print(self.height," - ", i, p.grad_fn)
|
||||
|
||||
#def adjust(self, params):
|
||||
# self.optimizer.adjust(self.params)
|
||||
# for name, param in params.items():
|
||||
# g = param.grad.detach()
|
||||
# params[name] = param.detach() - g * self.params["lr"]
|
||||
|
||||
def step(self):
|
||||
"""Performs a single optimization step.
|
||||
|
||||
Arguments:
|
||||
closure (callable, optional): A closure that reevaluates the model
|
||||
and returns the loss.
|
||||
"""
|
||||
print('SGD start')
|
||||
self.optimizer.step()
|
||||
|
||||
for group in self.param_groups:
|
||||
for i, p in enumerate(group['params']):
|
||||
if p.grad is None:
|
||||
continue
|
||||
#d_p = p.grad.data
|
||||
d_p = p.grad.detach()
|
||||
|
||||
#print(group['lr'])
|
||||
p.data.add_(-group['lr'].item(), d_p)
|
||||
#group['params'][i] = p.detach() - d_p * group['lr']
|
||||
p.data -= group['lr']*d_p #avoid going through .data, it loses gradient info; NOTE: this repeats the add_ update above
|
||||
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
print(p, p.grad)
|
||||
continue
|
||||
|
||||
print("SGD end")
|
||||
#return loss
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
|
||||
|
||||
|
||||
class Adam(Optimizable, nn.Module):
|
||||
"""
|
||||
A fully hyperoptimizable Adam optimizer
|
||||
"""
|
||||
|
||||
def clamp(x):
|
||||
return (x.tanh() + 1.0) / 2.0
|
||||
|
||||
def unclamp(y):
|
||||
z = y * 2.0 - 1.0
|
||||
return ((1.0 + z) / (1.0 - z)).log() / 2.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha=0.001,
|
||||
beta1=0.9,
|
||||
beta2=0.999,
|
||||
log_eps=-8.0,
|
||||
optimizer=NoOpOptimizer(),
|
||||
device = torch.device('cuda')
|
||||
):
|
||||
#super(Adam, self).__init__()
|
||||
nn.Module.__init__(self)
|
||||
self.device = device
|
||||
params = nn.ParameterDict({
|
||||
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
|
||||
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
|
||||
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
|
||||
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
|
||||
})
|
||||
Optimizable.__init__(self, params, optimizer)
|
||||
self.num_adjustments = 0
|
||||
self.cache = {}
|
||||
|
||||
for name, param in params.items():
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
|
||||
def adjust(self, params, pytorch_mod=False):
|
||||
self.num_adjustments += 1
|
||||
self.optimizer.adjust(self.params)
|
||||
t = self.num_adjustments
|
||||
beta1 = Adam.clamp(self.params["beta1"])
|
||||
beta2 = Adam.clamp(self.params["beta2"])
|
||||
|
||||
updated_param = {} #must be a dict: it is indexed by parameter name below
|
||||
if pytorch_mod:
|
||||
params = params.named_parameters(prefix='') #Changer nom d'input...
|
||||
|
||||
for name, param in params:
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape, device=self.device),
|
||||
"v": torch.zeros(param.shape, device=self.device)
|
||||
+ 10.0 ** self.params["log_eps"].data
|
||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
||||
# differentiable at exactly zero
|
||||
}
|
||||
#print(name, param.device)
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(nn.Parameter(m)) #Risque de surcharger la memoire => Dict mieux ?
|
||||
self.all_params_with_gradients.append(nn.Parameter(v))
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
|
||||
updated_param[name] = param.detach() - self.params["alpha"] * dparam
|
||||
|
||||
if pytorch_mod: params.update(updated_param) #Changer nom d'input...
|
||||
else: params = updated_param
|
||||
|
||||
def __str__(self):
|
||||
return "adam(" + str(self.params) + ") / " + str(self.optimizer)
|
182
Gradient-Descent-The-Ultimate-Optimizer/main.py
Normal file
|
@ -0,0 +1,182 @@
|
|||
import numpy as np
|
||||
import json, math, time, os
|
||||
from hyperopt import *
|
||||
import gc
|
||||
|
||||
BATCH_SIZE = 300
|
||||
|
||||
mnist_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
mnist_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(
|
||||
mnist_train, batch_size=BATCH_SIZE, shuffle=False
|
||||
)
|
||||
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
|
||||
|
||||
|
||||
def test(model):
|
||||
for i, (features_, labels_) in enumerate(dl_test):
|
||||
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
|
||||
|
||||
|
||||
def train(model, epochs=3, height=1):
|
||||
stats = []
|
||||
for epoch in range(epochs):
|
||||
for i, (features_, labels_) in enumerate(dl_train):
|
||||
t0 = time.process_time()
|
||||
model.begin()
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
pred = model.forward(
|
||||
features
|
||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
||||
loss = F.nll_loss(pred, labels)
|
||||
model.zero_grad()
|
||||
loss.backward(create_graph=True)
|
||||
model.adjust()
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"iter": epoch * len(dl_train) + i,
|
||||
"loss": loss.item(),
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
if "." not in k
|
||||
},
|
||||
}
|
||||
stats.append(data)
|
||||
return stats
|
||||
|
||||
|
||||
def run(opt, name="out", usr={}, epochs=3, height=1):
|
||||
torch.manual_seed(0x42)
|
||||
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
||||
print("Running...", str(model))
|
||||
model.initialize()
|
||||
log = train(model, epochs, height)
|
||||
acc = test(model)
|
||||
out = {"acc": acc, "log": log, "usr": usr}
|
||||
with open("log/%s.json" % name, "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
times = [x["time"] for x in log]
|
||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
||||
print("Final accuracy:", acc)
|
||||
return out
|
||||
|
||||
|
||||
def sgd_experiments():
|
||||
run(SGD(0.01), "sgd", epochs=1)
|
||||
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
|
||||
alpha = out["log"][-1]["params"]["alpha"]
|
||||
print(alpha)
|
||||
run(SGD(alpha), "sgd-final", epochs=1)
|
||||
|
||||
|
||||
def adam_experiments():
|
||||
run(Adam(), "adam", epochs=1)
|
||||
print()
|
||||
mo = SGDPerParam(
|
||||
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
|
||||
)
|
||||
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
||||
log_eps = p["log_eps"]
|
||||
print(alpha, beta1, beta2, log_eps)
|
||||
print(mo)
|
||||
run(
|
||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
||||
"adam+sgd-final",
|
||||
epochs=1,
|
||||
)
|
||||
print()
|
||||
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
||||
log_eps = p["log_eps"]
|
||||
print(alpha, beta1, beta2, log_eps)
|
||||
run(
|
||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
||||
"adam2-final",
|
||||
epochs=1,
|
||||
)
|
||||
print()
|
||||
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
|
||||
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
print(alpha)
|
||||
print(mo)
|
||||
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
|
||||
print()
|
||||
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
print(alpha)
|
||||
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
|
||||
|
||||
|
||||
def surface():
|
||||
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
|
||||
for log_alpha in np.linspace(-3, 2, 10):
|
||||
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
|
||||
|
||||
|
||||
def make_sgd_stack(height, top):
|
||||
if height == 0:
|
||||
return SGD(alpha=top)
|
||||
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
|
||||
|
||||
|
||||
def make_adam_stack(height, top=0.0000001):
|
||||
if height == 0:
|
||||
return Adam(alpha=top)
|
||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
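# Added note (illustrative): make_adam_stack(height, top) builds a tower of height+1 Adam
# hyperoptimizers; only the outermost level receives `top`, the inner calls fall back to the
# default top=0.0000001. For example, make_adam_stack(2, 0.001) is equivalent to
#   Adam(alpha=0.001, optimizer=Adam(alpha=0.0000001, optimizer=Adam(alpha=0.0000001)))
# with the innermost Adam sitting on the default NoOpOptimizer at the top of the stack.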
|
||||
|
||||
|
||||
def stack_test():
|
||||
for top in np.linspace(-7, 3, 20):
|
||||
for height in range(6):
|
||||
print("height =", height, "to p=", top)
|
||||
opt = make_sgd_stack(height, 10 ** top)
|
||||
run(
|
||||
opt,
|
||||
"metasgd3-%d@%+.2f" % (height, top),
|
||||
{"height": height, "top": top},
|
||||
epochs=1,
|
||||
height=height,
|
||||
)
|
||||
gc.collect()
|
||||
|
||||
|
||||
def perf_test():
|
||||
for h in range(51):
|
||||
print("height:", h)
|
||||
# opt = make_sgd_stack(h, 0.01)
|
||||
opt = make_adam_stack(h)
|
||||
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
|
||||
gc.collect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
os.mkdir("log")
|
||||
except:
|
||||
print("log/ exists already")
|
||||
|
||||
surface()
|
||||
sgd_experiments()
|
||||
adam_experiments()
|
||||
stack_test()
|
||||
perf_test()
|
5
Gradient-Descent-The-Ultimate-Optimizer/requirements.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
numpy==1.17.2
|
||||
Pillow==6.2.0
|
||||
six==1.12.0
|
||||
torch==1.2.0
|
||||
torchvision==0.4.0
|
344
Gradient-Descent-The-Ultimate-Optimizer/tests.py
Normal file
|
@ -0,0 +1,344 @@
|
|||
import numpy as np
|
||||
import json, math, time, os
|
||||
from data_aug import *
|
||||
#from data_aug_v2 import *
|
||||
import gc
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from torchviz import make_dot, make_dot_from_trace
|
||||
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
|
||||
BATCH_SIZE = 300
|
||||
#TEST_SIZE = 10000
|
||||
TEST_SIZE = 300
|
||||
DATA_LIMIT = 10
|
||||
|
||||
'''
|
||||
data_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
data_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
'''
|
||||
data_train = torchvision.datasets.CIFAR10(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
|
||||
data_test = torchvision.datasets.CIFAR10(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
train_subset_indices=range(int(len(data_train)/2))
|
||||
val_subset_indices=range(int(len(data_train)/2),len(data_train))
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
|
||||
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
|
||||
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
|
||||
|
||||
def test(model, reshape_in=True, device = torch.device('cuda')):
|
||||
for i, (features_, labels_) in enumerate(dl_test):
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
|
||||
|
||||
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
|
||||
if train_data: dl = dl_train
|
||||
else: dl = dl_val
|
||||
for i, (features_, labels_) in enumerate(dl):
|
||||
if i > DATA_LIMIT : break
|
||||
#t0 = time.process_time()
|
||||
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
#optimizer.begin()
|
||||
#optimizer.zero_grad()
|
||||
model.begin()
|
||||
model.zero_grad()
|
||||
pred = model.forward(features)
|
||||
|
||||
#loss = F.nll_loss(pred, labels)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
|
||||
#model.print_grad_fn()
|
||||
#optimizer.print_grad_fn()
|
||||
#print('-'*50)
|
||||
|
||||
loss.backward(create_graph=True)
|
||||
|
||||
#optimizer.step()
|
||||
if train_data: model.adjust()
|
||||
else: model.adjust_val()
|
||||
|
||||
#tf = time.process_time()
|
||||
#data = {
|
||||
# "time": tf - t0,
|
||||
# "iter": epoch * len(dl_train) + i,
|
||||
# "loss": loss.item(),
|
||||
# "params": {
|
||||
# k: v.item()
|
||||
# for k, v in model.optimizer.parameters.items()
|
||||
# if "." not in k
|
||||
# },
|
||||
#}
|
||||
#stats.append(data)
|
||||
|
||||
#print_torch_mem(i)
|
||||
return loss.item()
|
||||
|
||||
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
|
||||
log = []
|
||||
for epoch in range(epochs):
|
||||
|
||||
#dl_train.dataset.transform=torchvision.transforms.Compose([
|
||||
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
# torchvision.transforms.ToTensor()
|
||||
#])
|
||||
viz_data(fig_name='res/data_sample')
|
||||
t0 = time.process_time()
|
||||
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
|
||||
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
|
||||
|
||||
#acc = test(model=model, reshape_in=reshape_in, device=device)
|
||||
acc = 0
|
||||
|
||||
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"epoch": epoch,
|
||||
"loss": loss,
|
||||
"acc": acc,
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
#for k, v in model.mods.data_aug.params.named_parameters()
|
||||
if "." not in k
|
||||
|
||||
},
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
|
||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
|
||||
param = [p for p in model.param_grad() if p.grad is not None]
|
||||
if(len(param)!=0):
|
||||
print(param[-2],' / ', param[-2].grad)
|
||||
print(param[-1],' / ', param[-1].grad)
|
||||
return log
|
||||
|
||||
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
|
||||
stats = []
|
||||
for epoch in range(epochs):
|
||||
for i, (features_, labels_) in enumerate(dl_train):
|
||||
t0 = time.process_time()
|
||||
model.begin()
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
pred = model.forward(
|
||||
features
|
||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
||||
#loss = F.nll_loss(pred, labels)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
|
||||
#print('-'*50)
|
||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
||||
#if(len(param)!=0):
|
||||
# print(param[-2],' / ', param[-2].grad)
|
||||
# print(param[-1],' / ', param[-1].grad)
|
||||
|
||||
model.zero_grad()
|
||||
loss.backward(create_graph=True)
|
||||
model.adjust()
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"iter": epoch * len(dl_train) + i,
|
||||
"loss": loss.item(),
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
if "." not in k
|
||||
},
|
||||
}
|
||||
stats.append(data)
|
||||
|
||||
print('-'*50)
|
||||
i=0
|
||||
for obj in gc.get_objects():
|
||||
try:
|
||||
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)) and len(obj.size())>1:
|
||||
print(i, type(obj), obj.size())
|
||||
i+=1
|
||||
except:
|
||||
pass
|
||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
|
||||
param = [p for p in model.param_grad() if p.grad is not None]
|
||||
if(len(param)!=0):
|
||||
print(param[-2],' / ', param[-2].grad)
|
||||
print(param[-1],' / ', param[-1].grad)
|
||||
return stats
|
||||
|
||||
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
|
||||
torch.manual_seed(0x42)
|
||||
if not cnn:
|
||||
reshape_in = True
|
||||
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
||||
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
|
||||
|
||||
else:
|
||||
reshape_in = False
|
||||
#model = LeNet(1, 10,opt, device)
|
||||
#model = LeNet_v2(1, 10,opt, device).to(device=device)
|
||||
model = LeNet_v2(3, 10,opt, device).to(device=device)
|
||||
optimizer=None
|
||||
'''
|
||||
m = LeNet_v3(1, 10)
|
||||
a = Data_aug()
|
||||
model = Augmented_model(model=m,
|
||||
data_augmenter=a,
|
||||
optimizer=opt).to(device) #deux fois le meme optimizer ?...
|
||||
'''
|
||||
'''
|
||||
m = LeNet_v3(1, 10)
|
||||
a = Data_aug()
|
||||
model = Augmented_model(model=m, data_augmenter=a).to(device)
|
||||
#optimizer = SGD(model.parameters())
|
||||
optimizer = SGD(model.parameters(), lr=0.01, height=1)
|
||||
'''
|
||||
|
||||
|
||||
#for idx, m in enumerate(model.modules()):
|
||||
# print(idx, '->', m)
|
||||
print("Running...", str(model))
|
||||
model.initialize()
|
||||
#print_model(model)
|
||||
#model.data_augmentation(False)
|
||||
#model.eval()
|
||||
|
||||
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
|
||||
model.eval()
|
||||
acc = test(model, reshape_in, device=device)
|
||||
|
||||
|
||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
||||
#if(len(param)!=0):
|
||||
# print(param[-2],' / ', param[-2].grad)
|
||||
# print(param[-1],' / ', param[-1].grad)
|
||||
|
||||
out = {"acc": acc, "log": log, "usr": usr}
|
||||
with open("log/%s.json" % name, "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
times = [x["time"] for x in log]
|
||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
||||
print("Final accuracy:", acc)
|
||||
|
||||
#plot_res(log, fig_name='res/'+name)
|
||||
|
||||
return out
|
||||
|
||||
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
|
||||
#print(height,device)
|
||||
if height == 0:
|
||||
return Adam(alpha=top, device=device)
|
||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
|
||||
|
||||
def plot_res(log, fig_name='res'):
|
||||
|
||||
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
|
||||
ax[0].set_title('Loss')
|
||||
ax[0].plot([x["loss"] for x in log])
|
||||
|
||||
ax[1].set_title('Acc')
|
||||
ax[1].plot([x["acc"] for x in log])
|
||||
|
||||
ax[2].set_title('mag')
|
||||
ax[2].plot([x["data_aug"] for x in log])
|
||||
|
||||
plt.savefig(fig_name)
|
||||
|
||||
def print_torch_mem(add_info=''):
|
||||
|
||||
nb=0
|
||||
max_size=0
|
||||
for obj in gc.get_objects():
|
||||
try:
|
||||
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
|
||||
#print(i, type(obj), obj.size())
|
||||
size = np.sum(obj.size())
|
||||
if(size>max_size): max_size=size
|
||||
nb+=1
|
||||
except:
|
||||
pass
|
||||
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
|
||||
|
||||
def print_model(model, fig_name='graph/graph'): #Does not seem to work for models written in functional style
|
||||
x = torch.randn(1,1,28,28, device=device)
|
||||
dot=make_dot(model(x), params=dict(model.named_parameters()))
|
||||
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
|
||||
dot.render(fig_name)
|
||||
print("Model graph generated !")
|
||||
|
||||
def viz_data(fig_name='data_sample'):
|
||||
|
||||
features_, labels_ = next(iter(dl_train))
|
||||
plt.figure(figsize=(10,10))
|
||||
#for i, (features_, labels_) in enumerate(dl_train):
|
||||
for i in range(25):
|
||||
if i==25: break
|
||||
#print(features_.size(), labels_.size())
|
||||
|
||||
plt.subplot(5,5,i+1)
|
||||
plt.xticks([])
|
||||
plt.yticks([])
|
||||
plt.grid(False)
|
||||
|
||||
img = features_[i,0,:,:]
|
||||
|
||||
#print('im shape',img.shape)
|
||||
plt.imshow(img, cmap=plt.cm.binary)
|
||||
plt.xlabel(labels_[i].item())
|
||||
|
||||
plt.savefig(fig_name)
|
||||
|
||||
##########################################
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
os.mkdir("log")
|
||||
except:
|
||||
print("log/ exists already")
|
||||
|
||||
device = torch.device('cuda')
|
||||
|
||||
run(make_adam_stack(height=1, top=0.001, device=device),
|
||||
"Augmented_MNIST",
|
||||
epochs=100,
|
||||
cnn=True,
|
||||
device = device)
|
||||
print()
|
583
higher/dataug.py
Normal file
|
@ -0,0 +1,583 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.distributions import *
|
||||
|
||||
#import kornia #NOTE: the Data_aug / Data_augV2 / Data_augV3 classes below call kornia, random and np directly; re-enable these imports if those classes are used
|
||||
#import random
|
||||
#import numpy as np
|
||||
import copy
|
||||
|
||||
import transformations as TF
|
||||
|
||||
class Data_aug(nn.Module): #Parameterized rotation
|
||||
def __init__(self):
|
||||
super(Data_aug, self).__init__()
|
||||
self._data_augmentation = True
|
||||
self._params = nn.ParameterDict({
|
||||
"prob": nn.Parameter(torch.tensor(0.5)),
|
||||
"mag": nn.Parameter(torch.tensor(1.0))
|
||||
})
|
||||
|
||||
#self.params["mag"].register_hook(print)
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
if self._data_augmentation and random.random() < self._params["prob"]:
|
||||
#print('Aug')
|
||||
batch_size = x.shape[0]
|
||||
# create transformation (rotation)
|
||||
alpha = self._params["mag"]*180 # in degrees
|
||||
angle = torch.ones(batch_size, device=x.device) * alpha
|
||||
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=x.device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=x.device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
return x
|
||||
|
||||
def eval(self):
|
||||
self.augment(mode=False)
|
||||
nn.Module.eval(self)
|
||||
|
||||
def augment(self, mode=True):
|
||||
self._data_augmentation=mode
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._params[key]
|
||||
|
||||
def __str__(self):
|
||||
return "Data_aug(Mag-1 TF)"
|
||||
|
||||
class Data_augV2(nn.Module): #Exact method
|
||||
def __init__(self):
|
||||
super(Data_augV2, self).__init__()
|
||||
self._data_augmentation = True
|
||||
|
||||
self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
|
||||
#self._fixed_transf=[0.0]
|
||||
self._nb_tf= len(self._fixed_transf)
|
||||
|
||||
self._params = nn.ParameterDict({
|
||||
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
|
||||
#"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
|
||||
})
|
||||
|
||||
#print(self._params["prob"], self._params["prob2"])
|
||||
|
||||
self.transf_idx=0
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
if self._data_augmentation:
|
||||
#print('Aug',self._fixed_transf[self.transf_idx])
|
||||
device = x.device
|
||||
batch_size = x.shape[0]
|
||||
|
||||
# create transformation (rotation)
|
||||
#alpha = 180 # in degrees
|
||||
alpha = self._fixed_transf[self.transf_idx]
|
||||
angle = torch.ones(batch_size, device=device) * alpha
|
||||
|
||||
x = self.rotate(x,angle)
|
||||
|
||||
return x
|
||||
|
||||
def rotate(self, x, angle):
|
||||
|
||||
device = x.device
|
||||
batch_size = x.shape[0]
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
|
||||
def adjust_prob(self): #Detach from gradient ?
|
||||
self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
|
||||
#print('proba',self._params['prob'])
|
||||
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
|
||||
#print('Sum p', sum(self._params['prob']))
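        # Added worked example (illustrative): with raw values [1.2, -0.1, 0.5] the clamp
        # above gives [1.0, 0.0, 0.5] and the renormalisation gives [2/3, 0.0, 1/3],
        # so 'prob' stays a valid distribution over the fixed transforms.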
|
||||
|
||||
def eval(self):
|
||||
self.augment(mode=False)
|
||||
nn.Module.eval(self)
|
||||
|
||||
def augment(self, mode=True):
|
||||
self._data_augmentation=mode
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._params[key]
|
||||
|
||||
def __str__(self):
|
||||
return "Data_augV2(Exact-%d TF)" % self._nb_tf
|
||||
|
||||
class Data_augV3(nn.Module): #Uniform / mixed sampling
|
||||
def __init__(self, mix_dist=0.0):
|
||||
super(Data_augV3, self).__init__()
|
||||
self._data_augmentation = True
|
||||
|
||||
#self._fixed_transf=[0.0, 45.0, 180.0] #Degree rotation
|
||||
self._fixed_transf=[0.0, 1.0, -1.0] #Flips (Identity,Horizontal,Vertical)
|
||||
#self._fixed_transf=[0.0]
|
||||
self._nb_tf= len(self._fixed_transf)
|
||||
|
||||
self._params = nn.ParameterDict({
|
||||
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
|
||||
#"prob2": nn.Parameter(torch.ones(len(self._fixed_transf)).softmax(dim=0))
|
||||
})
|
||||
|
||||
#print(self._params["prob"], self._params["prob2"])
|
||||
self._sample = []
|
||||
|
||||
self._mix_dist = False
|
||||
if mix_dist != 0.0:
|
||||
self._mix_dist = True
|
||||
self._mix_factor = max(min(mix_dist, 1.0), 0.0)
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
if self._data_augmentation:
|
||||
device = x.device
|
||||
batch_size = x.shape[0]
|
||||
|
||||
|
||||
#good_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1),self._params["prob"], device=device))
|
||||
#bad_distrib = Uniform(low=torch.zeros(batch_size,1, device=device),high=torch.new_full((batch_size,1), 1-self._params["prob"], device=device))
|
||||
|
||||
#transform_dist = Categorical(probs=torch.tensor([self._params["prob"], 1-self._params["prob"]], device=device))
|
||||
#self._sample = transform_dist._sample(sample_shape=torch.Size([batch_size,1]))
|
||||
|
||||
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1) #Softmax over the transform axis (dim=1), as in Data_augV4 below
|
||||
|
||||
if not self._mix_dist:
|
||||
distrib = uniforme_dist
|
||||
else:
|
||||
distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix of the learned and uniform distributions, controlled by mix_factor
|
||||
|
||||
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*distrib)
|
||||
self._sample = cat_distrib.sample()
|
||||
|
||||
TF_param = torch.tensor([self._fixed_transf[x] for x in self._sample], device=device) #Marco's approach might be faster
|
||||
|
||||
#x = self.rotate(x,angle=TF_param)
|
||||
x = self.flip(x,flip_mat=TF_param)
|
||||
|
||||
return x
|
||||
|
||||
def rotate(self, x, angle):
|
||||
|
||||
device = x.device
|
||||
batch_size = x.shape[0]
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
return kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
def flip(self, x, flip_mat):
|
||||
|
||||
#print(flip_mat)
|
||||
device = x.device
|
||||
batch_size = x.shape[0]
|
||||
|
||||
h, w = x.shape[2], x.shape[3] # destination size
|
||||
#points_src = torch.ones(batch_size, 4, 2, device=device)
|
||||
#points_dst = torch.ones(batch_size, 4, 2, device=device)
|
||||
|
||||
#Identity
|
||||
iM=torch.tensor(np.eye(3))
|
||||
|
||||
#Horizontal flip
|
||||
# the source points are the region to crop corners
|
||||
#points_src = torch.FloatTensor([[
|
||||
# [w - 1, 0], [0, 0], [0, h - 1], [w - 1, h - 1],
|
||||
#]])
|
||||
# the destination points are the image vertexes
|
||||
#points_dst = torch.FloatTensor([[
|
||||
# [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
|
||||
#]])
|
||||
# compute perspective transform
|
||||
#hM = kornia.get_perspective_transform(points_src, points_dst)
|
||||
hM =torch.tensor( [[[-1., 0., w-1],
|
||||
[ 0., 1., 0.],
|
||||
[ 0., 0., 1.]]])
|
||||
|
||||
#Vertical flip
|
||||
# the source points are the region to crop corners
|
||||
#points_src = torch.FloatTensor([[
|
||||
# [0, h - 1], [w - 1, h - 1], [w - 1, 0], [0, 0],
|
||||
#]])
|
||||
# the destination points are the image vertexes
|
||||
#points_dst = torch.FloatTensor([[
|
||||
# [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
|
||||
#]])
|
||||
# compute perspective transform
|
||||
#vM = kornia.get_perspective_transform(points_src, points_dst)
|
||||
vM =torch.tensor( [[[ 1., 0., 0.],
|
||||
[ 0., -1., h-1],
|
||||
[ 0., 0., 1.]]])
|
||||
#print(vM)
|
||||
|
||||
M=torch.ones(batch_size, 3, 3, device=device)
|
||||
|
||||
for i in range(batch_size): # To optimize: could be vectorized, see the sketch after this method
|
||||
if flip_mat[i]==0.0:
|
||||
M[i,]=iM
|
||||
elif flip_mat[i]==1.0:
|
||||
M[i,]=hM
|
||||
elif flip_mat[i]==-1.0:
|
||||
M[i,]=vM
|
||||
|
||||
# warp the original image by the found transform
|
||||
return kornia.warp_perspective(x, M, dsize=(h, w))
|
||||
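# Hedged sketch (added for illustration, not part of the original commit): the
# per-sample loop above could be replaced by stacking the three homographies and
# indexing them, assuming flip_mat only contains values in {0., 1., -1.}:
#   mats = torch.stack([m.reshape(3, 3).float() for m in (iM, hM, vM)]).to(device)
#   idx = torch.zeros_like(flip_mat, dtype=torch.long)
#   idx[flip_mat == 1.0] = 1
#   idx[flip_mat == -1.0] = 2
#   M = mats[idx]   # (batch_size, 3, 3)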
|
||||
def adjust_prob(self, soft=False): #Detach from gradient ?
|
||||
|
||||
if soft :
|
||||
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Too 'soft': gets stuck near the uniform distribution if the lr is too small
|
||||
else:
|
||||
#self._params['prob'].clamp(min=0.0,max=1.0)
|
||||
self._params['prob'].data = F.relu(self._params['prob'].data)
|
||||
#self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
|
||||
#print('proba',self._params['prob'])
|
||||
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
|
||||
#print('Sum p', sum(self._params['prob']))
|
||||
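# Hedged note (added for clarity): adjust_prob is a crude projection back onto
# the probability simplex: negative entries are zeroed (ReLU or clamp) and the
# vector is renormalized so that sum(prob) = 1, keeping 'prob' a valid
# categorical distribution between meta-updates.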
|
||||
def loss_weight(self):
|
||||
#w_loss = [self._params["prob"][x] for x in self._sample]
|
||||
#print(self._sample.view(-1,1).shape)
|
||||
#print(self._sample[:10])
|
||||
|
||||
w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
|
||||
w_loss.scatter_(1, self._sample.view(-1,1), 1)
|
||||
#print(w_loss.shape)
|
||||
#print(w_loss[:10,:])
|
||||
w_loss = w_loss * self._params["prob"]
|
||||
#print(w_loss.shape)
|
||||
#print(w_loss[:10,:])
|
||||
w_loss = torch.sum(w_loss,dim=1)
|
||||
#print(w_loss.shape)
|
||||
#print(w_loss[:10])
|
||||
return w_loss
|
||||
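# Hedged note (added for clarity): the scatter above builds a one-hot matrix
# over the transforms and reduces it, which is equivalent to indexing the
# probability vector directly, e.g.
#   w_loss = self._params["prob"][self._sample]
# i.e. each sample is weighted by the probability of the transform it received.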
|
||||
def train(self, mode=None):
|
||||
if mode is None :
|
||||
mode=self._data_augmentation
|
||||
self.augment(mode=mode) #Redundant if mode was None
|
||||
super(Data_augV3, self).train(mode)
|
||||
|
||||
def eval(self):
|
||||
self.train(mode=False)
|
||||
#super(Augmented_model, self).eval()
|
||||
|
||||
def augment(self, mode=True):
|
||||
self._data_augmentation=mode
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._params[key]
|
||||
|
||||
def __str__(self):
|
||||
if not self._mix_dist:
|
||||
return "Data_augV3(Uniform-%d TF)" % self._nb_tf
|
||||
else:
|
||||
return "Data_augV3(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf)
|
||||
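def _data_augV3_usage_sketch():
    # Hedged usage sketch (added for illustration, not part of the original commit):
    # sample one flip per image and weight the per-sample loss by the probability
    # of the transform each sample received, as done in test_dataug.py below.
    # Shapes and the stand-in logits are assumptions.
    aug = Data_augV3(mix_dist=0.5)
    x = torch.rand(16, 3, 32, 32)                     # images in [0, 1]
    y = torch.randint(0, 10, (16,))
    x_aug = aug(x)                                    # also stores aug._sample
    logits = torch.randn(16, 10)                      # stand-in for model(x_aug)
    loss = F.cross_entropy(logits, y, reduction='none')
    loss = (loss * aug.loss_weight()).mean()          # REINFORCE-style weighting
    return loss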
|
||||
class Data_augV4(nn.Module): #Transformations with mask
|
||||
def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0):
|
||||
super(Data_augV4, self).__init__()
|
||||
self._data_augmentation = True
|
||||
|
||||
#self._TF_matrix={}
|
||||
#self._input_info={'h':0, 'w':0, 'device':None} #Input associe a TF_matrix
|
||||
'''
|
||||
self._mag_fct={ #f(mag_normalise)=mag_reelle
|
||||
## Geometric TF ##
|
||||
'Identity' : (lambda mag: None),
|
||||
'FlipUD' : (lambda mag: None),
|
||||
'FlipLR' : (lambda mag: None),
|
||||
'Rotate': (lambda mag: random.randint(-int_parameter(mag, maxval=30), int_parameter(mag, maxval=30))),
|
||||
'TranslateX': (lambda mag: [random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20)), 0]),
|
||||
'TranslateY': (lambda mag: [0, random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20))]),
|
||||
'ShearX': (lambda mag: [random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3)), 0]),
|
||||
'ShearY': (lambda mag: [0, random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3))]),
|
||||
|
||||
## Color TF (Expect image in the range of [0, 1]) ##
|
||||
'Contrast': (lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Color':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Brightness':(lambda mag: random.uniform(1., float_parameter(mag, maxval=1.9))),
|
||||
'Sharpness':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Posterize': (lambda mag: random.randint(4, int_parameter(mag, maxval=8))),
|
||||
'Solarize': (lambda mag: random.randint(1, int_parameter(mag, maxval=256))/256.), #=>Image entre [0,1] #Pas opti pour des batch
|
||||
|
||||
#Non fonctionnel
|
||||
'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent)
|
||||
#'Equalize': (lambda mag: None),
|
||||
}
|
||||
'''
|
||||
self._mag_fct = TF_dict
|
||||
self._TF=list(self._mag_fct.keys())
|
||||
self._nb_tf= len(self._TF)
|
||||
|
||||
self._fixed_mag=5 #[0, PARAMETER_MAX]
|
||||
self._params = nn.ParameterDict({
|
||||
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
|
||||
})
|
||||
|
||||
self._sample = []
|
||||
|
||||
self._mix_dist = False
|
||||
if mix_dist != 0.0:
|
||||
self._mix_dist = True
|
||||
self._mix_factor = max(min(mix_dist, 1.0), 0.0)
|
||||
|
||||
def forward(self, x):
|
||||
if self._data_augmentation:
|
||||
device = x.device
|
||||
batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
|
||||
|
||||
|
||||
## Sampling ##
|
||||
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
|
||||
|
||||
if not self._mix_dist:
|
||||
self._distrib = uniforme_dist
|
||||
else:
|
||||
self._distrib = (self._mix_factor*self._params["prob"]+(1-self._mix_factor)*uniforme_dist).softmax(dim=1) #Mix of the learned and uniform distributions, controlled by mix_factor
|
||||
#print(self._distrib.shape)
|
||||
|
||||
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib)
|
||||
self._sample = cat_distrib.sample()
|
||||
|
||||
## Transformations ##
|
||||
#'''
|
||||
x = copy.deepcopy(x) #Avoids modifying the samples by reference (problematic for parallel use)
|
||||
smps_x=[]
|
||||
masks=[]
|
||||
for tf_idx in range(self._nb_tf):
|
||||
mask = self._sample==tf_idx #Create selection mask
|
||||
smp_x = x[mask] #torch.masked_select() ?
|
||||
|
||||
if smp_x.shape[0]!=0: #if there's data to TF
|
||||
magnitude=self._fixed_mag
|
||||
tf=self._TF[tf_idx]
|
||||
|
||||
## Geometric TF ##
|
||||
if tf=='Identity':
|
||||
pass
|
||||
elif tf=='FlipLR':
|
||||
smp_x = TF.flipLR(smp_x)
|
||||
elif tf=='FlipUD':
|
||||
smp_x = TF.flipUD(smp_x)
|
||||
elif tf=='Rotate':
|
||||
smp_x = TF.rotate(smp_x, angle=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='TranslateX' or tf=='TranslateY':
|
||||
smp_x = TF.translate(smp_x, translation=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='ShearX' or tf=='ShearY' :
|
||||
smp_x = TF.shear(smp_x, shear=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
|
||||
## Color TF (Expect image in the range of [0, 1]) ##
|
||||
elif tf=='Contrast':
|
||||
smp_x = TF.contrast(smp_x, contrast_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='Color':
|
||||
smp_x = TF.color(smp_x, color_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='Brightness':
|
||||
smp_x = TF.brightness(smp_x, brightness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='Sharpness':
|
||||
smp_x = TF.sharpeness(smp_x, sharpness_factor=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='Posterize':
|
||||
smp_x = TF.posterize(smp_x, bits=torch.tensor([1 for _ in smp_x], device=device))
|
||||
elif tf=='Solarize':
|
||||
smp_x = TF.solarize(smp_x, thresholds=torch.tensor([self._mag_fct[tf](magnitude) for _ in smp_x], device=device))
|
||||
elif tf=='Equalize':
|
||||
smp_x = TF.equalize(smp_x)
|
||||
elif tf=='Auto_Contrast':
|
||||
smp_x = TF.auto_contrast(smp_x)
|
||||
else:
|
||||
raise Exception("Invalid TF requested : ", tf)
|
||||
|
||||
x[mask]=smp_x # Merge back (the x[mask] assignment is in place)
|
||||
|
||||
#idx= mask.nonzero()
|
||||
#print('-'*8)
|
||||
#print(idx[0], tf_idx)
|
||||
#print(smp_x[0,])
|
||||
#x=x.view(-1,3*32*32)
|
||||
#x=x.scatter(dim=0, index=idx, src=smp_x.view(-1,3*32*32)) #Changement des Tensor mais pas visible sur la visualisation...
|
||||
#x=x.view(-1,3,32,32)
|
||||
#print(x[0,])
|
||||
|
||||
'''
|
||||
if len(self._TF_matrix)==0 or self._input_info['h']!=h or self._input_info['w']!=w or self._input_info['device']!=device: #Device different:Pas necessaire de tout recalculer
|
||||
self.compute_TF_matrix(sample_info={'h': x.shape[2],
|
||||
'w': x.shape[3],
|
||||
'device': x.device})
|
||||
|
||||
TF_matrix = torch.zeros(batch_size, 3, 3, device=device) #All geom TF
|
||||
|
||||
for tf_idx in range(self._nb_tf):
|
||||
mask = self._sample==tf_idx #Create selection mask
|
||||
TF_matrix[mask,]=self._TF_matrix[self._TF[tf_idx]]
|
||||
|
||||
x=kornia.warp_perspective(x, TF_matrix, dsize=(h, w))
|
||||
'''
|
||||
return x
|
||||
'''
|
||||
def compute_TF_matrix(self, magnitude=None, sample_info= None):
|
||||
print('Computing TF_matrix...')
|
||||
if not magnitude :
|
||||
magnitude=self._fixed_mag
|
||||
|
||||
if sample_info:
|
||||
self._input_info['h']= sample_info['h']
|
||||
self._input_info['w']= sample_info['w']
|
||||
self._input_info['device'] = sample_info['device']
|
||||
h, w, device= self._input_info['h'], self._input_info['w'], self._input_info['device']
|
||||
|
||||
self._TF_matrix={}
|
||||
for tf in self._TF :
|
||||
if tf=='Id':
|
||||
self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
|
||||
[ 0., 1., 0.],
|
||||
[ 0., 0., 1.]]], device=device)
|
||||
elif tf=='Rot':
|
||||
center = torch.ones(1, 2, device=device)
|
||||
center[0, 0] = w / 2 # x
|
||||
center[0, 1] = h / 2 # y
|
||||
scale = torch.ones(1, device=device)
|
||||
angle = self._mag_fct[tf](magnitude) * torch.ones(1, device=device)
|
||||
R = kornia.get_rotation_matrix2d(center, angle, scale) #Rotation matrix (1,2,3)
|
||||
self._TF_matrix[tf]=torch.cat((R,torch.tensor([[[ 0., 0., 1.]]], device=device)), dim=1) #TF matrix (1,3,3)
|
||||
elif tf=='FlipLR':
|
||||
self._TF_matrix[tf]=torch.tensor([[[-1., 0., w-1],
|
||||
[ 0., 1., 0.],
|
||||
[ 0., 0., 1.]]], device=device)
|
||||
elif tf=='FlipUD':
|
||||
self._TF_matrix[tf]=torch.tensor([[[ 1., 0., 0.],
|
||||
[ 0., -1., h-1],
|
||||
[ 0., 0., 1.]]], device=device)
|
||||
else:
|
||||
raise Exception("Invalid TF requested")
|
||||
'''
|
||||
def adjust_prob(self, soft=False): #Detach from gradient ?
|
||||
|
||||
if soft :
|
||||
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Too 'soft': gets stuck near the uniform distribution if the lr is too small
|
||||
else:
|
||||
#self._params['prob'].clamp(min=0.0,max=1.0)
|
||||
self._params['prob'].data = F.relu(self._params['prob'].data)
|
||||
#self._params['prob'].data = self._params['prob'].clamp(min=0.0,max=1.0)
|
||||
|
||||
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Constraint: sum(p)=1
|
||||
|
||||
def loss_weight(self):
|
||||
w_loss = torch.zeros((self._sample.shape[0],self._nb_tf), device=self._sample.device)
|
||||
w_loss.scatter_(1, self._sample.view(-1,1), 1)
|
||||
w_loss = w_loss * self._params["prob"]/self._distrib #Weight by the probabilities (divided by the sampling distribution so the loss is not shrunk)
|
||||
w_loss = torch.sum(w_loss,dim=1)
|
||||
return w_loss
|
||||
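# Hedged note (added for clarity): this is an importance weight per sample, e.g.
#   w_loss = self._params["prob"][self._sample] / self._distrib[0, self._sample]
# the division by the sampling distribution keeps the loss scale unchanged when
# sampling from the uniform/mixed distribution instead of the learned one.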
|
||||
def train(self, mode=None):
|
||||
if mode is None :
|
||||
mode=self._data_augmentation
|
||||
self.augment(mode=mode) #Redundant if mode was None
|
||||
super(Data_augV4, self).train(mode)
|
||||
|
||||
def eval(self):
|
||||
self.train(mode=False)
|
||||
|
||||
def augment(self, mode=True):
|
||||
self._data_augmentation=mode
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._params[key]
|
||||
|
||||
def __str__(self):
|
||||
if not self._mix_dist:
|
||||
return "Data_augV4(Uniform-%d TF)" % self._nb_tf
|
||||
else:
|
||||
return "Data_augV4(Mix %.1f-%d TF)" % (self._mix_factor, self._nb_tf)
|
||||
|
||||
class Augmented_model(nn.Module):
|
||||
def __init__(self, data_augmenter, model):
|
||||
super(Augmented_model, self).__init__()
|
||||
|
||||
self._mods = nn.ModuleDict({
|
||||
'data_aug': data_augmenter,
|
||||
'model': model
|
||||
})
|
||||
|
||||
self.augment(mode=True)
|
||||
|
||||
def initialize(self):
|
||||
self._mods['model'].initialize()
|
||||
|
||||
def forward(self, x):
|
||||
return self._mods['model'](self._mods['data_aug'](x))
|
||||
|
||||
def augment(self, mode=True):
|
||||
self._data_augmentation=mode
|
||||
self._mods['data_aug'].augment(mode)
|
||||
|
||||
def train(self, mode=None):
|
||||
if mode is None :
|
||||
mode=self._data_augmentation
|
||||
self._mods['data_aug'].augment(mode)
|
||||
super(Augmented_model, self).train(mode)
|
||||
|
||||
def eval(self):
|
||||
self.train(mode=False)
|
||||
#super(Augmented_model, self).eval()
|
||||
|
||||
def items(self):
|
||||
"""Return an iterable of the ModuleDict key/value pairs.
|
||||
"""
|
||||
return self._mods.items()
|
||||
|
||||
def update(self, modules):
|
||||
self._mods.update(modules)
|
||||
|
||||
def is_augmenting(self):
|
||||
return self._data_augmentation
|
||||
|
||||
def TF_names(self):
|
||||
try:
|
||||
return self._mods['data_aug']._TF
|
||||
except:
|
||||
return None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._mods[key]
|
||||
|
||||
def __str__(self):
|
||||
return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")"
|
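def _augmented_model_usage_sketch():
    # Hedged usage sketch (added for illustration, not part of the original commit):
    # wrap an augmentation module and a task model, then toggle augmentation off
    # for evaluation. Assumes LeNet (model.py) and TF (transformations.py) are
    # importable here, as they are in test_dataug.py below.
    aug_model = Augmented_model(Data_augV4(TF_dict=TF.TF_dict), LeNet(3, 10))
    x = torch.rand(4, 3, 32, 32)          # CIFAR-like batch in [0, 1]
    logits = aug_model(x)                 # augment, then classify
    aug_model.eval()                      # also disables augmentation
    logits_eval = aug_model(x)
    return logits, logits_eval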
51
higher/model.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
class LeNet(nn.Module):
|
||||
def __init__(self, num_inp, num_out):
|
||||
super(LeNet, self).__init__()
|
||||
self._params = nn.ParameterDict({
|
||||
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
|
||||
'b1': nn.Parameter(torch.zeros(20)),
|
||||
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
|
||||
'b2': nn.Parameter(torch.zeros(50)),
|
||||
#'w3': nn.Parameter(torch.zeros(500,4*4*50)), #num_imp=1
|
||||
'w3': nn.Parameter(torch.zeros(500,5*5*50)), #num_inp=3
|
||||
'b3': nn.Parameter(torch.zeros(500)),
|
||||
'w4': nn.Parameter(torch.zeros(num_out, 500)),
|
||||
'b4': nn.Parameter(torch.zeros(num_out))
|
||||
})
|
||||
self.initialize()
|
||||
|
||||
|
||||
def initialize(self):
|
||||
nn.init.kaiming_uniform_(self._params["w1"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self._params["w2"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self._params["w3"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self._params["w4"], a=math.sqrt(5))
|
||||
|
||||
def forward(self, x):
|
||||
#print("Start Shape ", x.shape)
|
||||
out = F.relu(F.conv2d(input=x, weight=self._params["w1"], bias=self._params["b1"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.conv2d(input=out, weight=self._params["w2"], bias=self._params["b2"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.linear(out, self._params["w3"], self._params["b3"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.linear(out, self._params["w4"], self._params["b4"])
|
||||
#print("Shape ", out.shape)
|
||||
return F.log_softmax(out, dim=1)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._params[key]
|
||||
|
||||
def __str__(self):
|
||||
return "LeNet"
|
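if __name__ == "__main__":
    # Hedged sanity check (added for illustration, not part of the original
    # commit): with num_inp=3 the 5*5*50 flatten size above implies 32x32 inputs.
    net = LeNet(num_inp=3, num_out=10)
    x = torch.rand(2, 3, 32, 32)
    print(net(x).shape)   # expected: torch.Size([2, 10])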
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 118 KiB |
After Width: | Height: | Size: 45 KiB |
After Width: | Height: | Size: 55 KiB |
After Width: | Height: | Size: 65 KiB |
BIN
higher/res/LeNet-100 epochs.png
Normal file
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 46 KiB |
After Width: | Height: | Size: 53 KiB |
BIN
higher/res/MNIST/LeNet-10 epochs.png
Normal file
After Width: | Height: | Size: 42 KiB |
764
higher/test_dataug.py
Normal file
|
@ -0,0 +1,764 @@
|
|||
from torch.utils.data import SubsetRandomSampler
|
||||
import torch.optim as optim
|
||||
import torchvision
|
||||
import higher
|
||||
|
||||
from model import *
|
||||
from dataug import *
|
||||
from utils import *
|
||||
|
||||
BATCH_SIZE = 300
|
||||
#TEST_SIZE = 300
|
||||
TEST_SIZE = 10000
|
||||
|
||||
#WARNING : Dataug (Kornia) expects images in the range [0, 1]
|
||||
transform = torchvision.transforms.Compose([
|
||||
torchvision.transforms.ToTensor(),
|
||||
#torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
|
||||
])
|
||||
'''
|
||||
data_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
data_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
'''
|
||||
data_train = torchvision.datasets.CIFAR10(
|
||||
"./data", train=True, download=True, transform=transform
|
||||
)
|
||||
data_test = torchvision.datasets.CIFAR10(
|
||||
"./data", train=False, download=True, transform=transform
|
||||
)
|
||||
#'''
|
||||
train_subset_indices=range(int(len(data_train)/2))
|
||||
#train_subset_indices=range(BATCH_SIZE*10)
|
||||
val_subset_indices=range(int(len(data_train)/2),len(data_train))
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
|
||||
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
|
||||
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
|
||||
|
||||
device = torch.device('cuda')
|
||||
|
||||
if device == torch.device('cpu'):
|
||||
device_name = 'CPU'
|
||||
else:
|
||||
device_name = torch.cuda.get_device_name(device)
|
||||
|
||||
|
||||
def test(model):
|
||||
model.eval()
|
||||
for i, (features, labels) in enumerate(dl_test):
|
||||
features,labels = features.to(device), labels.to(device)
|
||||
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100 #One pass only: dl_test uses batch_size=TEST_SIZE, so the first batch covers the whole test set
|
||||
|
||||
def compute_vaLoss(model, dl_val_it):
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
try:
|
||||
model.augment(mode=False) #Validation without transformations!
|
||||
except:
|
||||
pass
|
||||
return F.cross_entropy(model(xs_val), ys_val)
|
||||
|
||||
def train_classic(model, epochs=1):
|
||||
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
|
||||
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
model.train()
|
||||
dl_val_it = iter(dl_val)
|
||||
log = []
|
||||
for epoch in range(epochs):
|
||||
print_torch_mem("Start epoch")
|
||||
t0 = time.process_time()
|
||||
for i, (features, labels) in enumerate(dl_train):
|
||||
#print_torch_mem("Start iter")
|
||||
features,labels = features.to(device), labels.to(device)
|
||||
|
||||
optim.zero_grad()
|
||||
pred = model.forward(features)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
loss.backward()
|
||||
optim.step()
|
||||
|
||||
#### Tests ####
|
||||
tf = time.process_time()
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
val_loss = F.cross_entropy(model(xs_val), ys_val)
|
||||
accuracy=test(model)
|
||||
model.train()
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": None,
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
return log
|
||||
|
||||
def train_classic_higher(model, epochs=1):
|
||||
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
|
||||
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
model.train()
|
||||
dl_val_it = iter(dl_val)
|
||||
log = []
|
||||
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
|
||||
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, diffopt):
|
||||
|
||||
for epoch in range(epochs):
|
||||
print_torch_mem("Start epoch "+str(epoch))
|
||||
print("Fast param ",len(fmodel._fast_params))
|
||||
t0 = time.process_time()
|
||||
for i, (features, labels) in enumerate(dl_train):
|
||||
#print_torch_mem("Start iter")
|
||||
features,labels = features.to(device), labels.to(device)
|
||||
|
||||
#optim.zero_grad()
|
||||
pred = fmodel.forward(features)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
#.backward()
|
||||
#optim.step()
|
||||
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
|
||||
|
||||
model_copy(src=fmodel, dst=model, patch_copy=False)
|
||||
optim_copy(dopt=diffopt, opt=optim)
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
|
||||
|
||||
#### Tests ####
|
||||
tf = time.process_time()
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
val_loss = F.cross_entropy(model(xs_val), ys_val)
|
||||
accuracy=test(model)
|
||||
model.train()
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": None,
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
return log
|
||||
|
||||
def train_classic_tests(model, epochs=1):
|
||||
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
|
||||
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
countcopy=0
|
||||
model.train()
|
||||
dl_val_it = iter(dl_val)
|
||||
log = []
|
||||
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
doptim = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)
|
||||
for epoch in range(epochs):
|
||||
print_torch_mem("Start epoch")
|
||||
print(len(fmodel._fast_params))
|
||||
t0 = time.process_time()
|
||||
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=True) as (fmodel, doptim):
|
||||
|
||||
#fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
#doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
|
||||
|
||||
for i, (features, labels) in enumerate(dl_train):
|
||||
features,labels = features.to(device), labels.to(device)
|
||||
|
||||
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, doptim):
|
||||
|
||||
|
||||
#optim.zero_grad()
|
||||
pred = fmodel.forward(features)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
doptim.step(loss) #(opt.zero_grad, loss.backward, opt.step)
|
||||
#loss.backward()
|
||||
#new_params = doptim.step(loss, params=fmodel.parameters())
|
||||
#fmodel.update_params(new_params)
|
||||
|
||||
|
||||
#print('Fast param',len(fmodel._fast_params))
|
||||
#print('opt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][2]['momentum_buffer'].shape)
|
||||
|
||||
if False or (len(fmodel._fast_params)>1):
|
||||
print("fmodel fast param",len(fmodel._fast_params))
|
||||
'''
|
||||
#val_loss = F.cross_entropy(fmodel(features), labels)
|
||||
|
||||
#print_graph(val_loss)
|
||||
|
||||
#val_loss.backward()
|
||||
#print('bip')
|
||||
|
||||
tmp = fmodel.parameters()
|
||||
|
||||
#print(list(tmp)[1])
|
||||
tmp = [higher.utils._copy_tensor(t,safe_copy=True) if isinstance(t, torch.Tensor) else t for t in tmp]
|
||||
#print(len(tmp))
|
||||
|
||||
#fmodel._fast_params.clear()
|
||||
del fmodel._fast_params
|
||||
fmodel._fast_params=None
|
||||
|
||||
fmodel.fast_params=tmp # Surcharge la memoire
|
||||
#fmodel.update_params(tmp) #Meilleur perf / Surcharge la memoire avec trach higher grad
|
||||
|
||||
#optim._fmodel=fmodel
|
||||
'''
|
||||
|
||||
|
||||
countcopy+=1
|
||||
model_copy(src=fmodel, dst=model, patch_copy=False)
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
#doptim.detach_dyn()
|
||||
#tmp = doptim.state
|
||||
#tmp = doptim.state_dict()
|
||||
#for k, v in tmp['state'].items():
|
||||
# print('dict',k, type(v))
|
||||
|
||||
a = optim.param_groups[0]['params'][0]
|
||||
state = optim.state[a]
|
||||
#state['momentum_buffer'] = None
|
||||
#print('opt state', type(optim.state[a]), len(optim.state[a]))
|
||||
#optim.load_state_dict(tmp)
|
||||
|
||||
|
||||
for group_idx, group in enumerate(optim.param_groups):
|
||||
# print('gp idx',group_idx)
|
||||
for p_idx, p in enumerate(group['params']):
|
||||
optim.state[p]=doptim.state[group_idx][p_idx]
|
||||
|
||||
#print('opt state', type(optim.state[a]['momentum_buffer']), optim.state[a]['momentum_buffer'][0:10])
|
||||
#print('dopt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][0]['momentum_buffer'][0:10])
|
||||
'''
|
||||
for a in tmp:
|
||||
#print(type(a), len(a))
|
||||
for nb, b in a.items():
|
||||
#print(nb, type(b), len(b))
|
||||
for n, state in b.items():
|
||||
#print(n, type(states))
|
||||
#print(state.grad_fn)
|
||||
state = torch.tensor(state.data).requires_grad_()
|
||||
#print(state.grad_fn)
|
||||
'''
|
||||
|
||||
|
||||
doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
|
||||
#doptim.state = tmp
|
||||
|
||||
|
||||
countcopy+=1
|
||||
model_copy(src=fmodel, dst=model)
|
||||
optim_copy(dopt=doptim, opt=optim)
|
||||
|
||||
#### Tests ####
|
||||
tf = time.process_time()
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
val_loss = F.cross_entropy(model(xs_val), ys_val)
|
||||
accuracy=test(model)
|
||||
model.train()
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": None,
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
#countcopy+=1
|
||||
#model_copy(src=fmodel, dst=model, patch_copy=False)
|
||||
#optim.load_state_dict(doptim.state_dict()) #Besoin sauver etat otpim ?
|
||||
|
||||
print("Copy ", countcopy)
|
||||
return log
|
||||
|
||||
def run_simple_dataug(inner_it, epochs=1):
|
||||
|
||||
dl_train_it = iter(dl_train)
|
||||
dl_val_it = iter(dl_val)
|
||||
|
||||
#aug_model = nn.Sequential(
|
||||
# Data_aug(),
|
||||
# LeNet(1,10),
|
||||
# )
|
||||
aug_model = Augmented_model(Data_aug(), LeNet(1,10)).to(device)
|
||||
print(str(aug_model))
|
||||
meta_opt = torch.optim.Adam(aug_model['data_aug'].parameters(), lr=1e-2)
|
||||
inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
log = []
|
||||
t0 = time.process_time()
|
||||
|
||||
epoch = 0
|
||||
while epoch < epochs:
|
||||
meta_opt.zero_grad()
|
||||
aug_model.train()
|
||||
with higher.innerloop_ctx(aug_model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effect of copy_initial_weights not clear...
|
||||
|
||||
for i in range(inner_it):
|
||||
try:
|
||||
xs, ys = next(dl_train_it)
|
||||
except StopIteration: #End of training epoch
|
||||
tf = time.process_time()
|
||||
epoch +=1
|
||||
dl_train_it = iter(dl_train)
|
||||
xs, ys = next(dl_train_it)
|
||||
|
||||
accuracy=test(aug_model)
|
||||
aug_model.train()
|
||||
|
||||
#### Print ####
|
||||
print('-'*9)
|
||||
print('Epoch %d/%d'%(epoch,epochs))
|
||||
print('train loss',loss.item(), '/ val loss', val_loss.item())
|
||||
print('acc', accuracy)
|
||||
print('mag', aug_model['data_aug']['mag'].item())
|
||||
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": aug_model['data_aug']['mag'].item(),
|
||||
}
|
||||
log.append(data)
|
||||
t0 = time.process_time()
|
||||
|
||||
xs, ys = xs.to(device), ys.to(device)
|
||||
|
||||
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
|
||||
|
||||
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
|
||||
#loss.backward(retain_graph=True)
|
||||
#print(fmodel['model']._params['b4'].grad)
|
||||
#print('mag', fmodel['data_aug']['mag'].grad)
|
||||
|
||||
diffopt.step(loss) # note that `step` must take `loss` as an argument!
|
||||
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
|
||||
# these new parameters, as an alternative to getting them from
|
||||
# `fmodel.fast_params` or `fmodel.parameters()` after calling
|
||||
# `diffopt.step`.
|
||||
|
||||
# At this point, or at any point in the iteration, you can take the
|
||||
# gradient of `fmodel.parameters()` (or equivalently
|
||||
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
|
||||
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
|
||||
# `grad_fn` as an attribute, and be part of the gradient tape.
|
||||
|
||||
# At the end of your inner loop you can obtain these e.g. ...
|
||||
#grad_of_grads = torch.autograd.grad(
|
||||
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
fmodel.augment(mode=False)
|
||||
val_logits = fmodel(xs_val) #Validation without transformations!
|
||||
val_loss = F.cross_entropy(val_logits, ys_val)
|
||||
#print('val_loss',val_loss.item())
|
||||
val_loss.backward()
|
||||
|
||||
#print('mag', fmodel['data_aug']['mag'], '/', fmodel['data_aug']['mag'].grad)
|
||||
|
||||
#model=copy.deepcopy(fmodel)
|
||||
aug_model.load_state_dict(fmodel.state_dict()) #Do not copy gradient !
|
||||
#Copy the gradients
|
||||
for paramName, paramValue, in fmodel.named_parameters():
|
||||
for netCopyName, netCopyValue, in aug_model.named_parameters():
|
||||
if paramName == netCopyName:
|
||||
netCopyValue.grad = paramValue.grad
|
||||
|
||||
#print('mag', aug_model['data_aug']['mag'], '/', aug_model['data_aug']['mag'].grad)
|
||||
meta_opt.step()
|
||||
|
||||
plot_res(log, fig_name="res/{}-{} epochs- {} in_it".format(str(aug_model),epochs,inner_it))
|
||||
print('-'*9)
|
||||
times = [x["time"] for x in log]
|
||||
print(str(aug_model),": acc", max([x["acc"] for x in log]), "in (ms):", np.mean(times), "+/-", np.std(times))
|
||||
|
||||
def run_dist_dataug(model, epochs=1, inner_it=1, dataug_epoch_start=0):
|
||||
|
||||
dl_train_it = iter(dl_train)
|
||||
dl_val_it = iter(dl_val)
|
||||
|
||||
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-3)
|
||||
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
high_grad_track = True
|
||||
if dataug_epoch_start>0:
|
||||
model.augment(mode=False)
|
||||
high_grad_track = False
|
||||
|
||||
model.train()
|
||||
|
||||
log = []
|
||||
t0 = time.process_time()
|
||||
|
||||
countcopy=0
|
||||
val_loss=torch.tensor(0)
|
||||
opt_param=None
|
||||
|
||||
epoch = 0
|
||||
while epoch < epochs:
|
||||
meta_opt.zero_grad()
|
||||
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt): #effect of copy_initial_weights not clear...
|
||||
|
||||
for i in range(inner_it):
|
||||
try:
|
||||
xs, ys = next(dl_train_it)
|
||||
except StopIteration: #End of training epoch
|
||||
tf = time.process_time()
|
||||
epoch +=1
|
||||
dl_train_it = iter(dl_train)
|
||||
xs, ys = next(dl_train_it)
|
||||
|
||||
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
|
||||
#viz_sample_data(imgs=aug_model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch))
|
||||
|
||||
accuracy=test(model)
|
||||
model.train()
|
||||
|
||||
#### Print ####
|
||||
print('-'*9)
|
||||
print('Epoch : %d/%d'%(epoch,epochs))
|
||||
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
|
||||
print('Accuracy :', accuracy)
|
||||
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
|
||||
print('TF Proba :', model['data_aug']['prob'].data)
|
||||
#print('proba grad',aug_model['data_aug']['prob'].grad)
|
||||
#############
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": [p for p in model['data_aug']['prob']],
|
||||
}
|
||||
log.append(data)
|
||||
#############
|
||||
|
||||
if epoch == dataug_epoch_start:
|
||||
print('Starting Data Augmentation...')
|
||||
model.augment(mode=True)
|
||||
high_grad_track = True
|
||||
|
||||
t0 = time.process_time()
|
||||
|
||||
xs, ys = xs.to(device), ys.to(device)
|
||||
|
||||
'''
|
||||
#Methode exacte
|
||||
final_loss = 0
|
||||
for tf_idx in range(fmodel['data_aug']._nb_tf):
|
||||
fmodel['data_aug'].transf_idx=tf_idx
|
||||
logits = fmodel(xs)
|
||||
loss = F.cross_entropy(logits, ys)
|
||||
#loss.backward(retain_graph=True)
|
||||
#print('idx', tf_idx)
|
||||
#print(fmodel['data_aug']['prob'][tf_idx], fmodel['data_aug']['prob'][tf_idx].grad)
|
||||
final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
|
||||
|
||||
loss = final_loss
|
||||
'''
|
||||
#Uniform method
|
||||
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
|
||||
loss = F.cross_entropy(logits, ys, reduction='none') # no need to call loss.backwards()
|
||||
if fmodel._data_augmentation: #Weight loss
|
||||
w_loss = fmodel['data_aug'].loss_weight().to(device)
|
||||
loss = loss * w_loss
|
||||
loss = loss.mean()
|
||||
#'''
|
||||
|
||||
#to visualize computational graph
|
||||
#print_graph(loss)
|
||||
|
||||
#loss.backward(retain_graph=True)
|
||||
#print(fmodel['model']._params['b4'].grad)
|
||||
#print('prob grad', fmodel['data_aug']['prob'].grad)
|
||||
|
||||
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
|
||||
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
|
||||
|
||||
fmodel.augment(mode=False) #Validation without transformations!
|
||||
val_loss = F.cross_entropy(fmodel(xs_val), ys_val)
|
||||
|
||||
#print_graph(val_loss)
|
||||
|
||||
val_loss.backward()
|
||||
|
||||
countcopy+=1
|
||||
model_copy(src=fmodel, dst=model)
|
||||
optim_copy(dopt=diffopt, opt=inner_opt)
|
||||
|
||||
meta_opt.step()
|
||||
model['data_aug'].adjust_prob() #Constraint: sum(proba)=1
|
||||
|
||||
print("Copy ", countcopy)
|
||||
return log
|
||||
|
||||
def run_dist_dataugV2(model, epochs=1, inner_it=0, dataug_epoch_start=0, print_freq=1, loss_patience=None):
|
||||
|
||||
log = []
|
||||
countcopy=0
|
||||
val_loss=torch.tensor(0) #Needed if no meta step happens during an epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
|
||||
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-2)
|
||||
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)
|
||||
|
||||
high_grad_track = True
|
||||
if inner_it == 0:
|
||||
high_grad_track=False
|
||||
if dataug_epoch_start!=0:
|
||||
model.augment(mode=False)
|
||||
high_grad_track = False
|
||||
|
||||
val_loss_monitor= None
|
||||
if loss_patience != None :
|
||||
if dataug_epoch_start==-1: val_loss_monitor = loss_monitor(patience=loss_patience, end_train=2) #1st limit = dataug start
|
||||
else: val_loss_monitor = loss_monitor(patience=loss_patience) #Val loss monitor
|
||||
|
||||
model.train()
|
||||
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel,track_higher_grads=high_grad_track)
|
||||
|
||||
for epoch in range(1, epochs+1):
|
||||
#print_torch_mem("Start epoch "+str(epoch))
|
||||
#print(high_grad_track, fmodel._data_augmentation, len(fmodel._fast_params))
|
||||
t0 = time.process_time()
|
||||
#with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt):
|
||||
|
||||
for i, (xs, ys) in enumerate(dl_train):
|
||||
xs, ys = xs.to(device), ys.to(device)
|
||||
'''
|
||||
#Methode exacte
|
||||
final_loss = 0
|
||||
for tf_idx in range(fmodel['data_aug']._nb_tf):
|
||||
fmodel['data_aug'].transf_idx=tf_idx
|
||||
logits = fmodel(xs)
|
||||
loss = F.cross_entropy(logits, ys)
|
||||
#loss.backward(retain_graph=True)
|
||||
#print('idx', tf_idx)
|
||||
#print(fmodel['data_aug']['prob'][tf_idx], fmodel['data_aug']['prob'][tf_idx].grad)
|
||||
final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
|
||||
|
||||
loss = final_loss
|
||||
'''
|
||||
#Uniform method
|
||||
|
||||
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
|
||||
loss = F.cross_entropy(logits, ys, reduction='none') # no need to call loss.backwards()
|
||||
#LOSS NOT WEIGHTED FOR MIX DIST
|
||||
if fmodel._data_augmentation: # and not fmodel['data_aug']._mix_dist: #Weight loss
|
||||
w_loss = fmodel['data_aug'].loss_weight().to(device)
|
||||
loss = loss * w_loss
|
||||
loss = loss.mean()
|
||||
#'''
|
||||
|
||||
#to visualize computational graph
|
||||
#print_graph(loss)
|
||||
|
||||
#loss.backward(retain_graph=True)
|
||||
#print(fmodel['model']._params['b4'].grad)
|
||||
#print('prob grad', fmodel['data_aug']['prob'].grad)
|
||||
|
||||
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
|
||||
|
||||
if(high_grad_track and i%inner_it==0): #Perform Meta step
|
||||
#print("meta")
|
||||
#Of little use if high_grad_track = False
|
||||
val_loss = compute_vaLoss(model=fmodel, dl_val_it=dl_val_it)
|
||||
|
||||
#print_graph(val_loss)
|
||||
|
||||
val_loss.backward()
|
||||
|
||||
countcopy+=1
|
||||
model_copy(src=fmodel, dst=model)
|
||||
optim_copy(dopt=diffopt, opt=inner_opt)
|
||||
|
||||
meta_opt.step()
|
||||
model['data_aug'].adjust_prob(soft=False) #Constraint: sum(proba)=1
|
||||
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
|
||||
|
||||
tf = time.process_time()
|
||||
|
||||
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
|
||||
#viz_sample_data(imgs=aug_model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch))
|
||||
|
||||
if(not high_grad_track):
|
||||
countcopy+=1
|
||||
model_copy(src=fmodel, dst=model)
|
||||
optim_copy(dopt=diffopt, opt=inner_opt)
|
||||
val_loss = compute_vaLoss(model=fmodel, dl_val_it=dl_val_it)
|
||||
|
||||
#Needed to reset higher (fast_params accumulate even with track_higher_grads = False)
|
||||
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
|
||||
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
|
||||
|
||||
accuracy=test(model)
|
||||
model.train()
|
||||
|
||||
#### Print ####
|
||||
if(print_freq and epoch%print_freq==0):
|
||||
print('-'*9)
|
||||
print('Epoch : %d/%d'%(epoch,epochs))
|
||||
print('Time : %.00f ms'%(tf - t0))
|
||||
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
|
||||
print('Accuracy :', accuracy)
|
||||
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
|
||||
print('TF Proba :', model['data_aug']['prob'].data)
|
||||
#print('proba grad',aug_model['data_aug']['prob'].grad)
|
||||
#############
|
||||
#### Log ####
|
||||
data={
|
||||
"epoch": epoch,
|
||||
"train_loss": loss.item(),
|
||||
"val_loss": val_loss.item(),
|
||||
"acc": accuracy,
|
||||
"time": tf - t0,
|
||||
|
||||
"param": [p.item() for p in model['data_aug']['prob']],
|
||||
}
|
||||
log.append(data)
|
||||
#############
|
||||
if val_loss_monitor :
|
||||
val_loss_monitor.register(val_loss.item())
|
||||
if val_loss_monitor.end_training(): break #Stop training
|
||||
|
||||
|
||||
if not model.is_augmenting() and (epoch == dataug_epoch_start or (val_loss_monitor and val_loss_monitor.limit_reached()==1)):
|
||||
print('Starting Data Augmentation...')
|
||||
dataug_epoch_start = epoch
|
||||
model.augment(mode=True)
|
||||
if inner_it != 0: high_grad_track = True
|
||||
|
||||
print("Copy ", countcopy)
|
||||
return log
|
||||
|
||||
##########################################
|
||||
if __name__ == "__main__":
|
||||
|
||||
n_inner_iter = 0
|
||||
epochs = 2
|
||||
dataug_epoch_start=0
|
||||
|
||||
#### Classic ####
|
||||
'''
|
||||
model = LeNet(3,10).to(device)
|
||||
#model = torchvision.models.resnet18()
|
||||
#model = Augmented_model(Data_augV3(mix_dist=0.0), LeNet(3,10)).to(device)
|
||||
#model.augment(mode=False)
|
||||
|
||||
print(str(model), 'on', device_name)
|
||||
log= train_classic_higher(model=model, epochs=epochs)
|
||||
|
||||
####
|
||||
plot_res(log, fig_name="res/{}-{} epochs".format(str(model),epochs))
|
||||
print('-'*9)
|
||||
times = [x["time"] for x in log]
|
||||
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times)), "Device": device_name, "Log": log}
|
||||
print(str(model),": acc", out["Accuracy"], "in (ms):", out["Time"][0], "+/-", out["Time"][1])
|
||||
with open("res/log/%s.json" % "{}-{} epochs".format(str(model),epochs), "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
print('Log :\"',f.name, '\" saved !')
|
||||
print('-'*9)
|
||||
'''
|
||||
#### Augmented Model ####
|
||||
#'''
|
||||
aug_model = Augmented_model(Data_augV4(TF_dict=TF.TF_dict, mix_dist=0.0), LeNet(3,10)).to(device)
|
||||
print(str(aug_model), 'on', device_name)
|
||||
#run_simple_dataug(inner_it=n_inner_iter, epochs=epochs)
|
||||
log= run_dist_dataugV2(model=aug_model, epochs=epochs, inner_it=n_inner_iter, dataug_epoch_start=dataug_epoch_start, print_freq=10, loss_patience=10)
|
||||
|
||||
####
|
||||
plot_res(log, fig_name="res/{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter))
|
||||
print('-'*9)
|
||||
times = [x["time"] for x in log]
|
||||
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times)), "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
|
||||
print(str(aug_model),": acc", out["Accuracy"], "in (ms):", out["Time"][0], "+/-", out["Time"][1])
|
||||
with open("res/log/%s.json" % "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter), "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
print('Log :\"',f.name, '\" saved !')
|
||||
print('-'*9)
|
||||
#'''
|
||||
|
||||
#### Comparison ####
|
||||
'''
|
||||
files=[
|
||||
#"res/log/LeNet-100 epochs.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Uniform-4 TF)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Uniform-4 TF)-LeNet)-100 epochs (dataug:50)- 0 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Uniform-3 TF)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV3(Uniform-3 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Mix 0,5-3 TF)-LeNet)-100 epochs (dataug:0)- 1 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Mix 0.5-3 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
|
||||
#"res/log/Aug_mod(Data_augV4(Uniform-3 TF)-LeNet)-100 epochs (dataug:0)- 10 in_it.json",
|
||||
"res/log/Aug_mod(Data_augV4(Uniform-10 TF)-LeNet)-100 epochs (dataug:50)- 10 in_it.json",
|
||||
"res/log/Aug_mod(Data_augV4(Uniform-10 TF)-LeNet)-100 epochs (dataug:50)- 0 in_it.json",
|
||||
]
|
||||
plot_compare(filenames=files, fig_name="res/compare")
|
||||
'''
|
150
higher/test_lr.py
Normal file
|
@ -0,0 +1,150 @@
|
|||
import numpy as np
|
||||
import json, math, time, os
|
||||
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
import torch.optim as optim
|
||||
import higher
import torchvision
|
||||
from model import *
|
||||
|
||||
import copy
|
||||
|
||||
BATCH_SIZE = 300
|
||||
TEST_SIZE = 300
|
||||
|
||||
mnist_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
|
||||
mnist_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
#train_subset_indices=range(int(len(mnist_train)/2))
|
||||
train_subset_indices=range(BATCH_SIZE)
|
||||
val_subset_indices=range(int(len(mnist_train)/2),len(mnist_train))
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
|
||||
dl_val = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
|
||||
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=TEST_SIZE, shuffle=False)
|
||||
|
||||
|
||||
def test(model):
|
||||
model.eval()
|
||||
for i, (features, labels) in enumerate(dl_test):
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
|
||||
|
||||
def train_classic(model, optim, epochs=1):
|
||||
model.train()
|
||||
log = []
|
||||
for epoch in range(epochs):
|
||||
t0 = time.process_time()
|
||||
for i, (features, labels) in enumerate(dl_train):
|
||||
|
||||
optim.zero_grad()
|
||||
pred = model.forward(features)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
loss.backward()
|
||||
optim.step()
|
||||
|
||||
#### Log ####
|
||||
tf = time.process_time()
|
||||
data={
|
||||
"time": tf - t0,
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
times = [x["time"] for x in log]
|
||||
print("Vanilla : acc", test(model), "in (ms):", np.mean(times), "+/-", np.std(times))
|
||||
##########################################
|
||||
if __name__ == "__main__":
|
||||
|
||||
device = torch.device('cpu')
|
||||
|
||||
model = LeNet(1,10)
|
||||
opt_param = {
|
||||
"lr": torch.tensor(1e-2).requires_grad_(),
|
||||
"momentum": torch.tensor(0.9).requires_grad_()
|
||||
}
|
||||
n_inner_iter = 1
|
||||
dl_train_it = iter(dl_train)
|
||||
dl_val_it = iter(dl_val)
|
||||
epoch = 0
|
||||
epochs = 10
|
||||
|
||||
####
|
||||
train_classic(model=model, optim=torch.optim.Adam(model.parameters(), lr=0.001), epochs=epochs)
|
||||
model = LeNet(1,10)
|
||||
|
||||
meta_opt = torch.optim.Adam(opt_param.values(), lr=1e-2)
|
||||
inner_opt = torch.optim.SGD(model.parameters(), lr=opt_param['lr'], momentum=opt_param['momentum'])
|
||||
#for xs_val, ys_val in dl_val:
|
||||
while epoch < epochs:
|
||||
#print(data_aug.params["mag"], data_aug.params["mag"].grad)
|
||||
meta_opt.zero_grad()
|
||||
model.train()
|
||||
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effect of copy_initial_weights not clear...
|
||||
|
||||
for param_group in diffopt.param_groups:
|
||||
param_group['lr'] = opt_param['lr']
|
||||
param_group['momentum'] = opt_param['momentum']
|
||||
|
||||
for i in range(n_inner_iter):
|
||||
try:
|
||||
xs, ys = next(dl_train_it)
|
||||
except StopIteration: #End of training epoch
|
||||
epoch +=1
|
||||
dl_train_it = iter(dl_train)
|
||||
xs, ys = next(dl_train_it)
|
||||
|
||||
print('Epoch', epoch)
|
||||
print('train loss',loss.item(), '/ val loss', val_loss.item())
|
||||
print('acc', test(model))
|
||||
print('opt : lr', opt_param['lr'].item(), 'momentum', opt_param['momentum'].item())
|
||||
print('-'*9)
|
||||
model.train()
|
||||
|
||||
|
||||
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
|
||||
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
|
||||
#print('loss',loss.item())
|
||||
diffopt.step(loss) # note that `step` must take `loss` as an argument!
|
||||
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
|
||||
# these new parameters, as an alternative to getting them from
|
||||
# `fmodel.fast_params` or `fmodel.parameters()` after calling
|
||||
# `diffopt.step`.
|
||||
|
||||
# At this point, or at any point in the iteration, you can take the
|
||||
# gradient of `fmodel.parameters()` (or equivalently
|
||||
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
|
||||
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
|
||||
# `grad_fn` as an attribute, and be part of the gradient tape.
|
||||
|
||||
# At the end of your inner loop you can obtain these e.g. ...
|
||||
#grad_of_grads = torch.autograd.grad(
|
||||
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
|
||||
try:
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
except StopIteration: #End of validation epoch
|
||||
dl_val_it = iter(dl_val)
|
||||
xs_val, ys_val = next(dl_val_it)
|
||||
|
||||
val_logits = fmodel(xs_val)
|
||||
val_loss = F.cross_entropy(val_logits, ys_val)
|
||||
#print('val_loss',val_loss.item())
|
||||
|
||||
val_loss.backward()
|
||||
#meta_grads = torch.autograd.grad(val_loss, opt_lr, allow_unused=True)
|
||||
#print(meta_grads)
|
||||
for param_group in diffopt.param_groups:
|
||||
print(param_group['lr'], '/',param_group['lr'].grad)
|
||||
print(param_group['momentum'], '/',param_group['momentum'].grad)
|
||||
|
||||
#model=copy.deepcopy(fmodel)
|
||||
model.load_state_dict(fmodel.state_dict())
|
||||
|
||||
meta_opt.step()
|
205
higher/transformations.py
Normal file
|
@ -0,0 +1,205 @@
|
|||
import torch
|
||||
import kornia
|
||||
import random
|
||||
|
||||
### Available TF for Dataug ###
|
||||
TF_dict={ #f(normalized_mag)=actual_mag
|
||||
## Geometric TF ##
|
||||
'Identity' : (lambda mag: None),
|
||||
'FlipUD' : (lambda mag: None),
|
||||
'FlipLR' : (lambda mag: None),
|
||||
'Rotate': (lambda mag: random.randint(-int_parameter(mag, maxval=30), int_parameter(mag, maxval=30))),
|
||||
'TranslateX': (lambda mag: [random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20)), 0]),
|
||||
'TranslateY': (lambda mag: [0, random.randint(-int_parameter(mag, maxval=20), int_parameter(mag, maxval=20))]),
|
||||
'ShearX': (lambda mag: [random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3)), 0]),
|
||||
'ShearY': (lambda mag: [0, random.uniform(-float_parameter(mag, maxval=0.3), float_parameter(mag, maxval=0.3))]),
|
||||
|
||||
## Color TF (Expect image in the range of [0, 1]) ##
|
||||
'Contrast': (lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Color':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Brightness':(lambda mag: random.uniform(1., float_parameter(mag, maxval=1.9))),
|
||||
'Sharpness':(lambda mag: random.uniform(0.1, float_parameter(mag, maxval=1.9))),
|
||||
'Posterize': (lambda mag: random.randint(4, int_parameter(mag, maxval=8))),
|
||||
'Solarize': (lambda mag: random.randint(1, int_parameter(mag, maxval=256))/256.), #=>Image in [0,1] #Not optimized for batches
|
||||
|
||||
#Not functional
|
||||
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
|
||||
#'Equalize': (lambda mag: None),
|
||||
}
|
||||
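def _tf_dict_usage_sketch():
    # Hedged usage sketch (added for illustration, not part of the original commit):
    # each TF_dict entry maps a normalized magnitude in [0, PARAMETER_MAX] to a
    # randomly drawn real parameter. Values below follow the helpers defined further
    # down in this file.
    mag = 5
    angle = TF_dict['Rotate'](mag)        # int in [-15, 15] since maxval=30
    shear = TF_dict['ShearX'](mag)        # [x_shear, 0] with |x_shear| <= 0.15
    contrast = TF_dict['Contrast'](mag)   # float in [0.1, 0.95]
    return angle, shear, contrast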
|
||||
|
||||
def int_image(float_image): #WARNING : slight loss of information (granularity : 1/256 = 0.0039)
|
||||
return (float_image*255.).type(torch.uint8)
|
||||
|
||||
def float_image(int_image):
|
||||
return int_image.type(torch.float)/255.
|
||||
|
||||
def rand_inverse(value):
|
||||
return value if random.random() < 0.5 else -value
|
||||
|
||||
#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
|
||||
PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted
|
||||
def float_parameter(level, maxval):
|
||||
"""Helper function to scale `val` between 0 and maxval .
|
||||
Args:
|
||||
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
|
||||
maxval: Maximum value that the operation can have. This will be scaled
|
||||
to level/PARAMETER_MAX.
|
||||
Returns:
|
||||
A float that results from scaling `maxval` according to `level`.
|
||||
"""
|
||||
return float(level) * maxval / PARAMETER_MAX
|
||||
|
||||
def int_parameter(level, maxval):
|
||||
"""Helper function to scale `val` between 0 and maxval .
|
||||
Args:
|
||||
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
|
||||
maxval: Maximum value that the operation can have. This will be scaled
|
||||
to level/PARAMETER_MAX.
|
||||
Returns:
|
||||
An int that results from scaling `maxval` according to `level`.
|
||||
"""
|
||||
return int(level * maxval / PARAMETER_MAX)
|
||||
|
||||
def flipLR(x):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    M = torch.tensor([[[-1., 0., w-1],
                       [ 0., 1., 0.],
                       [ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)

    # warp the original image by the found transform
    return kornia.warp_perspective(x, M, dsize=(h, w))

def flipUD(x):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    M = torch.tensor([[[ 1., 0., 0.],
                       [ 0., -1., h-1],
                       [ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)

    # warp the original image by the found transform
    return kornia.warp_perspective(x, M, dsize=(h, w))

def rotate(x, angle):
    return kornia.rotate(x, angle=angle.type(torch.float32)) # Kornia does not support int

def translate(x, translation):
    return kornia.translate(x, translation=translation.type(torch.float32)) # Kornia does not support int

def shear(x, shear):
    return kornia.shear(x, shear=shear)

def contrast(x, contrast_factor):
    return kornia.adjust_contrast(x, contrast_factor=contrast_factor) # Expect image in the range of [0, 1]

#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
    (batch_size, channels, h, w) = x.shape

    gray_x = kornia.rgb_to_grayscale(x)
    gray_x = gray_x.repeat_interleave(channels, dim=1)
    return blend(gray_x, x, color_factor).clamp(min=0.0, max=1.0) # Expect image in the range of [0, 1]

def brightness(x, brightness_factor):
    device = x.device

    return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0, max=1.0) # Expect image in the range of [0, 1]

def sharpeness(x, sharpness_factor):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    k = torch.tensor([[[ 1., 1., 1.],
                       [ 1., 5., 1.],
                       [ 1., 1., 1.]]], device=device) # Smooth filter: https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
    smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) # The alpha channel may need to be handled separately

    return blend(smooth_x, x, sharpness_factor).clamp(min=0.0, max=1.0) # Expect image in the range of [0, 1]

#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
    x = int_image(x) # Expect image in the range of [0, 1]

    mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)

    (batch_size, channels, h, w) = x.shape
    mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) # There is surely a simpler way ... (see the broadcasting sketch after `blend` below)

    return float_image(x & mask)

def auto_contrast(x): # NOT OPTIMIZED FOR BATCHES # EXTREMELY SLOW
    # Possible optimization: efficient LUT application / per-batch/channel histogram computation
    print("Warning: not checked yet!")
    (batch_size, channels, h, w) = x.shape
    x = int_image(x) # Expect image in the range of [0, 1]
    #print('Start', x[0])
    for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): # per-image operation
        #print(img.shape)
        for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # per-channel operation
            #print(chan.shape)
            hist = torch.histc(chan, bins=256, min=0, max=255) # NOT DIFFERENTIABLE

            # find lowest/highest samples after preprocessing
            for lo in range(256):
                if hist[lo]:
                    break
            for hi in range(255, -1, -1):
                if hist[hi]:
                    break
            if hi <= lo:
                # don't bother
                pass
            else:
                scale = 255.0 / (hi - lo)
                offset = -lo * scale
                for ix in range(256):
                    n_ix = int(ix * scale + offset)
                    if n_ix < 0: n_ix = 0
                    elif n_ix > 255: n_ix = 255

                    chan[chan==ix] = n_ix
            x[im_idx, chan_idx] = chan

    #print('End', x[0])
    return float_image(x)

def equalize(x): # NOT OPTIMIZED FOR BATCHES
    raise NotImplementedError("equalize is not implemented") # the original raised Exception(self, ...), but `self` does not exist at module level
    # Possible optimization: efficient LUT application / per-batch/channel histogram computation
    (batch_size, channels, h, w) = x.shape
    x = int_image(x) # Expect image in the range of [0, 1]
    #print('Start', x[0])
    for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): # per-image operation
        #print(img.shape)
        for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # per-channel operation
            #print(chan.shape)
            hist = torch.histc(chan, bins=256, min=0, max=255) # NOT DIFFERENTIABLE

    return float_image(x)

def solarize(x, thresholds): # NOT OPTIMIZED FOR BATCHES
    # Possible optimization: apply a mask to all the data at once (mask = (B,C,H,W) > (B))
    for idx, t in enumerate(thresholds): # per-image operation
        mask = x[idx] > t.item()
        inv_x = 1 - x[idx][mask]
        x[idx][mask] = inv_x
    return x

#https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
def blend(x, y, alpha): # out = image1 * (1.0 - alpha) + image2 * alpha
    #return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0) # out = src1*alpha + src2*beta + gamma # does not work for batched alpha

    if not isinstance(x, torch.Tensor):
        raise TypeError("x should be a tensor. Got {}".format(type(x)))

    if not isinstance(y, torch.Tensor):
        raise TypeError("y should be a tensor. Got {}".format(type(y)))

    (batch_size, channels, h, w) = x.shape
    alpha = alpha.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) # There is surely a simpler way ... (see the broadcasting sketch below)
    res = x*(1-alpha) + y*alpha

    return res
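# --- Hedged note (not part of the original file) -------------------------------
# The "surely a simpler way" expansions in `posterize` and `blend` can rely on
# broadcasting instead of chained unsqueeze/expand. Sketch, assuming `alpha` has
# shape (B,) as in the calls above:
def _blend_broadcast_sketch(x, y, alpha):
    alpha = alpha.view(-1, 1, 1, 1)   # (B,) -> (B, 1, 1, 1), broadcasts over C, H, W
    return x * (1 - alpha) + y * alpha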
184
higher/utils.py
Normal file
|
@ -0,0 +1,184 @@
|
|||
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc

from torchviz import make_dot

import torch
import torch.nn.functional as F


def print_graph(PyTorch_obj, fig_name='graph'):
    graph = make_dot(PyTorch_obj) # the loss gives the whole graph
    graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
    graph.render(fig_name)

def plot_res(log, fig_name='res'):

    epochs = [x["epoch"] for x in log]

    fig, ax = plt.subplots(ncols=3, figsize=(15, 3))

    ax[0].set_title('Loss')
    ax[0].plot(epochs, [x["train_loss"] for x in log], label='Train')
    ax[0].plot(epochs, [x["val_loss"] for x in log], label='Val')
    ax[0].legend()

    ax[1].set_title('Acc')
    ax[1].plot(epochs, [x["acc"] for x in log])

    if log[0]["param"] != None:
        if isinstance(log[0]["param"], float):
            ax[2].set_title('Mag')
            ax[2].plot(epochs, [x["param"] for x in log], label='Mag')
            ax[2].legend()
        else:
            ax[2].set_title('Prob')
            for idx, _ in enumerate(log[0]["param"]):
                ax[2].plot(epochs, [x["param"][idx] for x in log], label='P'+str(idx))
            ax[2].legend()
            #ax[2].legend(('P-0', 'P-45', 'P-180'))

    fig_name = fig_name.replace('.', ',')
    plt.savefig(fig_name)

def plot_compare(filenames, fig_name='res'):

    all_data = []
    legend = ""
    for idx, file in enumerate(filenames):
        legend += str(idx)+'-'+file+'\n'
        with open(file) as json_file:
            data = json.load(json_file)
            all_data.append(data)

    fig, ax = plt.subplots(ncols=3, figsize=(30, 8))

    for data_idx, log in enumerate(all_data):
        log = log['Log']
        epochs = [x["epoch"] for x in log]

        ax[0].plot(epochs, [x["train_loss"] for x in log], label=str(data_idx)+'-Train')
        ax[0].plot(epochs, [x["val_loss"] for x in log], label=str(data_idx)+'-Val')

        ax[1].plot(epochs, [x["acc"] for x in log], label=str(data_idx))
        #ax[1].text(x=0.5, y=0, s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)

        if log[0]["param"] != None:
            if isinstance(log[0]["param"], float):
                ax[2].plot(epochs, [x["param"] for x in log], label=str(data_idx)+'-Mag')

            else:
                for idx, _ in enumerate(log[0]["param"]):
                    ax[2].plot(epochs, [x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))

    fig.suptitle(legend)
    ax[0].set_title('Loss')
    ax[1].set_title('Acc')
    ax[2].set_title('Param')
    for a in ax: a.legend()
    fig_name = fig_name.replace('.', ',')

    plt.savefig(fig_name, bbox_inches='tight')

def viz_sample_data(imgs, labels, fig_name='data_sample'):

    sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()

    plt.figure(figsize=(10, 10))
    for i in range(25):
        plt.subplot(5, 5, i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(sample[i,], cmap=plt.cm.binary)
        plt.xlabel(labels[i].item())

    plt.savefig(fig_name)

def model_copy(src, dst, patch_copy=True, copy_grad=True):
    #model=copy.deepcopy(fmodel) # not appropriate: we only want the weights/grads, not the whole fmodel and its state

    dst.load_state_dict(src.state_dict()) # Does not copy gradients!

    if patch_copy:
        dst['model'].load_state_dict(src['model'].state_dict()) # copy otherwise missing data?
        dst['data_aug'].load_state_dict(src['data_aug'].state_dict())

    # Copy the gradients
    if copy_grad:
        for paramName, paramValue, in src.named_parameters():
            for netCopyName, netCopyValue, in dst.named_parameters():
                if paramName == netCopyName:
                    netCopyValue.grad = paramValue.grad
                    #netCopyValue=copy.deepcopy(paramValue)

    try: # Data_augV4
        dst['data_aug']._input_info = src['data_aug']._input_info
        dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
    except:
        pass

def optim_copy(dopt, opt):

    #inner_opt.load_state_dict(diffopt.state_dict()) # need to save the optimizer state (momentum, etc.) => does not copy the state...
    #opt_param=higher.optim.get_trainable_opt_params(diffopt)

    for group_idx, group in enumerate(opt.param_groups):
        # print('gp idx', group_idx)
        for p_idx, p in enumerate(group['params']):
            opt.state[p] = dopt.state[group_idx][p_idx]

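# --- Hedged usage sketch (not part of the original file) ------------------------
# Typical use after an unrolled inner loop: copy the patched (functional) model
# and the differentiable-optimizer state back into their regular counterparts.
# The argument names mirror the training code earlier in this commit (`fmodel`,
# `diffopt`, `model`, `inner_opt` are assumptions here, as is the dict-style
# patched model expected when patch_copy=True).
def _sync_after_inner_loop(fmodel, diffopt, model, inner_opt):
    model_copy(src=fmodel, dst=model, patch_copy=True, copy_grad=True)
    optim_copy(dopt=diffopt, opt=inner_opt)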
def print_torch_mem(add_info=''):

    nb = 0
    max_size = 0
    for obj in gc.get_objects():
        #print(type(obj))
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
                #print(i, type(obj), obj.size())
                size = np.sum(obj.size())
                if(size > max_size): max_size = size
                nb += 1
        except:
            pass
    print(add_info, "-PyTorch tensor nb:", nb, " / Max dim:", max_size)

    #print(add_info, "-Garbage size :", len(gc.garbage))

class loss_monitor(): # See https://github.com/pytorch/ignite
    def __init__(self, patience, end_train=1):
        self.patience = patience
        self.end_train = end_train
        self.counter = 0
        self.best_score = None
        self.reached_limit = 0

    def register(self, loss):
        if self.best_score is None:
            self.best_score = loss
        elif loss > self.best_score:
            self.counter += 1
            #if not self.reached_limit:
            print("loss no improve counter", self.counter, self.reached_limit)
        else:
            self.best_score = loss
            self.counter = 0

    def limit_reached(self):
        if self.counter >= self.patience:
            self.counter = 0
            self.reached_limit += 1
            self.best_score = None
        return self.reached_limit

    def end_training(self):
        if self.limit_reached() >= self.end_train:
            return True
        else:
            return False

    def reset(self):
        self.__init__(self.patience, self.end_train)
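# --- Hedged usage sketch (not part of the original file) ------------------------
# How loss_monitor is meant to be driven from a training loop: register the
# validation loss every epoch and stop once the patience limit has been reached
# `end_train` times. `compute_val_loss` and `max_epochs` are placeholders.
def _early_stopping_sketch(compute_val_loss, max_epochs=100):
    monitor = loss_monitor(patience=5, end_train=1)
    for epoch in range(max_epochs):
        monitor.register(compute_val_loss())
        if monitor.end_training():
            break
    return epoch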