Remove Old folder

This commit is contained in:
AntoineH 2024-08-20 11:55:02 +02:00
parent 18be4d85ca
commit 431252992c
38 changed files with 0 additions and 7821 deletions

View file

@@ -1,456 +0,0 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Transforms used in the Augmentation Policies."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import inspect
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import
IMAGE_SIZE = 28
# Mean and std of the images on the training set (these are the standard CIFAR-10 values)
MEANS = [0.49139968, 0.48215841, 0.44653091]
STDS = [0.24703223, 0.24348513, 0.26158784]
PARAMETER_MAX = 10 # Maximum 'level' a transform can be applied at
def random_flip(x):
"""Flip the input x horizontally with 50% probability."""
if np.random.rand(1)[0] > 0.5:
return np.fliplr(x)
return x
def zero_pad_and_crop(img, amount=4):
"""Zero pad by `amount` zero pixels on each side then take a random crop.
Args:
img: numpy image that will be zero padded and cropped.
amount: amount of zeros to pad `img` with horizontally and vertically.
Returns:
The cropped zero padded img. The returned numpy array will be of the same
shape as `img`.
"""
padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
img.shape[2]))
padded_img[amount:img.shape[0] + amount, amount:
img.shape[1] + amount, :] = img
top = np.random.randint(low=0, high=2 * amount)
left = np.random.randint(low=0, high=2 * amount)
new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
return new_img
def create_cutout_mask(img_height, img_width, num_channels, size):
"""Creates a zero mask used for cutout of shape `img_height` x `img_width`.
Args:
img_height: Height of image cutout mask will be applied to.
img_width: Width of image cutout mask will be applied to.
num_channels: Number of channels in the image.
size: Size of the zeros mask.
Returns:
A mask of shape `img_height` x `img_width` with all ones except for a
square of zeros of shape `size` x `size`. This mask is meant to be
elementwise multiplied with the original image. Additionally returns
the `upper_coord` and `lower_coord` which specify where the cutout mask
will be applied.
"""
assert img_height == img_width
# Sample center where cutout mask will be applied
height_loc = np.random.randint(low=0, high=img_height)
width_loc = np.random.randint(low=0, high=img_width)
# Determine upper left and lower right corners of patch
upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
lower_coord = (min(img_height, height_loc + size // 2),
min(img_width, width_loc + size // 2))
mask_height = lower_coord[0] - upper_coord[0]
mask_width = lower_coord[1] - upper_coord[1]
assert mask_height > 0
assert mask_width > 0
mask = np.ones((img_height, img_width, num_channels))
zeros = np.zeros((mask_height, mask_width, num_channels))
mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
zeros)
return mask, upper_coord, lower_coord
def cutout_numpy(img, size=16):
"""Apply cutout with mask of shape `size` x `size` to `img`.
The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
This operation applies a `size`x`size` mask of zeros to a random location
within `img`.
Args:
img: Numpy image that cutout will be applied to.
size: Height/width of the cutout mask that will be applied.
Returns:
A numpy tensor that is the result of applying the cutout mask to `img`.
"""
img_height, img_width, num_channels = (img.shape[0], img.shape[1],
img.shape[2])
assert len(img.shape) == 3
mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
return img * mask
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / PARAMETER_MAX
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / PARAMETER_MAX)
def pil_wrap(img):
"""Convert the `img` numpy tensor to a PIL Image."""
return Image.fromarray(
np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')
def pil_unwrap(pil_img):
"""Converts the PIL img to a numpy array."""
pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
i1, i2 = np.where(pic_array[:, :, 3] == 0)
pic_array = (pic_array[:, :, :3] - MEANS) / STDS
pic_array[i1, i2] = [0, 0, 0]
return pic_array
def apply_policy(policy, img):
"""Apply the `policy` to the numpy `img`.
Args:
policy: A list of tuples with the form (name, probability, level) where
`name` is the name of the augmentation operation to apply, `probability`
is the probability of applying the operation and `level` is what strength
the operation to apply.
img: Numpy image that will have `policy` applied to it.
Returns:
The result of applying `policy` to `img`.
"""
#print('img shape :',img.shape)
#print('Policy len :',len(policy))
pil_img = pil_wrap(img)
for xform in policy:
#print('xform :', len(xform))
assert len(xform) == 3
name, probability, level = xform
#xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
pil_img = xform_fn(pil_img)
return pil_unwrap(pil_img)
class TransformFunction(object):
"""Wraps the Transform function for pretty printing options."""
def __init__(self, func, name):
self.f = func
self.name = name
def __repr__(self):
return '<' + self.name + '>'
def __call__(self, pil_img):
return self.f(pil_img)
class TransformT(object):
"""Each instance of this class represents a specific transform."""
def __init__(self, name, xform_fn):
self.name = name
self.xform = xform_fn
def pil_transformer(self, probability, level):
def return_function(im):
if random.random() < probability:
im = self.xform(im, level)
return im
name = self.name + '({:.1f},{})'.format(probability, level)
return TransformFunction(return_function, name)
def do_transform(self, image, level):
f = self.pil_transformer(PARAMETER_MAX, level)
return pil_unwrap(f(pil_wrap(image)))
################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
'FlipLR',
lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
'FlipUD',
lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
'AutoContrast',
lambda pil_img, level: ImageOps.autocontrast(
pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
'Equalize',
lambda pil_img, level: ImageOps.equalize(
pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
'Invert',
lambda pil_img, level: ImageOps.invert(
pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
'Smooth',
lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))
def _rotate_impl(pil_img, level):
"""Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
degrees = int_parameter(level, 30)
if random.random() > 0.5:
degrees = -degrees
return pil_img.rotate(degrees)
rotate = TransformT('Rotate', _rotate_impl)
def _posterize_impl(pil_img, level):
"""Applies PIL Posterize to `pil_img`."""
level = int_parameter(level, 4)
return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')
posterize = TransformT('Posterize', _posterize_impl)
def _shear_x_impl(pil_img, level):
"""Applies PIL ShearX to `pil_img`.
The ShearX operation shears the image along the horizontal axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearX applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))
shear_x = TransformT('ShearX', _shear_x_impl)
def _shear_y_impl(pil_img, level):
"""Applies PIL ShearY to `pil_img`.
The ShearY operation shears the image along the vertical axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearY applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))
shear_y = TransformT('ShearY', _shear_y_impl)
def _translate_x_impl(pil_img, level):
"""Applies PIL TranslateX to `pil_img`.
Translate the image in the horizontal direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateX applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))
translate_x = TransformT('TranslateX', _translate_x_impl)
def _translate_y_impl(pil_img, level):
"""Applies PIL TranslateY to `pil_img`.
Translate the image in the vertical direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateY applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))
translate_y = TransformT('TranslateY', _translate_y_impl)
def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
"""Applies a crop to `pil_img` with the size depending on the `level`."""
cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
return resized
crop_bilinear = TransformT('CropBilinear', _crop_impl)
def _solarize_impl(pil_img, level):
"""Applies PIL Solarize to `pil_img`.
The Solarize operation inverts all pixel values above a threshold of `256 - level`.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had Solarize applied to it.
"""
level = int_parameter(level, 256)
return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')
solarize = TransformT('Solarize', _solarize_impl)
def _cutout_pil_impl(pil_img, level):
"""Apply cutout to pil_img at the specified level."""
size = int_parameter(level, 20)
if size <= 0:
return pil_img
img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
_, upper_coord, lower_coord = (
create_cutout_mask(img_height, img_width, num_channels, size))
pixels = pil_img.load() # create the pixel map
for i in range(upper_coord[0], lower_coord[0]): # for every col:
for j in range(upper_coord[1], lower_coord[1]): # For every row
pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly
return pil_img
cutout = TransformT('Cutout', _cutout_pil_impl)
def _enhancer_impl(enhancer):
"""Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
def impl(pil_img, level):
v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it
return enhancer(pil_img).enhance(v)
return impl
color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))
ALL_TRANSFORMS = [
flip_lr,
flip_ud,
auto_contrast,
equalize,
invert,
rotate,
posterize,
crop_bilinear,
solarize,
color,
contrast,
brightness,
sharpness,
shear_x,
shear_y,
translate_x,
translate_y,
cutout,
blur,
smooth
]
NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
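A minimal usage sketch of the transform registry above (assumptions: this file is importable as `augmentation_transforms`, and the input is a 28x28x3 numpy array normalized with `MEANS`/`STDS`, as `pil_wrap` expects). It drives a single transform through `pil_transformer` rather than `apply_policy`, since `apply_policy` as written above calls `probability.eval()` and therefore expects a TensorFlow tensor for the probability:

```python
# Hedged sketch; module name and input preprocessing are assumptions.
import numpy as np
import augmentation_transforms as aug

raw = np.random.rand(aug.IMAGE_SIZE, aug.IMAGE_SIZE, 3)  # stand-in for a real image in [0, 1]
img = (raw - aug.MEANS) / aug.STDS                       # normalized, as pil_wrap expects

# Look up a transform by name and build a callable that applies it
# with probability 0.9 at level 5 (levels live in [0, PARAMETER_MAX]).
xform = aug.NAME_TO_TRANSFORM['Rotate'].pil_transformer(0.9, 5)
augmented = aug.pil_unwrap(xform(aug.pil_wrap(img)))     # back to a normalized numpy array
print(augmented.shape)                                   # (28, 28, 3)
```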

View file

@@ -1,131 +0,0 @@
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset
import os
import numpy as np
import tensorflow as tf
import augmentation_transforms as augmentation_transforms ##### WARNING: DUPLICATED FILE => NEEDS A CLEANER FIX ####
def viz_data(dataset, fig_name='data_sample',aug_policy=None):
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = dataset.data[i][:,:,0]
if aug_policy :
img = augment_img(img,aug_policy)
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(np.nonzero(dataset.target[i])[0].item())
plt.savefig(fig_name)
def augment_img(data, policy):
#print('Im shape',data.shape)
data = np.stack((data,)*3, axis=-1) # Hacky: just to force 3 channels
#print('Im shape',data.shape)
final_img = augmentation_transforms.apply_policy(policy, data)
#final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
# Apply cutout
#final_img = augmentation_transforms.cutout_numpy(final_img)
im_rgb = np.array(final_img, np.float32)
im_gray = np.dot(im_rgb[...,:3], [0.2989, 0.5870, 0.1140]) # Just to go back to 1 channel
return im_gray
### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def cnn(x_data_tf,y_data_tf, name='model'):
# tunable hyperparameters for nn architecture
s_f_conv1 = 3; # filter size of first convolution layer (default = 3)
n_f_conv1 = 36; # number of features of first convolution layer (default = 36)
s_f_conv2 = 3; # filter size of second convolution layer (default = 3)
n_f_conv2 = 36; # number of features of second convolution layer (default = 36)
s_f_conv3 = 3; # filter size of third convolution layer (default = 3)
n_f_conv3 = 36; # number of features of third convolution layer (default = 36)
n_n_fc1 = 576; # number of neurons of first fully connected layer (default = 576)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(x_data_tf,
W_conv1_tf) + b_conv1_tf,
name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf,
name = 'h_pool1_tf') # (.,14,14,32)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2,
n_f_conv1, n_f_conv2],
name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf,
W_conv2_tf) + b_conv2_tf,
name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
# 3.layer: convolution + max pooling
W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3,
n_f_conv2, n_f_conv3],
name = 'W_conv3_tf')
b_conv3_tf = bias_variable([n_f_conv3], name = 'b_conv3_tf')
h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf,
W_conv3_tf) + b_conv3_tf,
name = 'h_conv3_tf') #(.,7,7,32)
h_pool3_tf = max_pool_2x2(h_conv3_tf,
name = 'h_pool3_tf') # (.,4,4,32)
# 4.layer: fully connected
W_fc1_tf = weight_variable([4*4*n_f_conv3,n_n_fc1],
name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1,4*4*n_f_conv3],
name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf,
W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, 10], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([10], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf),
b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
tf.argmax(y_data_tf, 1),
name = 'y_pred_correct_tf')
return y_pred_proba_tf
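A hedged sketch of how the `cnn` graph builder above might be wired up (TF1 style; assumptions: this file is importable, e.g. as `blue_utils`, and its own imports such as `far_ho` resolve in the environment):

```python
# Hedged sketch; module name and dummy data are assumptions.
import numpy as np
import tensorflow as tf
import blue_utils as butil

x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='x')
y = tf.placeholder(tf.float32, shape=[None, 10], name='y')
probs = butil.cnn(x, y)  # softmax probabilities, shape (., 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    images = np.random.rand(4, 28, 28, 1).astype(np.float32)            # dummy batch
    labels = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 4)]  # dummy one-hot labels
    print(sess.run(probs, feed_dict={x: images, y: labels}).shape)      # (4, 10)
```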

View file

@@ -1,166 +0,0 @@
#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
from __future__ import absolute_import, print_function, division
import os
import numpy as np
import tensorflow as tf
#import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
#import pprint
import autoaugment.augmentation_transforms as augmentation_transforms
#import autoaugment.policies as found_policies
from autoaugment.wrn import build_wrn_model
def build_model(inputs, num_classes, is_training, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model being built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
with contextlib.nested(*scopes):
if hparams.model_name == 'pyramid_net':
logits = build_shake_drop_model(
inputs, num_classes, is_training)
elif hparams.model_name == 'wrn':
logits = build_wrn_model(
inputs, num_classes, hparams.wrn_size)
elif hparams.model_name == 'shake_shake':
logits = build_shake_shake_model(
inputs, num_classes, hparams, is_training)
return logits
class CifarModel(object):
"""Builds an image model for Cifar10/Cifar100."""
def __init__(self, hparams):
self.hparams = hparams
def build(self, mode):
"""Construct the cifar model."""
assert mode in ['train', 'eval']
self.mode = mode
self._setup_misc(mode)
self._setup_images_and_labels()
self._build_graph(self.images, self.labels, mode)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _setup_misc(self, mode):
"""Sets up miscellaneous in the cifar model constructor."""
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
self.reuse = None if (mode == 'train') else True
self.batch_size = self.hparams.batch_size
if mode == 'eval':
self.batch_size = 25
def _setup_images_and_labels(self):
"""Sets up image and label placeholders for the cifar model."""
if FLAGS.dataset == 'cifar10':
self.num_classes = 10
else:
self.num_classes = 100
self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
self.labels = tf.placeholder(tf.float32,
[self.batch_size, self.num_classes])
def assign_epoch(self, session, epoch_value):
session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})
def _build_graph(self, images, labels, mode):
"""Constructs the TF graph for the cifar model.
Args:
images: A 4-D image Tensor
labels: A 2-D labels Tensor.
mode: string indicating training mode ( e.g., 'train', 'valid', 'test').
"""
is_training = 'train' in mode
if is_training:
self.global_step = tf.train.get_or_create_global_step()
logits = build_model(
images,
self.num_classes,
is_training,
self.hparams)
self.predictions, self.cost = helper_utils.setup_loss(
logits, labels)
self.accuracy, self.eval_op = tf.metrics.accuracy(
tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
self._calc_num_trainable_params()
# Adds L2 weight decay to the cost
self.cost = helper_utils.decay_weights(self.cost,
self.hparams.weight_decay_rate)
#### Warning: differs from the original implementation
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
########################################################
######## PBA ############
# Parallel CIFAR model trainer
tf.flags.DEFINE_string('model_name', 'wrn',
'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
'pyramid_net')
tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
tf.flags.DEFINE_string('data_path', '/tmp/data',
'Directory where dataset is located.')
tf.flags.DEFINE_string('dataset', 'cifar10',
'Dataset to train with. Either cifar10 or cifar100')
tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
## ???
FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'
hparams = tf.contrib.training.HParams(
train_size=50000,
validation_size=0,
eval_test=1,
dataset=FLAGS.dataset,
data_path=FLAGS.data_path,
batch_size=128,
gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
hparams.add_hparam('model_name', 'wrn')
hparams.add_hparam('num_epochs', 200)
hparams.add_hparam('wrn_size', 160)
hparams.add_hparam('lr', 0.1)
hparams.add_hparam('weight_decay_rate', 5e-4)
data_loader = data_utils.DataSet(hparams)
data_loader.reset()
with tf.Graph().as_default(): #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
"""Builds the image models for train and eval."""
# Determine if we should build the train and eval model. When using
# distributed training we only want to build one or the other and not both.
with tf.variable_scope('model', use_resource=False):
m = CifarModel(hparams)
m.build('train')
#self._num_trainable_params = m.num_trainable_params
#self._saver = m.saver
#with tf.variable_scope('model', reuse=True, use_resource=False):
# meval = CifarModel(self.hparams)
# meval.build('eval')
##### FAR-HO ####
for _ in range(n_hyper_iterations):

View file

@@ -1,92 +0,0 @@
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt
sess = tf.InteractiveSession()
def get_data():
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
return datasets.train, datasets.validation
def g_logits(x,y):
with tf.variable_scope('model'):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x,y)
train_set, validation_set = get_data()
lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas)*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it =10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
T = 20 # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []
for _ in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
inner_obj = L.eval(train_set_supplier())
outer_obj = E.eval(validation_set_supplier())
inner_losses.append(inner_obj)
outer_losses.append(outer_obj)
print('inner:', inner_obj)
print('outer:', outer_obj)
train_acc = accuracy.eval(train_set_supplier())
val_acc = accuracy.eval(validation_set_supplier())
train_accs.append(train_acc)
val_accs.append(val_acc)
print('training accuracy', train_acc)
print('validation accuracy', val_acc)
print('learning rate', lr.eval())
print('norm of examples weight', tf.norm(lambdas).eval())
print('-'*50)
plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)
plt.savefig('H%d - I%d - R%d'%(n_hyper_iterations,T,rev_it))

View file

@@ -1,126 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 500
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 1.e-3
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.05, .05,))
butil.viz_data(train_set)
print('Data sampled !')
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when these hyperparameters are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations,T))

View file

@@ -1,141 +0,0 @@
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 10
T = 10 # Number of inner iterations
rev_it =10
hp_lr = 0.02
##########################
#MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)
#CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.1, .1,))
probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1)) # clamp to [0.1, 0.9]
probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.1)) # clamp to [0.1, 0.9]
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
#probX, probY = 0.5, 0.5
#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)]
policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
print('Hyp :',far.utils.hyperparameters(scope=None))
#butil.viz_data(train_set, aug_policy= policy)
#print('Data sampled !')
# Artificially add the transforms to the loss just so they are taken into account in the graph dynamics
probX_loss = tf.sigmoid(probX)
probY_loss = tf.sigmoid(probY)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(probX_loss*probY_loss*ce)
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.AdamOptimizer()
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)
train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
#print(train_set.dim_data,validation_set.dim_data)
his_params = []
tf.global_variables_initializer().run()
butil.viz_data(train_set, fig_name= 'Start_sample',aug_policy= policy)
print('Data sampled !')
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=True)
res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
butil.viz_data(train_set, fig_name= 'Train_sample_{}'.format(hyt),aug_policy= policy)
print('Data sampled !')
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
print('training accuracy:', res[4])
print('validation accuracy:', res[5])
print('Transformation : ProbX -',res[0],'/ProbY -',res[1])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('ProbX')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('ProbY')
ax[1].plot([e[1] for e in his_params])
ax[2].set_title('Tr. and val. errors')
ax[2].plot([e[2] for e in his_params])
ax[2].plot([e[3] for e in his_params])
ax[3].set_title('Tr. and val. acc')
ax[3].plot([e[4] for e in his_params])
ax[3].plot([e[5] for e in his_params])
plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations,T))

View file

@@ -1,133 +0,0 @@
#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
tf.logging.set_verbosity(tf.logging.ERROR)
import matplotlib.pyplot as plt
#import blue_utils as butil
#Reset
try:
sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()
def get_data(data_split):
# load a small portion of mnist data
datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
[print("Nb samples : ", d.num_examples) for d in datasets]
return datasets.train, datasets.validation, datasets.test
#Model
# FC : reshape = True
def g_logits(x,y, name='model'):
with tf.variable_scope(name):
h1 = layers.fully_connected(x, 300)
logits = layers.fully_connected(h1, int(y.shape[1]))
return logits
#### Hyper-parametres ####
n_hyper_iterations = 90
T = 20 # Number of inner iterations
rev_it =10
hp_lr = 0.1
epochs =10
batch_size = 256
##########################
#MNIST
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
#CNN : reshape = False
#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')
#logits = butil.cnn(x,y)
train_set, validation_set, test_set = get_data(data_split=(.6, .3,))
#butil.viz_data(train_set)
# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()]) # Remove the second part of the loss when these hyperparameters are unused
E = tf.reduce_mean(ce)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))
inner_optimizer = far.MomentumOptimizer(lr, mu)
#inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)#, global_step=tf.train.get_or_create_step())
train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)#, epochs=1) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)
print('Hyper iterations per epoch',int(train_set.num_examples/batch_size*epochs/T))
his_params = []
tf.global_variables_initializer().run()
for hyt in range(n_hyper_iterations):
hyper_step(T,
inner_objective_feed_dicts=train_set_supplier,
outer_objective_feed_dicts=validation_set_supplier,
_skip_hyper_ts=False)
res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()),
E.eval(validation_set_supplier()),
accuracy.eval(train_set_supplier()),
accuracy.eval(validation_set_supplier())]
his_params.append(res)
print('Hyper-it :',hyt,'/',n_hyper_iterations)
print('inner:', res[3])
print('outer:', res[4])
print('training accuracy:', res[5])
print('validation accuracy:', res[6])
#print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
print('-'*50)
test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:',accuracy.eval(test_set_supplier()))
fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])
ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])
#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])
ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])
plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations,T))
#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations,T))

View file

@@ -1,5 +0,0 @@
venv/
__pycache__
data/
log/
.vscode/

View file

@@ -1,33 +0,0 @@
# Gradient Descent: The Ultimate Optimizer
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |
This repository contains the paper and the code from the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).
I couldn't find the code (which appears in the appendix at the end of the
paper) anywhere on the web. What I present here is the code from the paper with
instructions on how to set it up.
Getting the code into a runnable state required some fixes on my part, so the
code might differ slightly from the version presented in the paper.
## Set up
```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```
When you are done you can exit the virtualenv with
```shell
deactivate
```

View file

@@ -1,244 +0,0 @@
from hyperopt import *
#from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class MNIST_FullyConnected_Augmented(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
self.device = device
#print(self.device)
parameters = {
"w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
"b1": torch.zeros(num_hid, device=self.device).t(),
"w2": torch.zeros(num_hid, num_out, device=self.device).t(),
"b2": torch.zeros(num_out, device=self.device).t(),
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
#print(self.device)
def forward(self, x):
"""Compute a prediction."""
#print("Prob:",self.parameters["prob"].item())
if random.random() < self.parameters["prob"]:
#angle = 45
#x = TF.rotate(x, angle)
#print(self.device)
#x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
x = x + self.parameters["mag"]
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_FC_augmented / " + str(self.optimizer)
class LeNet(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
parameters = {
"w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
"b1": torch.zeros(20, device=self.device),
"w2": torch.zeros(50, 20, 5, 5, device=self.device),
"b2": torch.zeros(50, device=self.device),
"w3": torch.zeros(500,4*4*50, device=self.device),
"b3": torch.zeros(500, device=self.device),
"w4": torch.zeros(10, 500, device=self.device),
"b4": torch.zeros(10, device=self.device),
#Data augmentation
"prob": torch.tensor(1.0, device=self.device),
"mag": torch.tensor(180.0, device=self.device),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
if random.random() < self.parameters["prob"]:
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)
class LeNet_v2(Optimizable, nn.Module):
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
nn.Module.__init__(self)
self.device = device
self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
#self.fc1 = nn.Linear(4*4*50, 500)
self.fc1 = nn.Linear(1250, 500)
self.fc2 = nn.Linear(500, 10)
#print(self.conv1.weight)
parameters = {
"w1": self.conv1.weight,
"b1": self.conv1.bias,
"w2": self.conv2.weight,
"b2": self.conv2.bias,
"w3": self.fc1.weight,
"b3": self.fc1.bias,
"w4": self.fc2.weight,
"b4": self.fc2.bias,
#Data augmentation
"prob": torch.tensor(0.5, device=self.device),
"mag": torch.tensor(1.0, device=self.device),
}
Optimizable.__init__(self, parameters, optimizer)
'''
def forward(self, x): # Saturates memory ???
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
#x = x.view(-1, 4*4*50)
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
'''
def forward(self, x):
if random.random() < self.parameters["prob"].item():
#print(self.parameters["prob"])
#x = [T.ToTensor()(
# TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
#x = [im + self.parameters["mag"]
# for im in torch.unbind(x,dim=0)]
#x = torch.stack(x,dim=0)
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.parameters["mag"] * 180 # in degrees
angle = torch.ones(batch_size, device=self.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=self.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=self.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def initialize(self):
self.optimizer.initialize()
def adjust(self):
self.optimizer.adjust(self.parameters)
def adjust_val(self):
self.optimizer.adjust_val(self.parameters)
def eval(self):
self.parameters['prob']=torch.tensor(0.0, device=self.device)
def __str__(self):
return "mnist_CNN_augmented / " + str(self.optimizer)

View file

@@ -1,52 +0,0 @@
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF
class MNIST_aug(Dataset):
training_file = 'training.pt'
test_file = 'test.pt'
classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
'5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
def __init__(self):
self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
self.set_stage(0) # initial stage
def __getitem__(self, index):
image = self.images[index]
# Just apply your transformations here
image = self.crop(image)
x = TF.to_tensor(image)
return x
def set_stage(self, stage):
if stage == 0:
print('Using (32, 32) crops')
self.crop = transforms.RandomCrop((32, 32))
elif stage == 1:
print('Using (28, 28) crops')
self.crop = transforms.RandomCrop((28, 28))
def __len__(self):
return len(self.images)
dataset = MNIST_aug()
loader = DataLoader(dataset,
batch_size=2,
num_workers=2,
shuffle=True)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))
loader.dataset.set_stage(1)
for batch_idx, data in enumerate(loader):
print('Batch idx {}, data shape {}'.format(
batch_idx, data.shape))

View file

@@ -1,150 +0,0 @@
#from hyperopt import *
from hyperopt_v2 import *
import torchvision.transforms.functional as TF
import torchvision.transforms as T
#from scipy import ndimage
import kornia
import random
class LeNet_v3(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet_v3, self).__init__()
self.params = nn.ParameterDict({
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
'b1': nn.Parameter(torch.zeros(20)),
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
'b2': nn.Parameter(torch.zeros(50)),
'w3': nn.Parameter(torch.zeros(500,4*4*50)),
'b3': nn.Parameter(torch.zeros(500)),
'w4': nn.Parameter(torch.zeros(10, 500)),
'b4': nn.Parameter(torch.zeros(10))
})
def initialize(self):
nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))
def forward(self, x):
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self.params["w4"], self.params["b4"])
#print("Shape ", out.shape)
return F.log_softmax(out, dim=1)
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "mnist_CNN_augmented / "
class Data_aug(nn.Module):
def __init__(self):
super(Data_aug, self).__init__()
self.data_augmentation = True
self.params = nn.ParameterDict({
"prob": nn.Parameter(torch.tensor(0.5)),
"mag": nn.Parameter(torch.tensor(180.0))
})
#self.params["mag"].register_hook(print)
def forward(self, x):
if self.data_augmentation and self.training and random.random() < self.params["prob"]:
#print('Aug')
batch_size = x.shape[0]
# create transformation (rotation)
alpha = self.params["mag"] # in degrees
angle = torch.ones(batch_size, device=x.device) * alpha
# define the rotation center
center = torch.ones(batch_size, 2, device=x.device)
center[..., 0] = x.shape[3] / 2 # x
center[..., 1] = x.shape[2] / 2 # y
#print(x.shape, center)
# define the scale factor
scale = torch.ones(batch_size, device=x.device)
# compute the transformation matrix
M = kornia.get_rotation_matrix2d(center, angle, scale)
# apply the transformation to original image
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
return x
def eval(self):
self.params['prob'].data.fill_(0.) # disable augmentation at eval time
nn.Module.eval(self)
def data_augmentation(self, mode=True):
self.data_augmentation=mode
def print_grad_fn(self):
for n, p in self.params.items():
print(n, p.grad_fn)
def __str__(self):
return "Data_Augmenter / "
class Augmented_model(nn.Module):
def __init__(self, model, data_augmenter):
#self.model = model
#self.data_aug = data_augmenter
super(Augmented_model, self).__init__()#nn.Module.__init__(self)
#super().__init__()
self.mods = nn.ModuleDict({
'data_aug': data_augmenter,
'model': model
})
#for name, param in self.mods.named_parameters():
# print(name, type(param.data), param.size())
#params = self.mods.named_parameters() #self.parameters()
#parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
#Optimizable.__init__(self, params, optimizer)
def initialize(self):
self.mods['model'].initialize()
def forward(self, x):
return self.mods['model'](self.mods['data_aug'](x))
#def adjust(self):
# self.optimizer.adjust(self) #Parametres des dict
def data_augmentation(self, mode=True):
self.mods['data_aug'].data_augmentation=mode
def begin(self):
for param in self.parameters():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def print_grad_fn(self):
for n, m in self.mods.items():
m.print_grad_fn()
def __str__(self):
return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer)
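A hedged sketch of composing the three classes above (assumptions: the wildcard import of `hyperopt_v2` provides `torch`, `nn`, `F`, and `math` as the code implies; the installed `kornia` exposes `get_rotation_matrix2d`/`warp_affine` at the top level as used in `Data_aug.forward`; inputs are 1x28x28 MNIST-style tensors):

```python
# Hedged sketch; the surrounding environment is an assumption.
import torch

aug_model = Augmented_model(LeNet_v3(num_inp=1, num_out=10), Data_aug())
aug_model.initialize()            # Kaiming init for the LeNet weights
aug_model.begin()                 # keep/retain gradient information on all parameters

x = torch.rand(4, 1, 28, 28)      # dummy batch
log_probs = aug_model(x)          # Data_aug may rotate the batch, then LeNet_v3 classifies it
print(log_probs.shape)            # torch.Size([4, 10])

aug_model.data_augmentation(False)  # turn augmentation off, e.g. for evaluation
```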

View file

@@ -1,5 +0,0 @@
digraph {
graph [size="12,12"]
node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
94296775052080 [label=NoneType fillcolor=darkolivegreen1]
}

View file

@@ -1,19 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="75pt" height="30pt"
viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
<!-- 94296775052080 -->
<g id="node1" class="node">
<title>94296775052080</title>
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
</g>
</g>
</svg>


View file

@@ -1,345 +0,0 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Optimizable():#nn.Module):
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Model but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
interface which does not give us enough control about the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
#super(Optimizable, self).__init__()
self.parameters = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = [] # Reinitialize to avoid overloading memory
for name, param in self.parameters.items():
#for param in self.parameters:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
#param = param.to(self.device)
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backwards() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for n, p in self.parameters.items():
print(n," - ", p.grad_fn)
def param_grad(self):
return self.all_params_with_gradients
def param(self, param_name):
return self.parameters[param_name].item()
class MNIST_FullyConnected(Optimizable):
"""
A fully-connected NN for the MNIST task. This is Optimizable but not itself
an optimizer.
"""
def __init__(self, num_inp, num_hid, num_out, optimizer):
parameters = {
"w1": torch.zeros(num_inp, num_hid).t(),
"b1": torch.zeros(num_hid).t(),
"w2": torch.zeros(num_hid, num_out).t(),
"b2": torch.zeros(num_out).t(),
}
super().__init__(parameters, optimizer)
def initialize(self):
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
self.optimizer.initialize()
def forward(self, x):
"""Compute a prediction."""
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
x = torch.tanh(x)
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
x = torch.tanh(x)
x = F.log_softmax(x, dim=1)
return x
def adjust(self):
self.optimizer.adjust(self.parameters)
def __str__(self):
return "mnist / " + str(self.optimizer)
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def adjust_val(self, params):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
class Adam(Optimizable):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
self.device = device
parameters = {
"alpha": torch.tensor(alpha, device=self.device),
"beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
"beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
"log_eps": torch.tensor(log_eps, device=self.device),
}
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.num_adjustments_val = 0
self.cache = {}
for name, param in parameters.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
#param.to(self.device)
#if param.device == torch.device('cuda:0'):
# print(name, param.device)
def adjust(self, params): # update the model (learning) parameters
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
#print('Adam update')
t = self.num_adjustments
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name == "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def adjust_val(self, params): # update the transformation (augmentation) parameters
self.num_adjustments_val += 1
self.optimizer.adjust_val(self.parameters)
#print('Adam update')
t = self.num_adjustments_val
beta1 = Adam.clamp(self.parameters["beta1"])
beta2 = Adam.clamp(self.parameters["beta2"])
for name, param in params.items():
if name != "mag": continue
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.parameters["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
params[name] = param.detach() - self.parameters["alpha"] * dparam
#print(name)
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
'''
class SGD(Optimizable):
"""
A hyperoptimizable SGD
"""
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
parameters = {"alpha": torch.tensor(alpha)}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters["alpha"]
def __str__(self):
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
class SGDPerParam(Optimizable):
"""
Like above, but can be taught a separate step size for each parameter it
tunes.
"""
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
super().__init__(parameters, optimizer)
def adjust(self, params):
self.optimizer.adjust(self.parameters)
for name, param in params.items():
g = param.grad.detach()
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
def __str__(self):
return "sgd(%s) / " % str(
{k: t.item() for k, t in self.parameters.items()}
) + str(self.optimizer)
'''
'''
class AdamBaydin(Optimizable):
""" Same as above, but only optimizes the learning rate, treating the
remaining hyperparameters as constants. """
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
):
parameters = {"alpha": torch.tensor(alpha)}
self.beta1 = beta1
self.beta2 = beta2
self.log_eps = log_eps
super().__init__(parameters, optimizer)
self.num_adjustments = 0
self.cache = {}
def adjust(self, params):
self.num_adjustments += 1
self.optimizer.adjust(self.parameters)
t = self.num_adjustments
beta1 = self.beta1
beta2 = self.beta2
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape),
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
}
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(m)
self.all_params_with_gradients.append(v)
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
params[name] = param.detach() - self.parameters["alpha"] * dparam
def __str__(self):
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
'''

View file

@ -1,296 +0,0 @@
import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
class Optimizable():
"""
This is the interface for anything that has parameters that need to be
optimized, somewhat like torch.nn.Module but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
interface which does not give us enough control over the detachments.)
Nominal operation of an Optimizable at the lowest level is as follows:
o = MyOptimizable()
o.initialize()
loop {
o.begin()
o.zero_grad()
loss = compute loss function from parameters
loss.backward()
o.adjust()
}
Optimizables recursively handle updates to their optimiz*ers*.
"""
#def __init__(self):
# super(Optimizable, self).__init__()
# self.parameters = nn.Parameter(torch.zeros(()))
def __init__(self, parameters, optimizer):
self.params = parameters # a dict mapping names to tensors
self.optimizer = optimizer # which must itself be Optimizable!
self.all_params_with_gradients = []
#self.device = device
def initialize(self):
"""Initialize parameters, e.g. with a Kaiming initializer."""
pass
def begin(self):
"""Enable gradient tracking on current parameters."""
self.all_params_with_gradients = nn.ParameterList() # reset the list to avoid memory buildup
print("Opti param :", type(self.params))
#for name, param in self.params:
if isinstance(self.params,dict): #Dict
for name, param in self.params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
if isinstance(self.params,list): #List
for param in self.params:
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
self.all_params_with_gradients.append(param)
self.optimizer.begin()
def zero_grad(self):
""" Set all gradients to zero. """
for param in self.all_params_with_gradients:
param.grad = torch.zeros(param.shape, device=param.device)
self.optimizer.zero_grad()
""" Note: at this point you would probably call .backwards() on the loss
function. """
def adjust(self):
""" Update parameters """
pass
class NoOpOptimizer(Optimizable):#, nn.Module):
"""
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
"""
def __init__(self):
#super(Optimizable, self).__init__()
pass
def initialize(self):
pass
def begin(self):
#print("NoOpt begin")
pass
def zero_grad(self):
pass
def adjust(self, params):
pass
def step(self):
pass
def print_grad_fn(self):
pass
def __str__(self):
return "static"
class SGD(Optimizer, nn.Module): # better to avoid inheriting from Optimizer
"""
A hyperoptimizable SGD
"""
def __init__(self, params, lr=0.01, height=0):
self.height=height
#params: the parameters to optimize
#the rest (defaults) are the optimizer's own hyperparameters
print('SGD - H', height)
nn.Module.__init__(self)
optim_keys = ('lr','') # move into Optimizable? # '' avoids iterating over the characters of the string...
'''
self_params = {"lr": torch.tensor(lr),
"momentum": 0,
"dampening":0,
"weight_decay":0,
"nesterov": False}
'''
#self_params = dict(lr=torch.tensor(lr),
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
self_params = nn.ParameterDict({
"lr": nn.Parameter(torch.tensor(lr)),
"momentum": nn.Parameter(torch.tensor(0.0)),
"dampening": nn.Parameter(torch.tensor(0.0)),
"weight_decay": nn.Parameter(torch.tensor(0.0)),
})
for k in self_params.keys() & optim_keys:
self_params[k].requires_grad_() # keep gradient information…
self_params[k].retain_grad() # even if not a leaf…
#self_params[k].register_hook(print)
if height==0:
optimizer = NoOpOptimizer()
else:
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) # should work
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
#optimizer.register_backward_hook(print)
self.optimizer = optimizer
#if(height==0):
# for n,p in params.items():
# print(n,p)
#Optimizable.__init__(self, self_params, optimizer)
#print(type(params))
#for p in params:
# print(type(p))
Optimizer.__init__(self, params, self_params)
for group in self.param_groups:
for p in group['params']:
print(type(p.data), p.size())
print('End SGD-H', height)
def begin(self):
for group in self.param_groups:
for p in group['params']:
#print(type(p.data), p.size())
p.requires_grad_() # keep gradient information…
p.retain_grad() # even if not a leaf…
#p.register_hook(lambda x: print(self.height, x.grad_fn))
self.optimizer.begin()
def print_grad_fn(self):
self.optimizer.print_grad_fn()
for group in self.param_groups:
for i, p in enumerate(group['params']):
print(self.height," - ", i, p.grad_fn)
#def adjust(self, params):
# self.optimizer.adjust(self.params)
# for name, param in params.items():
# g = param.grad.detach()
# params[name] = param.detach() - g * self.params["lr"]
def step(self):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
print('SGD start')
self.optimizer.step()
for group in self.param_groups:
for i, p in enumerate(group['params']):
if p.grad is None:
continue
#d_p = p.grad.data
d_p = p.grad.detach()
#print(group['lr'])
#p.data.add_(-group['lr'].item(), d_p) # .item() drops the lr from the graph; also avoid applying the update twice
#group['params'][i] = p.detach() - d_p * group['lr']
p.data-= group['lr']*d_p # note: going through .data loses gradient information
for p in group['params']:
if p.grad is None:
print(p, p.grad)
continue
print("SGD end")
#return loss
def __str__(self):
return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
class Adam(Optimizable, nn.Module):
"""
A fully hyperoptimizable Adam optimizer
"""
def clamp(x):
return (x.tanh() + 1.0) / 2.0
def unclamp(y):
z = y * 2.0 - 1.0
return ((1.0 + z) / (1.0 - z)).log() / 2.0
def __init__(
self,
alpha=0.001,
beta1=0.9,
beta2=0.999,
log_eps=-8.0,
optimizer=NoOpOptimizer(),
device = torch.device('cuda')
):
#super(Adam, self).__init__()
nn.Module.__init__(self)
self.device = device
params = nn.ParameterDict({
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
})
Optimizable.__init__(self, params, optimizer)
self.num_adjustments = 0
self.cache = {}
for name, param in params.items():
param.requires_grad_() # keep gradient information…
param.retain_grad() # even if not a leaf…
def adjust(self, params, pytorch_mod=False):
self.num_adjustments += 1
self.optimizer.adjust(self.params)
t = self.num_adjustments
beta1 = Adam.clamp(self.params["beta1"])
beta2 = Adam.clamp(self.params["beta2"])
updated_param = {}
if pytorch_mod:
params = dict(params.named_parameters(prefix='')) # TODO: rename this input...
for name, param in params.items():
if name not in self.cache:
self.cache[name] = {
"m": torch.zeros(param.shape, device=self.device),
"v": torch.zeros(param.shape, device=self.device)
+ 10.0 ** self.params["log_eps"].data
# NOTE that we add a little 'fudge factor' here because sqrt is not
# differentiable at exactly zero
}
#print(name, param.device)
g = param.grad.detach()
self.cache[name]["m"] = m = (
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
)
self.cache[name]["v"] = v = (
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
)
self.all_params_with_gradients.append(nn.Parameter(m)) # risk of memory overload => would a dict be better?
self.all_params_with_gradients.append(nn.Parameter(v))
m_hat = m / (1.0 - beta1 ** float(t))
v_hat = v / (1.0 - beta2 ** float(t))
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
updated_param[name] = param.detach() - self.params["alpha"] * dparam
if pytorch_mod: params.update(updated_param) # TODO: rename this input...
else: params = updated_param
def __str__(self):
return "adam(" + str(self.params) + ") / " + str(self.optimizer)

View file

@ -1,182 +0,0 @@
import numpy as np
import json, math, time, os
from hyperopt import *
import gc
BATCH_SIZE = 300
mnist_train = torchvision.datasets.MNIST(
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
)
mnist_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
dl_train = torch.utils.data.DataLoader(
mnist_train, batch_size=BATCH_SIZE, shuffle=False
)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
def test(model):
for i, (features_, labels_) in enumerate(dl_test):
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
def train(model, epochs=3, height=1):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
loss = F.nll_loss(pred, labels)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
return stats
def run(opt, name="out", usr={}, epochs=3, height=1):
torch.manual_seed(0x42)
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
print("Running...", str(model))
model.initialize()
log = train(model, epochs, height)
acc = test(model)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
return out
def sgd_experiments():
run(SGD(0.01), "sgd", epochs=1)
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
alpha = out["log"][-1]["params"]["alpha"]
print(alpha)
run(SGD(alpha), "sgd-final", epochs=1)
def adam_experiments():
run(Adam(), "adam", epochs=1)
print()
mo = SGDPerParam(
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
)
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
print(mo)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam+sgd-final",
epochs=1,
)
print()
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
log_eps = p["log_eps"]
print(alpha, beta1, beta2, log_eps)
run(
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
"adam2-final",
epochs=1,
)
print()
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
print(mo)
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
print()
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
p = out["log"][-1]["params"]
alpha = p["alpha"]
print(alpha)
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
def surface():
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
for log_alpha in np.linspace(-3, 2, 10):
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
def make_sgd_stack(height, top):
if height == 0:
return SGD(alpha=top)
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
def make_adam_stack(height, top=0.0000001):
if height == 0:
return Adam(alpha=top)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
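# Illustrative note (not in the original file): the recursive constructors above nest
# optimizers, e.g. make_sgd_stack(2, 0.01) builds
#     SGD(alpha=0.01, optimizer=SGD(alpha=0.01, optimizer=SGD(alpha=0.01)))
# and make_adam_stack(2) nests Adam the same way (the recursive call does not forward `top`,
# so deeper levels fall back to the default alpha). Each optimizer's hyperparameters are
# adjusted by the optimizer passed as its `optimizer` argument, and the innermost one ends
# the chain with a NoOpOptimizer.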
def stack_test():
for top in np.linspace(-7, 3, 20):
for height in range(6):
print("height =", height, "to p=", top)
opt = make_sgd_stack(height, 10 ** top)
run(
opt,
"metasgd3-%d@%+.2f" % (height, top),
{"height": height, "top": top},
epochs=1,
height=height,
)
gc.collect()
def perf_test():
for h in range(51):
print("height:", h)
# opt = make_sgd_stack(h, 0.01)
opt = make_adam_stack(h)
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
gc.collect()
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
surface()
sgd_experiments()
adam_experiments()
stack_test()
perf_test()

View file

@ -1,5 +0,0 @@
numpy==1.17.2
Pillow==6.2.0
six==1.12.0
torch==1.2.0
torchvision==0.4.0

View file

@ -1,344 +0,0 @@
import numpy as np
import json, math, time, os
from data_aug import *
#from data_aug_v2 import *
import gc
import matplotlib.pyplot as plt
from torchviz import make_dot, make_dot_from_trace
from torch.utils.data import SubsetRandomSampler
BATCH_SIZE = 300
#TEST_SIZE = 10000
TEST_SIZE = 300
DATA_LIMIT = 10
'''
data_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
'''
data_train = torchvision.datasets.CIFAR10(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
data_test = torchvision.datasets.CIFAR10(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
train_subset_indices=range(int(len(data_train)/2))
val_subset_indices=range(int(len(data_train)/2),len(data_train))
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
def test(model, reshape_in=True, device = torch.device('cuda')):
for i, (features_, labels_) in enumerate(dl_test):
if reshape_in :
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
if train_data: dl = dl_train
else: dl = dl_val
for i, (features_, labels_) in enumerate(dl):
if i > DATA_LIMIT : break
#t0 = time.process_time()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
#optimizer.begin()
#optimizer.zero_grad()
model.begin()
model.zero_grad()
pred = model.forward(features)
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#model.print_grad_fn()
#optimizer.print_grad_fn()
#print('-'*50)
loss.backward(create_graph=True)
#optimizer.step()
if train_data: model.adjust()
else: model.adjust_val()
#tf = time.process_time()
#data = {
# "time": tf - t0,
# "iter": epoch * len(dl_train) + i,
# "loss": loss.item(),
# "params": {
# k: v.item()
# for k, v in model.optimizer.parameters.items()
# if "." not in k
# },
#}
#stats.append(data)
#print_torch_mem(i)
return loss.item()
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
log = []
for epoch in range(epochs):
#dl_train.dataset.transform=torchvision.transforms.Compose([
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
# torchvision.transforms.ToTensor()
#])
viz_data(fig_name='res/data_sample')
t0 = time.process_time()
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
#acc = test(model=model, reshape_in=reshape_in, device=device)
acc = 0
tf = time.process_time()
data = {
"time": tf - t0,
"epoch": epoch,
"loss": loss,
"acc": acc,
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
#for k, v in model.mods.data_aug.params.named_parameters()
if "." not in k
},
}
log.append(data)
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return log
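# Illustrative summary (not part of the original file) of the alternating scheme above: each
# epoch first runs one pass over the training half of the data, where model.adjust() updates
# the network weights, then one pass over the validation half, where model.adjust_val()
# updates only the augmentation magnitude "mag", so the augmentation parameters are tuned
# on held-out data rather than on the batches the weights were fitted to.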
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
stats = []
for epoch in range(epochs):
for i, (features_, labels_) in enumerate(dl_train):
t0 = time.process_time()
model.begin()
if reshape_in :
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
else:
features, labels =features_, labels_
features, labels = features.to(device), labels.to(device)
pred = model.forward(
features
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
#loss = F.nll_loss(pred, labels)
loss = F.cross_entropy(pred,labels)
#print('-'*50)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
model.zero_grad()
loss.backward(create_graph=True)
model.adjust()
tf = time.process_time()
data = {
"time": tf - t0,
"iter": epoch * len(dl_train) + i,
"loss": loss.item(),
"params": {
k: v.item()
for k, v in model.optimizer.parameters.items()
if "." not in k
},
}
stats.append(data)
print('-'*50)
i=0
for obj in gc.get_objects():
try:
if (torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data))) and len(obj.size())>1:
print(i, type(obj), obj.size())
i+=1
except:
pass
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
param = [p for p in model.param_grad() if p.grad is not None]
if(len(param)!=0):
print(param[-2],' / ', param[-2].grad)
print(param[-1],' / ', param[-1].grad)
return stats
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
torch.manual_seed(0x42)
if not cnn:
reshape_in = True
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
else:
reshape_in = False
#model = LeNet(1, 10,opt, device)
#model = LeNet_v2(1, 10,opt, device).to(device=device)
model = LeNet_v2(3, 10,opt, device).to(device=device)
optimizer=None
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m,
data_augmenter=a,
optimizer=opt).to(device) # the same optimizer twice?...
'''
'''
m = LeNet_v3(1, 10)
a = Data_aug()
model = Augmented_model(model=m, data_augmenter=a).to(device)
#optimizer = SGD(model.parameters())
optimizer = SGD(model.parameters(), lr=0.01, height=1)
'''
#for idx, m in enumerate(model.modules()):
# print(idx, '->', m)
print("Running...", str(model))
model.initialize()
#print_model(model)
#model.data_augmentation(False)
#model.eval()
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
model.eval()
acc = test(model, reshape_in, device=device)
#param = [p for p in model.param_grad() if p.grad is not None]
#if(len(param)!=0):
# print(param[-2],' / ', param[-2].grad)
# print(param[-1],' / ', param[-1].grad)
out = {"acc": acc, "log": log, "usr": usr}
with open("log/%s.json" % name, "w+") as f:
json.dump(out, f, indent=True)
times = [x["time"] for x in log]
print("Times (ms):", np.mean(times), "+/-", np.std(times))
print("Final accuracy:", acc)
#plot_res(log, fig_name='res/'+name)
return out
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
#print(height,device)
if height == 0:
return Adam(alpha=top, device=device)
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
def plot_res(log, fig_name='res'):
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot([x["loss"] for x in log])
ax[1].set_title('Acc')
ax[1].plot([x["acc"] for x in log])
ax[2].set_title('mag')
ax[2].plot([x["data_aug"] for x in log])
plt.savefig(fig_name)
def print_torch_mem(add_info=''):
nb=0
max_size=0
for obj in gc.get_objects():
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
def print_model(model, fig_name='graph/graph'): # does not seem to work for models written in a functional style
x = torch.randn(1,1,28,28, device=device)
dot=make_dot(model(x), params=dict(model.named_parameters()))
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
dot.render(fig_name)
print("Model graph generated !")
def viz_data(fig_name='data_sample'):
features_, labels_ = next(iter(dl_train))
plt.figure(figsize=(10,10))
#for i, (features_, labels_) in enumerate(dl_train):
for i in range(25):
if i==25: break
#print(features_.size(), labels_.size())
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
img = features_[i,0,:,:]
#print('im shape',img.shape)
plt.imshow(img, cmap=plt.cm.binary)
plt.xlabel(labels_[i].item())
plt.savefig(fig_name)
##########################################
if __name__ == "__main__":
try:
os.mkdir("log")
except:
print("log/ exists already")
device = torch.device('cuda')
run(make_adam_stack(height=1, top=0.001, device=device),
"Augmented_MNIST",
epochs=100,
cnn=True,
device = device)
print()

View file

@ -1,73 +0,0 @@
import numpy as np
import tensorflow as tf
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def LeNet(images, num_classes):
# tunable hyperparameters for nn architecture
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
#print(images.shape)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
#print(h_conv1_tf.shape)
#print(h_pool1_tf.shape)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
#print(h_pool2_tf.shape)
# 4.layer: fully connected
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
print(h_fc1_tf.shape)
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
# tf.argmax(y_data_tf, 1),
# name = 'y_pred_correct_tf')
logits = z_pred_tf
return logits #y_pred_proba_tf
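# Hypothetical usage sketch (not part of the original file): feeding a CIFAR-10 sized
# placeholder through LeNet to obtain class logits. The batch size and shapes are illustrative.
def _lenet_usage_sketch():
    images = tf.placeholder(tf.float32, [128, 32, 32, 3])  # NHWC batch of CIFAR-10 images
    logits = LeNet(images, num_classes=10)                 # -> Tensor of shape (128, 10)
    return logits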

View file

@ -1,353 +0,0 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""PBA & AutoAugment Train/Eval module.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import time
import numpy as np
import tensorflow as tf
import autoaugment.custom_ops as ops
from autoaugment.shake_drop import build_shake_drop_model
from autoaugment.shake_shake import build_shake_shake_model
import pba.data_utils as data_utils
import pba.helper_utils as helper_utils
from pba.wrn import build_wrn_model
from pba.resnet import build_resnet_model
from pba.LeNet import LeNet
arg_scope = tf.contrib.framework.arg_scope
def setup_arg_scopes(is_training):
"""Sets up the argscopes that will be used when building an image model.
Args:
is_training: Is the model training or not.
Returns:
Arg scopes to be put around the model being constructed.
"""
batch_norm_decay = 0.9
batch_norm_epsilon = 1e-5
batch_norm_params = {
# Decay for the moving averages.
'decay': batch_norm_decay,
# epsilon to prevent 0s in variance.
'epsilon': batch_norm_epsilon,
'scale': True,
# collection containing the moving mean and moving variance.
'is_training': is_training,
}
scopes = []
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
return scopes
def build_model(inputs, num_classes, is_training, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model build built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
if len(scopes) != 1:
raise ValueError('Nested scopes deprecated in py3.')
with scopes[0]:
if hparams.model_name == 'pyramid_net':
logits = build_shake_drop_model(inputs, num_classes, is_training)
elif hparams.model_name == 'wrn':
logits = build_wrn_model(inputs, num_classes, hparams.wrn_size)
elif hparams.model_name == 'shake_shake':
logits = build_shake_shake_model(inputs, num_classes, hparams,
is_training)
elif hparams.model_name == 'resnet':
logits = build_resnet_model(inputs, num_classes, hparams,
is_training)
elif hparams.model_name == 'LeNet':
logits = LeNet(inputs, num_classes)
else:
raise ValueError("Unknown model name.")
return logits
class Model(object):
"""Builds an model."""
def __init__(self, hparams, num_classes, image_size):
self.hparams = hparams
self.num_classes = num_classes
self.image_size = image_size
def build(self, mode):
"""Construct the model."""
assert mode in ['train', 'eval']
self.mode = mode
self._setup_misc(mode)
self._setup_images_and_labels(self.hparams.dataset)
self._build_graph(self.images, self.labels, mode)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _setup_misc(self, mode):
"""Sets up miscellaneous in the model constructor."""
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
self.reuse = None if (mode == 'train') else True
self.batch_size = self.hparams.batch_size
if mode == 'eval':
self.batch_size = self.hparams.test_batch_size
def _setup_images_and_labels(self, dataset):
"""Sets up image and label placeholders for the model."""
if dataset == 'cifar10' or dataset == 'cifar100' or self.mode == 'train':
self.images = tf.placeholder(tf.float32,
[self.batch_size, self.image_size, self.image_size, 3])
self.labels = tf.placeholder(tf.float32,
[self.batch_size, self.num_classes])
else:
self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3])
self.labels = tf.placeholder(tf.float32, [None, self.num_classes])
def assign_epoch(self, session, epoch_value):
session.run(
self._epoch_update, feed_dict={self._new_epoch: epoch_value})
def _build_graph(self, images, labels, mode):
"""Constructs the TF graph for the model.
Args:
images: A 4-D image Tensor
labels: A 2-D labels Tensor.
mode: string indicating training mode ( e.g., 'train', 'valid', 'test').
"""
is_training = 'train' in mode
if is_training:
self.global_step = tf.train.get_or_create_global_step()
logits = build_model(images, self.num_classes, is_training,
self.hparams)
self.predictions, self.cost = helper_utils.setup_loss(logits, labels)
self._calc_num_trainable_params()
# Adds L2 weight decay to the cost
self.cost = helper_utils.decay_weights(self.cost,
self.hparams.weight_decay_rate)
if is_training:
self._build_train_op()
# Setup checkpointing for this child model
# Keep 2 or more checkpoints around during training.
with tf.device('/cpu:0'):
self.saver = tf.train.Saver(max_to_keep=10)
self.init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
def _calc_num_trainable_params(self):
self.num_trainable_params = np.sum([
np.prod(var.get_shape().as_list())
for var in tf.trainable_variables()
])
tf.logging.info('number of trainable params: {}'.format(
self.num_trainable_params))
def _build_train_op(self):
"""Builds the train op for the model."""
hparams = self.hparams
tvars = tf.trainable_variables()
grads = tf.gradients(self.cost, tvars)
if hparams.gradient_clipping_by_global_norm > 0.0:
grads, norm = tf.clip_by_global_norm(
grads, hparams.gradient_clipping_by_global_norm)
tf.summary.scalar('grad_norm', norm)
# Setup the initial learning rate
initial_lr = self.lr_rate_ph
optimizer = tf.train.MomentumOptimizer(
initial_lr, 0.9, use_nesterov=True)
self.optimizer = optimizer
apply_op = optimizer.apply_gradients(
zip(grads, tvars), global_step=self.global_step, name='train_step')
train_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies([apply_op]):
self.train_op = tf.group(*train_ops)
class ModelTrainer(object):
"""Trains an instance of the Model class."""
def __init__(self, hparams):
self._session = None
self.hparams = hparams
# Set the random seed to be sure the same validation set
# is used for each model
np.random.seed(0)
self.data_loader = data_utils.DataSet(hparams)
np.random.seed() # Put the random seed back to random
self.data_loader.reset()
# extra stuff for ray
self._build_models()
self._new_session()
self._session.__enter__()
def save_model(self, checkpoint_dir, step=None):
"""Dumps model into the backup_dir.
Args:
step: If provided, creates a checkpoint with the given step
number, instead of overwriting the existing checkpoints.
"""
model_save_name = os.path.join(checkpoint_dir,
'model.ckpt') + '-' + str(step)
save_path = self.saver.save(self.session, model_save_name)
tf.logging.info('Saved child model')
return model_save_name
def extract_model_spec(self, checkpoint_path):
"""Loads a checkpoint with the architecture structure stored in the name."""
self.saver.restore(self.session, checkpoint_path)
tf.logging.warning(
'Loaded child model checkpoint from {}'.format(checkpoint_path))
def eval_child_model(self, model, data_loader, mode):
"""Evaluate the child model.
Args:
model: image model that will be evaluated.
data_loader: dataset object to extract eval data from.
mode: will the model be evalled on train, val or test.
Returns:
Accuracy of the model on the specified dataset.
"""
tf.logging.info('Evaluating child model in mode {}'.format(mode))
while True:
try:
accuracy = helper_utils.eval_child_model(
self.session, model, data_loader, mode)
tf.logging.info(
'Eval child model accuracy: {}'.format(accuracy))
# If epoch trained without raising the below errors, break
# from loop.
break
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
tf.logging.info(
'Retryable error caught: {}. Retrying.'.format(e))
return accuracy
def _new_session(self):
"""Creates a new session for model m."""
# Create a new session for this model, initialize
# variables, and save / restore from checkpoint.
sess_cfg = tf.ConfigProto(
allow_soft_placement=True, log_device_placement=False)
sess_cfg.gpu_options.allow_growth = True
self._session = tf.Session('', config=sess_cfg)
self._session.run([self.m.init, self.meval.init])
return self._session
def _build_models(self):
"""Builds the image models for train and eval."""
# Determine if we should build the train and eval model. When using
# distributed training we only want to build one or the other and not both.
with tf.variable_scope('model', use_resource=False):
m = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
m.build('train')
self._num_trainable_params = m.num_trainable_params
self._saver = m.saver
with tf.variable_scope('model', reuse=True, use_resource=False):
meval = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
meval.build('eval')
self.m = m
self.meval = meval
def _run_training_loop(self, curr_epoch):
"""Trains the model `m` for one epoch."""
start_time = time.time()
while True:
try:
train_accuracy = helper_utils.run_epoch_training(
self.session, self.m, self.data_loader, curr_epoch)
break
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
tf.logging.info(
'Retryable error caught: {}. Retrying.'.format(e))
tf.logging.info('Finished epoch: {}'.format(curr_epoch))
tf.logging.info('Epoch time(min): {}'.format(
(time.time() - start_time) / 60.0))
return train_accuracy
def _compute_final_accuracies(self, iteration):
"""Run once training is finished to compute final test accuracy."""
if (iteration >= self.hparams.num_epochs - 1):
test_accuracy = self.eval_child_model(self.meval, self.data_loader,
'test')
else:
test_accuracy = 0
tf.logging.info('Test Accuracy: {}'.format(test_accuracy))
return test_accuracy
def run_model(self, epoch):
"""Trains and evalutes the image model."""
valid_accuracy = 0.
training_accuracy = self._run_training_loop(epoch)
if self.hparams.validation_size > 0:
valid_accuracy = self.eval_child_model(self.meval,
self.data_loader, 'val')
tf.logging.info('Train Acc: {}, Valid Acc: {}'.format(
training_accuracy, valid_accuracy))
return training_accuracy, valid_accuracy
def reset_config(self, new_hparams):
self.hparams = new_hparams
self.data_loader.reset_policy(new_hparams)
return
@property
def saver(self):
return self._saver
@property
def session(self):
return self._session
@property
def num_trainable_params(self):
return self._num_trainable_params
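# Hypothetical driver sketch (not part of the original file) showing how the search/training
# code is expected to use ModelTrainer; `hparams` is assumed to come from pba.setup.create_hparams.
def _trainer_loop_sketch(hparams):
    trainer = ModelTrainer(hparams)          # builds the train/eval graphs and opens a session
    for epoch in range(hparams.num_epochs):
        train_acc, valid_acc = trainer.run_model(epoch)
    test_acc = trainer._compute_final_accuracies(hparams.num_epochs - 1)
    return train_acc, valid_acc, test_acc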

View file

@ -1,59 +0,0 @@
#!/bin/bash
export PYTHONPATH="$(pwd)"
cifar10_LeNet_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
python pba/search.py \
--local_dir "$local_dir" \
--model_name LeNet \
--data_path "$data_path" --dataset cifar10 \
--train_size 4000 --val_size 46000 \
--checkpoint_freq 0 \
--name "cifar10_search" --gpu 0.15 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 150 \
--explore cifar10 --aug_policy cifar10 \
--lr 0.1 --wd 0.0005
}
cifar10_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
python pba/search.py \
--local_dir "$local_dir" \
--model_name wrn_40_2 \
--data_path "$data_path" --dataset cifar10 \
--train_size 4000 --val_size 46000 \
--checkpoint_freq 0 \
--name "cifar10_search" --gpu 0.15 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 200 \
--explore cifar10 --aug_policy cifar10 \
--lr 0.1 --wd 0.0005
}
svhn_search() {
local_dir="$PWD/results/"
data_path="$PWD/datasets/"
python pba/search.py \
--local_dir "$local_dir" --data_path "$data_path" \
--model_name wrn_40_2 --dataset svhn \
--train_size 1000 --val_size 7325 \
--checkpoint_freq 0 \
--name "svhn_search" --gpu 0.19 --cpu 2 \
--num_samples 16 --perturbation_interval 3 --epochs 160 \
--explore cifar10 --aug_policy cifar10 --no_cutout \
--lr 0.1 --wd 0.005
}
if [ "$1" = "rcifar10" ]; then
cifar10_search
elif [ "$1" = "rsvhn" ]; then
svhn_search
elif [ "$1" = "LeNet" ]; then
cifar10_LeNet_search
else
echo "invalid args"
fi

View file

@ -1,210 +0,0 @@
"""Parse flags and set up hyperparameters."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import random
import tensorflow as tf
from pba.augmentation_transforms_hp import NUM_HP_TRANSFORM
def create_parser(state):
"""Create arg parser for flags."""
parser = argparse.ArgumentParser()
parser.add_argument(
'--model_name',
default='wrn',
choices=('wrn_28_10', 'wrn_40_2', 'shake_shake_32', 'shake_shake_96',
'shake_shake_112', 'pyramid_net', 'resnet', 'LeNet'))
parser.add_argument(
'--data_path',
default='/tmp/datasets/',
help='Directory where dataset is located.')
parser.add_argument(
'--dataset',
default='cifar10',
choices=('cifar10', 'cifar100', 'svhn', 'svhn-full', 'test'))
parser.add_argument(
'--recompute_dset_stats',
action='store_true',
help='Instead of using hardcoded mean/std, recompute from dataset.')
parser.add_argument('--local_dir', type=str, default='/tmp/ray_results/', help='Ray directory.')
parser.add_argument('--restore', type=str, default=None, help='If specified, tries to restore from given path.')
parser.add_argument('--train_size', type=int, default=5000, help='Number of training examples.')
parser.add_argument('--val_size', type=int, default=45000, help='Number of validation examples.')
parser.add_argument('--checkpoint_freq', type=int, default=50, help='Checkpoint frequency.')
parser.add_argument(
'--cpu', type=float, default=4, help='Allocated by Ray')
parser.add_argument(
'--gpu', type=float, default=1, help='Allocated by Ray')
parser.add_argument(
'--aug_policy',
type=str,
default='cifar10',
help=
'which augmentation policy to use (in augmentation_transforms_hp.py)')
# search-use only
parser.add_argument(
'--explore',
type=str,
default='cifar10',
help='which explore function to use')
parser.add_argument(
'--epochs',
type=int,
default=0,
help='Number of epochs, or <=0 for default')
parser.add_argument(
'--no_cutout', action='store_true', help='turn off cutout')
parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
parser.add_argument('--bs', type=int, default=128, help='batch size')
parser.add_argument('--test_bs', type=int, default=25, help='test batch size')
parser.add_argument('--num_samples', type=int, default=1, help='Number of Ray samples')
if state == 'train':
parser.add_argument(
'--use_hp_policy',
action='store_true',
help='otherwise use autoaug policy')
parser.add_argument(
'--hp_policy',
type=str,
default=None,
help='either a comma separated list of values or a file')
parser.add_argument(
'--hp_policy_epochs',
type=int,
default=200,
help='number of epochs/iterations policy trained for')
parser.add_argument(
'--no_aug',
action='store_true',
help=
'no additional augmentation at all (besides cutout if not toggled)'
)
parser.add_argument(
'--flatten',
action='store_true',
help='randomly select aug policy from schedule')
parser.add_argument('--name', type=str, default='autoaug')
elif state == 'search':
parser.add_argument('--perturbation_interval', type=int, default=10)
parser.add_argument('--name', type=str, default='autoaug_pbt')
else:
raise ValueError('unknown state')
args = parser.parse_args()
tf.logging.info(str(args))
return args
def create_hparams(state, FLAGS): # pylint: disable=invalid-name
"""Creates hyperparameters to pass into Ray config.
Different options depending on search or eval mode.
Args:
state: a string, 'train' or 'search'.
FLAGS: parsed command line flags.
Returns:
tf.hparams object.
"""
epochs = 0
tf.logging.info('data path: {}'.format(FLAGS.data_path))
hparams = tf.contrib.training.HParams(
train_size=FLAGS.train_size,
validation_size=FLAGS.val_size,
dataset=FLAGS.dataset,
data_path=FLAGS.data_path,
batch_size=FLAGS.bs,
gradient_clipping_by_global_norm=5.0,
explore=FLAGS.explore,
aug_policy=FLAGS.aug_policy,
no_cutout=FLAGS.no_cutout,
recompute_dset_stats=FLAGS.recompute_dset_stats,
lr=FLAGS.lr,
weight_decay_rate=FLAGS.wd,
test_batch_size=FLAGS.test_bs)
if state == 'train':
hparams.add_hparam('no_aug', FLAGS.no_aug)
hparams.add_hparam('use_hp_policy', FLAGS.use_hp_policy)
if FLAGS.use_hp_policy:
if FLAGS.hp_policy == 'random':
tf.logging.info('RANDOM SEARCH')
parsed_policy = []
for i in range(NUM_HP_TRANSFORM * 4):
if i % 2 == 0:
parsed_policy.append(random.randint(0, 10))
else:
parsed_policy.append(random.randint(0, 9))
elif FLAGS.hp_policy.endswith('.txt') or FLAGS.hp_policy.endswith(
'.p'):
# will be loaded in in data_utils
parsed_policy = FLAGS.hp_policy
else:
# parse input into a fixed augmentation policy
parsed_policy = FLAGS.hp_policy.split(', ')
parsed_policy = [int(p) for p in parsed_policy]
hparams.add_hparam('hp_policy', parsed_policy)
hparams.add_hparam('hp_policy_epochs', FLAGS.hp_policy_epochs)
hparams.add_hparam('flatten', FLAGS.flatten)
elif state == 'search':
hparams.add_hparam('no_aug', False)
hparams.add_hparam('use_hp_policy', True)
# default start value of 0
hparams.add_hparam('hp_policy',
[0 for _ in range(4 * NUM_HP_TRANSFORM)])
else:
raise ValueError('unknown state')
if FLAGS.model_name == 'wrn_40_2':
hparams.add_hparam('model_name', 'wrn')
epochs = 200
hparams.add_hparam('wrn_size', 32)
hparams.add_hparam('wrn_depth', 40)
elif FLAGS.model_name == 'wrn_28_10':
hparams.add_hparam('model_name', 'wrn')
epochs = 200
hparams.add_hparam('wrn_size', 160)
hparams.add_hparam('wrn_depth', 28)
elif FLAGS.model_name == 'resnet':
hparams.add_hparam('model_name', 'resnet')
epochs = 200
hparams.add_hparam('resnet_size', 20)
hparams.add_hparam('num_filters', 32)
elif FLAGS.model_name == 'shake_shake_32':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 2)
elif FLAGS.model_name == 'shake_shake_96':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 6)
elif FLAGS.model_name == 'shake_shake_112':
hparams.add_hparam('model_name', 'shake_shake')
epochs = 1800
hparams.add_hparam('shake_shake_widen_factor', 7)
elif FLAGS.model_name == 'pyramid_net':
hparams.add_hparam('model_name', 'pyramid_net')
epochs = 1800
hparams.set_hparam('batch_size', 64)
elif FLAGS.model_name == 'LeNet':
hparams.add_hparam('model_name', 'LeNet')
epochs = 200
else:
raise ValueError('Invalid model name: %s' % FLAGS.model_name)
if FLAGS.epochs > 0:
tf.logging.info('overwriting with custom epochs')
epochs = FLAGS.epochs
hparams.add_hparam('num_epochs', epochs)
tf.logging.info('epochs: {}, lr: {}, wd: {}'.format(
hparams.num_epochs, hparams.lr, hparams.weight_decay_rate))
return hparams
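# Hypothetical usage sketch (not part of the original file): parsing the command-line flags
# and building the Ray config in search mode, mirroring how pba/search.py is expected to
# call into this module.
def _setup_sketch():
    FLAGS = create_parser('search')            # argparse flags for search mode
    hparams = create_hparams('search', FLAGS)  # tf.contrib.training.HParams passed to Ray
    return hparams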

View file

@ -1,41 +0,0 @@
#!/bin/bash
export PYTHONPATH="$(pwd)"
# args: [model name] [lr] [wd] #Learning rate / weight decay
eval_cifar10() {
hp_policy="$PWD/schedules/rcifar10_16_wrn.txt"
local_dir="$PWD/results/"
data_path="$PWD/datasets/cifar-10-batches-py"
size=50000
dataset="cifar10"
name="eval_cifar10_$1" # has 8 cutout size
python pba/train.py \
--local_dir "$local_dir" --data_path "$data_path" \
--model_name "$1" --dataset "$dataset" \
--train_size "$size" --val_size 0 \
--checkpoint_freq 25 --gpu 1 --cpu 4 \
--use_hp_policy --hp_policy "$hp_policy" \
--hp_policy_epochs 200 \
--aug_policy cifar10 --name "$name" \
--lr "$2" --wd "$3"
}
if [ "$@" = "wrn_28_10" ]; then
eval_cifar10 wrn_28_10 0.1 0.0005
elif [ "$@" = "ss_32" ]; then
eval_cifar10 shake_shake_32 0.01 0.001
elif [ "$@" = "ss_96" ]; then
eval_cifar10 shake_shake_96 0.01 0.001
elif [ "$@" = "ss_112" ]; then
eval_cifar10 shake_shake_112 0.01 0.001
elif [ "$@" = "pyramid_net" ]; then
eval_cifar10 pyramid_net 0.05 0.00005
elif [ "$@" = "LeNet" ]; then
eval_cifar10 LeNet 0.05 0.0
else
echo "invalid args"
fi

View file

@ -1,73 +0,0 @@
import numpy as np
import tensorflow as tf
## build the neural network class
# weight initialization
def weight_variable(shape, name = None):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name = name)
# bias initialization
def bias_variable(shape, name = None):
initial = tf.constant(0.1, shape=shape) # positive bias
return tf.Variable(initial, name = name)
# 2D convolution
def conv2d(x, W, name = None):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
# max pooling
def max_pool_2x2(x, name = None):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name = name)
def LeNet(images, num_classes):
# tunable hyperparameters for nn architecture
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
#print(images.shape)
# 1.layer: convolution + max pooling
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
#print(h_conv1_tf.shape)
#print(h_pool1_tf.shape)
# 2.layer: convolution + max pooling
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
#print(h_pool2_tf.shape)
# 4.layer: fully connected
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
name = 'h_fc1_tf') # (.,1024)
# add dropout
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
print(h_fc1_tf.shape)
# 5.layer: fully connected
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
# predicted probabilities in one-hot encoding
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
# tensor of correct predictions
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
# tf.argmax(y_data_tf, 1),
# name = 'y_pred_correct_tf')
logits = z_pred_tf
return logits #y_pred_proba_tf

View file

@ -1,620 +0,0 @@
# coding=utf-8
# Copyright 2019 The Google UDA Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""UDA on CIFAR-10 and SVHN.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import time
import json
import numpy as np
from absl import flags
import absl.logging as _logging # pylint: disable=unused-import
import tensorflow as tf
from randaugment import custom_ops as ops
import data
import utils
from randaugment.wrn import build_wrn_model
from randaugment.shake_drop import build_shake_drop_model
from randaugment.shake_shake import build_shake_shake_model
from randaugment.LeNet import LeNet
# TPU related
flags.DEFINE_string(
"master", default=None,
help="the TPU address. This should be set when using Cloud TPU")
flags.DEFINE_string(
"tpu", default=None,
help="The Cloud TPU to use for training. This should be either the name "
"used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.")
flags.DEFINE_string(
"gcp_project", default=None,
help="Project name for the Cloud TPU-enabled project. If not specified, "
"we will attempt to automatically detect the GCE project from metadata.")
flags.DEFINE_string(
"tpu_zone", default=None,
help="GCE zone where the Cloud TPU is located in. If not specified, we "
"will attempt to automatically detect the GCE project from metadata.")
flags.DEFINE_bool(
"use_tpu", default=False,
help="Use TPUs rather than GPU/CPU.")
flags.DEFINE_enum(
"task_name", "cifar10",
enum_values=["cifar10", "svhn"],
help="The task to use")
# UDA config:
flags.DEFINE_integer(
"sup_size", default=4000,
help="Number of supervised pairs to use. "
"-1: all training samples. 4000: 4000 supervised examples.")
flags.DEFINE_integer(
"aug_copy", default=0,
help="Number of different augmented data generated.")
flags.DEFINE_integer(
"unsup_ratio", default=0,
help="The ratio between batch size of unlabeled data and labeled data, "
"i.e., unsup_ratio * train_batch_size is the batch_size for unlabeled data."
"Do not use the unsupervised objective if set to 0.")
flags.DEFINE_enum(
"tsa", "",
enum_values=["", "linear_schedule", "log_schedule", "exp_schedule"],
help="anneal schedule of training signal annealing. "
"tsa='' means not using TSA. See the paper for other schedules.")
flags.DEFINE_float(
"uda_confidence_thresh", default=-1,
help="The threshold on predicted probability on unsupervised data. If set,"
"UDA loss will only be calculated on unlabeled examples whose largest"
"probability is larger than the threshold")
flags.DEFINE_float(
"uda_softmax_temp", -1,
help="The temperature of the Softmax when making prediction on unlabeled"
"examples. -1 means to use normal Softmax")
flags.DEFINE_float(
"ent_min_coeff", default=0,
help="")
flags.DEFINE_integer(
"unsup_coeff", default=1,
help="The coefficient on the UDA loss. "
"setting unsup_coeff to 1 works for most settings. "
"When you have extermely few samples, consider increasing unsup_coeff")
# Experiment (data/checkpoint/directory) config
flags.DEFINE_string(
"data_dir", default=None,
help="Path to data directory containing `*.tfrecords`.")
flags.DEFINE_string(
"model_dir", default=None,
help="model dir of the saved checkpoints.")
flags.DEFINE_bool(
"do_train", default=True,
help="Whether to run training.")
flags.DEFINE_bool(
"do_eval", default=False,
help="Whether to run eval on the test set.")
flags.DEFINE_integer(
"dev_size", default=-1,
help="dev set size.")
flags.DEFINE_bool(
"verbose", default=False,
help="Whether to print additional information.")
# Training config
flags.DEFINE_integer(
"train_batch_size", default=32,
help="Size of train batch.")
flags.DEFINE_integer(
"eval_batch_size", default=8,
help="Size of evalation batch.")
flags.DEFINE_integer(
"train_steps", default=100000,
help="Total number of training steps.")
flags.DEFINE_integer(
"iterations", default=10000,
help="Number of iterations per repeat loop.")
flags.DEFINE_integer(
"save_steps", default=10000,
help="number of steps for model checkpointing.")
flags.DEFINE_integer(
"max_save", default=10,
help="Maximum number of checkpoints to save.")
# Model config
flags.DEFINE_enum(
"model_name", default="wrn",
enum_values=["wrn", "shake_shake_32", "shake_shake_96", "shake_shake_112", "pyramid_net", "LeNet"],
help="Name of the model")
flags.DEFINE_integer(
"num_classes", default=10,
help="Number of categories for classification.")
flags.DEFINE_integer(
"wrn_size", default=32,
help="The size of WideResNet. It should be set to 32 for WRN-28-2"
"and should be set to 160 for WRN-28-10")
# Optimization config
flags.DEFINE_float(
"learning_rate", default=0.03,
help="Maximum learning rate.")
flags.DEFINE_float(
"weight_decay_rate", default=5e-4,
help="Weight decay rate.")
flags.DEFINE_float(
"min_lr_ratio", default=0.004,
help="Minimum ratio learning rate.")
flags.DEFINE_integer(
"warmup_steps", default=20000,
help="Number of steps for linear lr warmup.")
FLAGS = tf.flags.FLAGS
arg_scope = tf.contrib.framework.arg_scope
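# Training signal annealing (TSA): with t = global_step / num_train_steps, the schedules below
# compute coeff = t (linear), exp(5 * (t - 1)) (exp) or 1 - exp(-5 * t) (log), and the returned
# threshold is start + coeff * (end - start), i.e. it grows from ~start to end over training.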
def get_tsa_threshold(schedule, global_step, num_train_steps, start, end):
step_ratio = tf.to_float(global_step) / tf.to_float(num_train_steps)
if schedule == "linear_schedule":
coeff = step_ratio
elif schedule == "exp_schedule":
scale = 5
# [exp(-5), exp(0)] = [1e-2, 1]
coeff = tf.exp((step_ratio - 1) * scale)
elif schedule == "log_schedule":
scale = 5
# [1 - exp(0), 1 - exp(-5)] = [0, 0.99]
coeff = 1 - tf.exp((-step_ratio) * scale)
return coeff * (end - start) + start
def setup_arg_scopes(is_training):
"""Sets up the argscopes that will be used when building an image model.
Args:
is_training: Is the model training or not.
Returns:
Arg scopes to be put around the model being constructed.
"""
batch_norm_decay = 0.9
batch_norm_epsilon = 1e-5
batch_norm_params = {
# Decay for the moving averages.
"decay": batch_norm_decay,
# epsilon to prevent 0s in variance.
"epsilon": batch_norm_epsilon,
"scale": True,
# collection containing the moving mean and moving variance.
"is_training": is_training,
}
scopes = []
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
return scopes
def build_model(inputs, num_classes, is_training, update_bn, hparams):
"""Constructs the vision model being trained/evaled.
Args:
inputs: input features/images being fed to the image model being built.
num_classes: number of output classes being predicted.
is_training: is the model training or not.
update_bn: whether to update the batch norm moving statistics.
hparams: additional hyperparameters associated with the image model.
Returns:
The logits of the image model.
"""
scopes = setup_arg_scopes(is_training)
try:
from contextlib import nested
except ImportError:
from contextlib import ExitStack, contextmanager
@contextmanager
def nested(*contexts):
with ExitStack() as stack:
for ctx in contexts:
stack.enter_context(ctx)
yield contexts
with nested(*scopes):
if hparams.model_name == "pyramid_net":
logits = build_shake_drop_model(
inputs, num_classes, is_training)
elif hparams.model_name == "wrn":
logits = build_wrn_model(
inputs, num_classes, hparams.wrn_size, update_bn)
elif hparams.model_name == "shake_shake":
logits = build_shake_shake_model(
inputs, num_classes, hparams, is_training)
elif hparams.model_name == "LeNet":
logits = LeNet(inputs, num_classes)
return logits
def _kl_divergence_with_logits(p_logits, q_logits):
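# Per-example KL divergence KL(p || q) = sum_i p_i * (log p_i - log q_i), reduced over the class axis.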
p = tf.nn.softmax(p_logits)
log_p = tf.nn.log_softmax(p_logits)
log_q = tf.nn.log_softmax(q_logits)
kl = tf.reduce_sum(p * (log_p - log_q), -1)
return kl
def anneal_sup_loss(sup_logits, sup_labels, sup_loss, global_step, metric_dict):
tsa_start = 1. / FLAGS.num_classes
eff_train_prob_threshold = get_tsa_threshold(
FLAGS.tsa, global_step, FLAGS.train_steps,
tsa_start, end=1)
one_hot_labels = tf.one_hot(
sup_labels, depth=FLAGS.num_classes, dtype=tf.float32)
sup_probs = tf.nn.softmax(sup_logits, axis=-1)
correct_label_probs = tf.reduce_sum(
one_hot_labels * sup_probs, axis=-1)
larger_than_threshold = tf.greater(
correct_label_probs, eff_train_prob_threshold)
loss_mask = 1 - tf.cast(larger_than_threshold, tf.float32)
loss_mask = tf.stop_gradient(loss_mask)
sup_loss = sup_loss * loss_mask
avg_sup_loss = (tf.reduce_sum(sup_loss) /
tf.maximum(tf.reduce_sum(loss_mask), 1))
metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask)
metric_dict["sup/eff_train_prob_threshold"] = eff_train_prob_threshold
return sup_loss, avg_sup_loss
def get_ent(logits, return_mean=True):
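# Shannon entropy H(p) = -sum_i p_i * log p_i of the predicted distribution,
# optionally averaged over the batch.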
log_prob = tf.nn.log_softmax(logits, axis=-1)
prob = tf.exp(log_prob)
ent = tf.reduce_sum(-prob * log_prob, axis=-1)
if return_mean:
ent = tf.reduce_mean(ent)
return ent
def get_model_fn(hparams):
def model_fn(features, labels, mode, params):
sup_labels = tf.reshape(features["label"], [-1])
#### Configuring the optimizer
global_step = tf.train.get_global_step()
metric_dict = {}
is_training = (mode == tf.estimator.ModeKeys.TRAIN)
if FLAGS.unsup_ratio > 0 and is_training:
all_images = tf.concat([features["image"],
features["ori_image"],
features["aug_image"]], 0)
else:
all_images = features["image"]
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
all_logits = build_model(
inputs=all_images,
num_classes=FLAGS.num_classes,
is_training=is_training,
update_bn=True and is_training,
hparams=hparams,
)
sup_bsz = tf.shape(features["image"])[0]
sup_logits = all_logits[:sup_bsz]
sup_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=sup_labels,
logits=sup_logits)
sup_prob = tf.nn.softmax(sup_logits, axis=-1)
metric_dict["sup/pred_prob"] = tf.reduce_mean(
tf.reduce_max(sup_prob, axis=-1))
if FLAGS.tsa:
sup_loss, avg_sup_loss = anneal_sup_loss(sup_logits, sup_labels, sup_loss,
global_step, metric_dict)
else:
avg_sup_loss = tf.reduce_mean(sup_loss)
total_loss = avg_sup_loss
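# Unsupervised consistency branch (UDA): the target distribution comes from the original
# (unaugmented) images, optionally sharpened by uda_softmax_temp and masked by
# uda_confidence_thresh, and the loss is KL(stop_grad(p_ori) || p_aug) added to the
# supervised loss with weight unsup_coeff.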
if FLAGS.unsup_ratio > 0 and is_training:
aug_bsz = tf.shape(features["ori_image"])[0]
ori_logits = all_logits[sup_bsz : sup_bsz + aug_bsz]
aug_logits = all_logits[sup_bsz + aug_bsz:]
if FLAGS.uda_softmax_temp != -1:
ori_logits_tgt = ori_logits / FLAGS.uda_softmax_temp
else:
ori_logits_tgt = ori_logits
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
aug_prob = tf.nn.softmax(aug_logits, axis=-1)
metric_dict["unsup/ori_prob"] = tf.reduce_mean(
tf.reduce_max(ori_prob, axis=-1))
metric_dict["unsup/aug_prob"] = tf.reduce_mean(
tf.reduce_max(aug_prob, axis=-1))
aug_loss = _kl_divergence_with_logits(
p_logits=tf.stop_gradient(ori_logits_tgt),
q_logits=aug_logits)
if FLAGS.uda_confidence_thresh != -1:
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
largest_prob = tf.reduce_max(ori_prob, axis=-1)
loss_mask = tf.cast(tf.greater(
largest_prob, FLAGS.uda_confidence_thresh), tf.float32)
metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(loss_mask)
loss_mask = tf.stop_gradient(loss_mask)
aug_loss = aug_loss * loss_mask
metric_dict["unsup/high_prob_loss"] = tf.reduce_mean(aug_loss)
if FLAGS.ent_min_coeff > 0:
ent_min_coeff = FLAGS.ent_min_coeff
metric_dict["unsup/ent_min_coeff"] = ent_min_coeff
per_example_ent = get_ent(ori_logits)
ent_min_loss = tf.reduce_mean(per_example_ent)
total_loss = total_loss + ent_min_coeff * ent_min_loss
avg_unsup_loss = tf.reduce_mean(aug_loss)
total_loss += FLAGS.unsup_coeff * avg_unsup_loss
metric_dict["unsup/loss"] = avg_unsup_loss
total_loss = utils.decay_weights(
total_loss,
FLAGS.weight_decay_rate)
#### Check model parameters
num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
tf.logging.info("#params: {}".format(num_params))
if FLAGS.verbose:
format_str = "{{:<{0}s}}\t{{}}".format(
max([len(v.name) for v in tf.trainable_variables()]))
for v in tf.trainable_variables():
tf.logging.info(format_str.format(v.name, v.get_shape()))
#### Evaluation mode
if mode == tf.estimator.ModeKeys.EVAL:
#### Metric function for classification
def metric_fn(per_example_loss, label_ids, logits):
# classification loss & accuracy
loss = tf.metrics.mean(per_example_loss)
predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
accuracy = tf.metrics.accuracy(label_ids, predictions)
ret_dict = {
"eval/classify_loss": loss,
"eval/classify_accuracy": accuracy
}
return ret_dict
eval_metrics = (metric_fn, [sup_loss, sup_labels, sup_logits])
#### Constructing evaluation TPUEstimatorSpec.
eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
loss=total_loss,
eval_metrics=eval_metrics)
return eval_spec
# increase the learning rate linearly
if FLAGS.warmup_steps > 0:
warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
* FLAGS.learning_rate
else:
warmup_lr = 0.0
# decay the learning rate using the cosine schedule
decay_lr = tf.train.cosine_decay(
FLAGS.learning_rate,
global_step=global_step-FLAGS.warmup_steps,
decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
alpha=FLAGS.min_lr_ratio)
learning_rate = tf.where(global_step < FLAGS.warmup_steps,
warmup_lr, decay_lr)
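# Resulting schedule: the learning rate ramps linearly from 0 to learning_rate over
# warmup_steps, then follows a cosine decay down to min_lr_ratio * learning_rate at train_steps.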
optimizer = tf.train.MomentumOptimizer(
learning_rate=learning_rate,
momentum=0.9,
use_nesterov=True)
if FLAGS.use_tpu:
optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
grads_and_vars = optimizer.compute_gradients(total_loss)
gradients, variables = zip(*grads_and_vars)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.apply_gradients(
zip(gradients, variables), global_step=tf.train.get_global_step())
#### Creating training logging hook
# compute accuracy
sup_pred = tf.argmax(sup_logits, axis=-1, output_type=sup_labels.dtype)
is_correct = tf.to_float(tf.equal(sup_pred, sup_labels))
acc = tf.reduce_mean(is_correct)
metric_dict["sup/sup_loss"] = avg_sup_loss
metric_dict["training/loss"] = total_loss
metric_dict["sup/acc"] = acc
metric_dict["training/lr"] = learning_rate
metric_dict["training/step"] = global_step
if not FLAGS.use_tpu:
log_info = ("step [{training/step}] lr {training/lr:.6f} "
"loss {training/loss:.4f} "
"sup/acc {sup/acc:.4f} sup/loss {sup/sup_loss:.6f} ")
if FLAGS.unsup_ratio > 0:
log_info += "unsup/loss {unsup/loss:.6f} "
formatter = lambda kwargs: log_info.format(**kwargs)
logging_hook = tf.train.LoggingTensorHook(
tensors=metric_dict,
every_n_iter=FLAGS.iterations,
formatter=formatter)
training_hooks = [logging_hook]
#### Constructing training TPUEstimatorSpec.
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op,
training_hooks=training_hooks)
else:
#### Constructing training TPUEstimatorSpec.
host_call = utils.construct_scalar_host_call(
metric_dict=metric_dict,
model_dir=params["model_dir"],
prefix="",
reduce_fn=tf.reduce_mean)
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op,
host_call=host_call)
return train_spec
return model_fn
def train(hparams):
##### Create input function
if FLAGS.unsup_ratio == 0:
FLAGS.aug_copy = 0
if FLAGS.dev_size != -1:
FLAGS.do_train = True
FLAGS.do_eval = True
if FLAGS.do_train:
train_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="train",
task_name=FLAGS.task_name,
sup_size=FLAGS.sup_size,
unsup_ratio=FLAGS.unsup_ratio,
aug_copy=FLAGS.aug_copy,
)
if FLAGS.do_eval:
if FLAGS.dev_size != -1:
eval_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="dev",
task_name=FLAGS.task_name,
sup_size=FLAGS.dev_size,
unsup_ratio=0,
aug_copy=0)
eval_size = FLAGS.dev_size
else:
eval_input_fn = data.get_input_fn(
data_dir=FLAGS.data_dir,
split="test",
task_name=FLAGS.task_name,
sup_size=-1,
unsup_ratio=0,
aug_copy=0)
if FLAGS.task_name == "cifar10":
eval_size = 10000
elif FLAGS.task_name == "svhn":
eval_size = 26032
else:
assert False, "You need to specify the size of your test set."
eval_steps = eval_size // FLAGS.eval_batch_size
##### Get model function
model_fn = get_model_fn(hparams)
estimator = utils.get_TPU_estimator(FLAGS, model_fn)
#### Training
if FLAGS.dev_size != -1:
tf.logging.info("***** Running training and validation *****")
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
tf.logging.info(" Unsupervised batch size = %d",
FLAGS.train_batch_size * FLAGS.unsup_ratio)
tf.logging.info(" Num train steps = %d", FLAGS.train_steps)
curr_step = 0
while True:
if curr_step >= FLAGS.train_steps:
break
tf.logging.info("Current step {}".format(curr_step))
train_step = min(FLAGS.save_steps, FLAGS.train_steps - curr_step)
estimator.train(input_fn=train_input_fn, steps=train_step)
estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
curr_step += FLAGS.save_steps
else:
if FLAGS.do_train:
tf.logging.info("***** Running training *****")
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
tf.logging.info(" Unsupervised batch size = %d",
FLAGS.train_batch_size * FLAGS.unsup_ratio)
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
if FLAGS.do_eval:
tf.logging.info("***** Running evaluation *****")
results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
tf.logging.info(">> Results:")
for key in results.keys():
tf.logging.info(" %s = %s", key, str(results[key]))
results[key] = results[key].item()
acc = results["eval/classify_accuracy"]
with tf.gfile.Open("{}/results.txt".format(FLAGS.model_dir), "w") as ouf:
ouf.write(str(acc))
def main(_):
if FLAGS.do_train:
tf.gfile.MakeDirs(FLAGS.model_dir)
flags_dict = tf.app.flags.FLAGS.flag_values_dict()
with tf.gfile.Open(os.path.join(FLAGS.model_dir, "FLAGS.json"), "w") as ouf:
json.dump(flags_dict, ouf)
hparams = tf.contrib.training.HParams()
if FLAGS.model_name == "wrn":
hparams.add_hparam("model_name", "wrn")
hparams.add_hparam("wrn_size", FLAGS.wrn_size)
elif FLAGS.model_name == "shake_shake_32":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 2)
elif FLAGS.model_name == "shake_shake_96":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 6)
elif FLAGS.model_name == "shake_shake_112":
hparams.add_hparam("model_name", "shake_shake")
hparams.add_hparam("shake_shake_widen_factor", 7)
elif FLAGS.model_name == "pyramid_net":
hparams.add_hparam("model_name", "pyramid_net")
elif FLAGS.model_name == "LeNet":
hparams.add_hparam("model_name", "LeNet")
else:
raise ValueError("Not Valid Model Name: %s" % FLAGS.model_name)
train(hparams)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run()

View file

@@ -1,31 +0,0 @@
# coding=utf-8
# Copyright 2019 The Google UDA Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/bin/bash
task_name=cifar10
python main.py \
--model_name="LeNet"\
--use_tpu=False \
--do_train=True \
--do_eval=True \
--task_name=${task_name} \
--sup_size=4000 \
--unsup_ratio=5 \
--train_batch_size=32 \
--data_dir=data/proc_data/${task_name} \
--model_dir=ckpt/cifar10_gpu \
--train_steps=400000 \
$@

View file

@@ -1,271 +0,0 @@
# code in this file is adapted from rpmcruz/autoaugment
# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
import random
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
import numpy as np
import torch
from PIL import Image
def ShearX(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0))
def ShearY(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0))
def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[0]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateXabs(img, v): # translation in absolute pixels, e.g. [0, 100]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[1]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def TranslateYabs(img, v): # translation in absolute pixels, e.g. [0, 100]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def Rotate(img, v): # [-30, 30]
assert -30 <= v <= 30
if random.random() > 0.5:
v = -v
return img.rotate(v)
def AutoContrast(img, _):
return PIL.ImageOps.autocontrast(img)
def Invert(img, _):
return PIL.ImageOps.invert(img)
def Equalize(img, _):
return PIL.ImageOps.equalize(img)
def Flip(img, _): # not from the paper
return PIL.ImageOps.mirror(img)
def FlipLR(img, v):
return img.transpose(Image.FLIP_LEFT_RIGHT)
def FlipUD(img, v):
return img.transpose(Image.FLIP_TOP_BOTTOM)
def Solarize(img, v): # [0, 256]
assert 0 <= v <= 256
return PIL.ImageOps.solarize(img, v)
def SolarizeAdd(img, addition=0, threshold=128):
img_np = np.array(img).astype(int)  # np.int is deprecated; plain int keeps the same behaviour
img_np = img_np + addition
img_np = np.clip(img_np, 0, 255)
img_np = img_np.astype(np.uint8)
img = Image.fromarray(img_np)
return PIL.ImageOps.solarize(img, threshold)
def Posterize(img, v): # [4, 8]
v = int(v)
v = max(1, v)
return PIL.ImageOps.posterize(img, v)
def Contrast(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Contrast(img).enhance(v)
def Color(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Color(img).enhance(v)
def Brightness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Brightness(img).enhance(v)
def Sharpness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Sharpness(img).enhance(v)
def Cutout(img, v): # [0, 60] => percentage: [0, 0.2]
assert 0.0 <= v <= 0.2
if v <= 0.:
return img
v = v * img.size[0]
return CutoutAbs(img, v)
def CutoutAbs(img, v): # v: cutout size in absolute pixels
# assert 0 <= v <= 20
if v < 0:
return img
w, h = img.size
x0 = np.random.uniform(w)
y0 = np.random.uniform(h)
x0 = int(max(0, x0 - v / 2.))
y0 = int(max(0, y0 - v / 2.))
x1 = min(w, x0 + v)
y1 = min(h, y0 + v)
xy = (x0, y0, x1, y1)
color = (125, 123, 114)
# color = (0, 0, 0)
img = img.copy()
PIL.ImageDraw.Draw(img).rectangle(xy, color)
return img
def SamplePairing(imgs): # [0, 0.4]
def f(img1, v):
i = np.random.choice(len(imgs))
img2 = PIL.Image.fromarray(imgs[i])
return PIL.Image.blend(img1, img2, v)
return f
def Identity(img, v):
return img
def augment_list(): # augmentation operations and their ranges
# https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
l = [
(Identity, 0., 1.0),
(FlipUD, 0., 1.0),
(FlipLR, 0., 1.0),
(Rotate, 0, 30), # 4
(TranslateX, 0., 0.33), # 2
(TranslateY, 0., 0.33), # 3
(ShearX, 0., 0.3), # 0
(ShearY, 0., 0.3), # 1
#(AutoContrast, 0, 1), # 5
#(Invert, 0, 1), # 6
#(Equalize, 0, 1), # 7
(Contrast, 0.1, 1.9), # 10
(Color, 0.1, 1.9), # 11
(Brightness, 0.1, 1.9), # 12
(Sharpness, 0.1, 1.9), # 13
(Posterize, 4, 8), # 9
(Solarize, 1, 256), # 8
# (Cutout, 0, 0.2), # 14
# (SamplePairing(imgs), 0, 0.4), # 15
]
# https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
#l = [
# (AutoContrast, 0, 1),
# (Equalize, 0, 1),
# (Invert, 0, 1),
# (Rotate, 0, 30),
# (Posterize, 0, 4),
# (Solarize, 0, 256),
# (SolarizeAdd, 0, 110),
# (Color, 0.1, 1.9),
# (Contrast, 0.1, 1.9),
# (Brightness, 0.1, 1.9),
# (Sharpness, 0.1, 1.9),
# (ShearX, 0., 0.3),
# (ShearY, 0., 0.3),
# (CutoutAbs, 0, 40),
# (TranslateXabs, 0., 100),
# (TranslateYabs, 0., 100),
#]
return l
class Lighting(object):
"""Lighting noise(AlexNet - style PCA - based noise)"""
def __init__(self, alphastd, eigval, eigvec):
self.alphastd = alphastd
self.eigval = torch.Tensor(eigval)
self.eigvec = torch.Tensor(eigvec)
def __call__(self, img):
if self.alphastd == 0:
return img
alpha = img.new().resize_(3).normal_(0, self.alphastd)
rgb = self.eigvec.type_as(img).clone() \
.mul(alpha.view(1, 3).expand(3, 3)) \
.mul(self.eigval.view(1, 3).expand(3, 3)) \
.sum(1).squeeze()
return img.add(rgb.view(3, 1, 1).expand_as(img))
class CutoutDefault(object):
"""
Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
"""
def __init__(self, length):
self.length = length
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
PARAMETER_MAX = 1
class RandAugment:
def __init__(self, n, m):
self.n = n
self.m = m # [0, PARAMETER_MAX]
self.augment_list = augment_list()
def __call__(self, img):
ops = random.choices(self.augment_list, k=self.n)
for op, minval, maxval in ops:
val = (float(self.m) / PARAMETER_MAX) * float(maxval - minval) + minval
img = op(img, val)
return img
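# With PARAMETER_MAX = 1, each sampled op receives val = m * (maxval - minval) + minval,
# so m acts as a global magnitude knob in [0, 1] and n ops are applied per image.
# Hypothetical usage sketch (names are illustrative, not from this file):
#   rand_aug = RandAugment(n=2, m=0.5)
#   augmented = rand_aug(pil_img)  # pil_img: a PIL.Image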

View file

@@ -1,98 +0,0 @@
import torch
import numpy as np
import torchvision
from PIL import Image
from torch import topk
import torch.nn.functional as F
from torch import topk
import cv2
from torchvision import transforms
import os
class SaveFeatures():
features=None
def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
def hook_fn(self, module, input, output): self.features = ((output.cpu()).data).numpy()
def remove(self): self.hook.remove()
def getCAM(feature_conv, weight_fc, class_idx):
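# Class activation map: CAM_c = sum_k w_{c,k} * A_k, i.e. the FC weights of class `class_idx`
# applied to the final conv feature maps, then shifted and rescaled to [0, 1].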
_, nc, h, w = feature_conv.shape
cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h*w)))
cam = cam.reshape(h, w)
cam = cam - np.min(cam)
cam_img = cam / np.max(cam)
# cam_img = np.uint8(255 * cam_img)
return cam_img
def main(cam):
device = 'cuda:0'
model_name = 'resnet50'
root = 'NEW_SS'
os.makedirs(os.path.join(root + '_CAM', 'OK'), exist_ok=True)
os.makedirs(os.path.join(root + '_CAM', 'NOK'), exist_ok=True)
train_transform = transforms.Compose([
transforms.ToTensor(),
])
dataset = torchvision.datasets.ImageFolder(
root=root, transform=train_transform,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
model = torchvision.models.__dict__[model_name](pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, 2)
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
model.eval()
weight_softmax_params = list(model._modules.get('fc').parameters())
weight_softmax = np.squeeze(weight_softmax_params[0].cpu().data.numpy())
final_layer = model._modules.get('layer4')
activated_features = SaveFeatures(final_layer)
for i, (img, target ) in enumerate(loader):
img = img.to(device)
prediction = model(img)
pred_probabilities = F.softmax(prediction, dim=1).data.squeeze()
class_idx = topk(pred_probabilities,1)[1].int()
# if target.item() != class_idx:
# print(dataset.imgs[i][0])
if cam:
overlay = getCAM(activated_features.features, weight_softmax, class_idx )
# import ipdb; ipdb.set_trace()  # debug breakpoint, disabled
import PIL
from torchvision.transforms import ToPILImage
img = ToPILImage()(overlay).resize(size=(1280, 1024), resample=PIL.Image.BILINEAR)
img.save('heat-pil.jpg')
img = cv2.imread(dataset.imgs[i][0])
height, width, _ = img.shape
overlay = cv2.resize(overlay, (width, height))
heatmap = cv2.applyColorMap(np.uint8(255 * overlay), cv2.COLORMAP_JET)  # applyColorMap expects uint8 input
cv2.imwrite('heat-cv2.jpg', heatmap)
img = cv2.imread(dataset.imgs[i][0])
height, width, _ = img.shape
overlay = cv2.resize(overlay, (width, height))
heatmap = cv2.applyColorMap(np.uint8(255 * overlay), cv2.COLORMAP_JET)  # applyColorMap expects uint8 input
result = heatmap * 0.3 + img * 0.5
clss = dataset.imgs[i][0].split(os.sep)[1]
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
cv2.imwrite(os.path.join(root+"_CAM", clss, name + '.jpg'), result)
print(f'{os.path.join(root+"_CAM", clss, name + ".jpg")} saved')
activated_features.remove()
if __name__ == "__main__":
main(cam=True)

Binary file not shown.

File diff suppressed because it is too large

View file

@@ -1,314 +0,0 @@
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc
from torchviz import make_dot
import torch
import torch.nn.functional as F
import time
class timer():
def __init__(self):
self._start_time=time.time()
def exec_time(self):
end = time.time()
res = end-self._start_time
self._start_time=end
return res
def print_graph(PyTorch_obj, fig_name='graph'):
graph=make_dot(PyTorch_obj) # passing the loss gives the whole graph
graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
graph.render(fig_name)
def plot_res(log, fig_name='res', param_names=None):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0].legend()
ax[1].set_title('Acc')
ax[1].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].set_title('Mag')
ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
ax[2].legend()
else :
ax[2].set_title('Prob')
#for idx, _ in enumerate(log[0]["param"]):
#ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[2].stackplot(epochs, proba, labels=param_names)
ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name)
plt.close()
def plot_resV2(log, fig_name='res', param_names=None):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))
ax[0, 0].set_title('Loss')
ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0, 0].legend()
ax[1, 0].set_title('Acc')
ax[1, 0].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
#proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[0, 1].set_title('Prob =f(epoch)')
ax[0, 1].stackplot(epochs, proba, labels=param_names)
#ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 1].set_title('Prob =f(TF)')
mean = np.mean(proba, axis=1)
std = np.std(proba, axis=1)
ax[1, 1].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 1]), plt.xticks(rotation=90)
ax[0, 2].set_title('Mag =f(epoch)')
ax[0, 2].stackplot(epochs, mag, labels=param_names)
ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 2].set_title('Mag =f(TF)')
mean = np.mean(mag, axis=1)
std = np.std(mag, axis=1)
ax[1, 2].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 2]), plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_compare(filenames, fig_name='res'):
all_data=[]
legend=""
for idx, file in enumerate(filenames):
legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
for data_idx, log in enumerate(all_data):
log=log['Log']
epochs = [x["epoch"] for x in log]
ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
#ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
else :
for idx, _ in enumerate(log[0]["param"]):
ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
fig.suptitle(legend)
ax[0].set_title('Loss')
ax[1].set_title('Acc')
ax[2].set_title('Param')
for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_res_compare(filenames, fig_name='res'):
all_data=[]
#legend=""
for idx, file in enumerate(filenames):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
n_tf = [len(x["Param_names"]) for x in all_data]
acc = [x["Accuracy"] for x in all_data]
time = [x["Time"][0] for x in all_data]
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
ax[0].plot(n_tf, acc)
ax[1].plot(n_tf, time)
ax[0].set_title('Acc')
ax[1].set_title('Time')
#for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_TF_res(log, tf_names, fig_name='res'):
mean = np.mean([x["param"] for x in log], axis=0)
std = np.std([x["param"] for x in log], axis=0)
fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
ax.bar(tf_names, mean, yerr=std)
#ax.bar(tf_names, log[-1]["param"])
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def viz_sample_data(imgs, labels, fig_name='data_sample'):
sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
plt.xlabel(labels[i].item())
plt.savefig(fig_name)
print("Sample saved :", fig_name)
plt.close()
def model_copy(src,dst, patch_copy=True, copy_grad=True):
#model=copy.deepcopy(fmodel) # not appropriate: we only want the weights/grads, not all of fmodel and its state
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) # copy missing data?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
# Copy the gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
#inner_opt.load_state_dict(diffopt.state_dict()) # need to save the optimizer state (momentum, etc.) => does not copy the state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
def print_torch_mem(add_info=''):
nb=0
max_size=0
for obj in gc.get_objects():
#print(type(obj))
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
#print(add_info, "-Garbage size :",len(gc.garbage))
"""Simple GPU memory report."""
mega_bytes = 1024.0 * 1024.0
string = add_info + ' memory (MB)'
string += ' | allocated: {}'.format(
torch.cuda.memory_allocated() / mega_bytes)
string += ' | max allocated: {}'.format(
torch.cuda.max_memory_allocated() / mega_bytes)
string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
string += ' | max cached: {}'.format(
torch.cuda.max_memory_cached()/ mega_bytes)
print(string)
def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
plt.figure()
mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) # could be interesting to multiply before taking the mean
std = np.std(proba, axis=1)*np.std(mag, axis=1)
plt.bar(param_names, mean, yerr=std)
plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
class loss_monitor(): # see https://github.com/pytorch/ignite
def __init__(self, patience, end_train=1):
self.patience = patience
self.end_train = end_train
self.counter = 0
self.best_score = None
self.reached_limit = 0
def register(self, loss):
if self.best_score is None:
self.best_score = loss
elif loss > self.best_score:
self.counter += 1
#if not self.reached_limit:
print("loss no improve counter", self.counter, self.reached_limit)
else:
self.best_score = loss
self.counter = 0
def limit_reached(self):
if self.counter >= self.patience:
self.counter = 0
self.reached_limit +=1
self.best_score = None
return self.reached_limit
def end_training(self):
if self.limit_reached() >= self.end_train:
return True
else:
return False
def reset(self):
self.__init__(self.patience, self.end_train)
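# Hypothetical usage sketch (illustrative only): stop once the validation loss has failed to
# improve `patience` times in a row, `end_train` times over:
#   monitor = loss_monitor(patience=5)
#   for epoch in range(epochs):
#       # ... train, then compute val_loss ...
#       monitor.register(val_loss)
#       if monitor.end_training():
#           break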

View file

@@ -1,102 +0,0 @@
import torch
import numpy as np
import torchvision
from PIL import Image
from torch import topk
from torch import nn
import torch.nn.functional as F
from torch import topk
import cv2
from torchvision import transforms
import os
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SaveFeatures():
activations, gradients = None, None
def __init__(self, m):
self.forward = m.register_forward_hook(self.forward_hook_fn)
self.backward = m.register_backward_hook(self.backward_hook_fn)
def forward_hook_fn(self, module, input, output):
self.activations = output.cpu().detach()
def backward_hook_fn(self, module, grad_input, grad_output):
self.gradients = grad_output[0].cpu().detach()
def remove(self):
self.forward.remove()
self.backward.remove()
def main(cam):
device = 'cuda:0'
model_name = 'resnet50'
root = '/mnt/md0/data/cifar10/tmp/cifar/train'
_root = 'cifar'
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
train_transform = transforms.Compose([
transforms.ToTensor(),
])
dataset = torchvision.datasets.ImageFolder(
root=root, transform=train_transform,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
model = torchvision.models.__dict__[model_name](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(loader.dataset.classes)))
model = nn.Sequential(body, head)
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
model.eval()
activated_features = SaveFeatures(model[0])
for i, (img, target ) in enumerate(loader):
img = img.to(device)
pred = model(img)
# import ipdb; ipdb.set_trace()  # debug breakpoint, disabled
# get the gradient of the output with respect to the parameters of the model
pred[:, target.item()].backward()
# import ipdb; ipdb.set_trace()
# pull the gradients out of the model
gradients = activated_features.gradients[0]
pooled_gradients = gradients.mean(1).mean(1)
# get the activations of the last convolutional layer
activations = activated_features.activations[0]
heatmap = F.relu(((activations*pooled_gradients[...,None,None])).sum(0))
heatmap /= torch.max(heatmap)
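# Grad-CAM: the channel weights are the spatially averaged gradients of the class score,
# and the heatmap is ReLU(sum_k alpha_k * A_k), rescaled to [0, 1] before colormapping.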
heatmap = heatmap.numpy()
image = cv2.imread(dataset.imgs[i][0])
heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
# superimposed_img = heatmap * 0.3 + image * 0.5
superimposed_img = heatmap
clss = dataset.imgs[i][0].split(os.sep)[1]
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
cv2.imwrite(os.path.join(_root+"_CAM", name + '.jpg'), superimposed_img)
print(f'{os.path.join(_root+"_CAM", name + ".jpg")} saved')
activated_features.remove()
if __name__ == "__main__":
main(cam=True)

View file

@@ -1,382 +0,0 @@
import datetime
import os
import time
import sys
import torch
import torch.utils.data
from torch import nn
import torchvision
from torchvision import transforms
from PIL import ImageEnhance
import random
import utils
from fastprogress import master_bar, progress_bar
import numpy as np
## DATA AUG ##
import higher
from dataug import *
from dataug_utils import *
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
#'Contrast',
#'Color',
#'Brightness',
#'Sharpness',
#'Posterize',
#'Solarize', # => expects images in [0, 1]; not optimized for batches
]
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SubsetSampler(torch.utils.data.SubsetRandomSampler):
def __init__(self, indices):
super().__init__(indices)
def __iter__(self):
return (self.indices[i] for i in range(len(self.indices)))
def __len__(self):
return len(self.indices)
def sharpness(img, factor):
sharpness_factor = random.uniform(1, factor)
sharp = ImageEnhance.Sharpness(img)
sharped = sharp.enhance(sharpness_factor)
return sharped
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar, Kldiv=False):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
image, target = image.to(device), target.to(device)
if not Kldiv :
output = model(image)
#output = F.log_softmax(output, dim=1)
loss = criterion(output, target) # no softmax?
else: # consumes ~2x memory
model.augment(mode=False)
output = model(image)
model.augment(mode=True)
log_sup=F.log_softmax(output, dim=1)
sup_loss = F.cross_entropy(log_sup, target)
aug_output = model(image)
log_aug=F.log_softmax(aug_output, dim=1)
aug_loss=F.cross_entropy(log_aug, target)
#KL div w/ logits - similarity between predictions (distributions)
KL_loss = F.softmax(output, dim=1)*(log_sup-log_aug)
KL_loss = KL_loss.sum(dim=-1)
#KL_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
KL_loss = KL_loss.mean()
unsupp_coeff = 1
loss = sup_loss + (aug_loss + KL_loss) * unsupp_coeff
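# This mirrors the UDA objective of the TensorFlow script above: supervised cross-entropy
# plus a consistency term between clean and augmented predictions, weighted by unsupp_coeff
# (unlike the TF version, the augmented cross-entropy is also kept in the unsupervised term).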
#print(sup_loss.item(), (aug_loss + KL_loss).item())
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
return metric_logger.loss.global_avg, confmat
def evaluate(model, criterion, data_loader, device):
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Test:'
missed = []
with torch.no_grad():
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
if target.item() != output.topk(1)[1].item():
missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
confmat.update(target.flatten(), output.argmax(1).flatten())
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
return metric_logger.loss.global_avg, missed, confmat
def get_train_valid_loader(args, augment, random_seed, valid_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
"""
Utility function for loading and returning train and valid
multi-process iterators over the CIFAR-10 dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Params
------
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- augment: whether to apply the data augmentation scheme
mentioned in the paper. Only applied on the train split.
- random_seed: fix seed for reproducibility.
- valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
error_msg = "[!] valid_size should be in the range [0, 1]."
assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
# normalize = transforms.Normalize(
# mean=[0.4914, 0.4822, 0.4465],
# std=[0.2023, 0.1994, 0.2010],
# )
# define transforms
if augment:
train_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.Lambda(lambda img: sharpness(img, 5)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
else:
train_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
# load the dataset
train_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=train_transform
)
valid_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=valid_transform
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
valid_sampler = SubsetSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=1, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
imgs = np.asarray(train_dataset.imgs)
# print('Train')
# print(imgs[train_idx])
#print('Valid')
#print(imgs[valid_idx])
tgt = [0,0]
for _, targets in train_loader:
for target in targets:
tgt[target]+=1
print("Train targets :", tgt)
tgt = [0,0]
for _, targets in valid_loader:
for target in targets:
tgt[target]+=1
print("Valid targets :", tgt)
return (train_loader, valid_loader)
def main(args):
print(args)
device = torch.device(args.device)
torch.backends.cudnn.benchmark = True
#augment = True if not args.test_only else False
if not args.test_only and args.augment=='flip' : augment = True
else : augment = False
print("Augment", augment)
data_loader, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
num_workers=args.workers, valid_size=0.3, random_seed=999)
print("Creating model")
model = torchvision.models.__dict__[args.model](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
model = nn.Sequential(body, head)
Kldiv=False
if not args.test_only and (args.augment=='Rand' or args.augment=='RandKL'):
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
if args.augment=='RandKL': Kldiv=True
model['data_aug']['mag'].data = model['data_aug']['mag'].data * args.magnitude
print("Augmodel")
# model.fc = nn.Linear(model.fc.in_features, 2)
# import ipdb; ipdb.set_trace()
criterion = nn.CrossEntropyLoss().to(device)
# optimizer = torch.optim.SGD(
# model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
es = utils.EarlyStopping() if not (args.augment=='Rand' or args.augment=='RandKL') else utils.EarlyStopping(augmented_model=True)
if args.test_only:
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
print('TEST')
_, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
print(missed)
print('TRAIN')
_, missed, _ = evaluate(model, criterion, data_loader, device=device)
print(missed)
return
model = model.to(device)
print("Start training")
start_time = time.time()
mb = master_bar(range(args.epochs))
for epoch in mb:
_, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb, Kldiv)
lr_scheduler.step( (epoch+1)*len(data_loader) )
val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(val_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
#print('Valid')
#print(valid_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
def parse_args():
import argparse
parser = argparse.ArgumentParser(description='PyTorch Classification Training')
parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
parser.add_argument('--device', default='cuda:0', help='device')
parser.add_argument('-b', '--batch-size', default=8, type=int)
parser.add_argument('--epochs', default=3, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
help='number of data loading workers (default: 0)')
parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
metavar='W', help='weight decay (default: 4e-5)',
dest='weight_decay')
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument('-a', '--augment', default='None', type=str,
metavar='N', help='Data augment',
dest='augment')
parser.add_argument('-m', '--magnitude', default=1.0, type=float,
metavar='N', help='Augmentation magnitude',
dest='magnitude')
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
main(args)

View file

@@ -1,585 +0,0 @@
import datetime
import os
import time
import sys
import torch
import torch.utils.data
from torch import nn
import torchvision
from torchvision import transforms
from PIL import ImageEnhance
import random
import utils
from fastprogress import master_bar, progress_bar
import numpy as np
## DATA AUG ##
import higher
from dataug import *
from dataug_utils import *
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast',
'Color',
'Brightness',
'Sharpness',
'Posterize',
'Solarize', # => expects images in [0, 1]; not optimized for batches
]
def compute_vaLoss(model, dl_it, dl):
device = next(model.parameters()).device
try:
xs, ys = next(dl_it)
except StopIteration: # end of the validation epoch
dl_it = iter(dl)
xs, ys = next(dl_it)
xs, ys = xs.to(device), ys.to(device)
model.eval() # validation without transformations!
return F.cross_entropy(model(xs), ys)
def model_copy(src,dst, patch_copy=True, copy_grad=True):
#model=copy.deepcopy(fmodel) # not appropriate: we only want the weights/grads, not all of fmodel and its state
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) # copy missing data?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
# Copy the gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
#inner_opt.load_state_dict(diffopt.state_dict()) # need to save the optimizer state (momentum, etc.) => does not copy the state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
#############
class Lambda(nn.Module):
"Create a layer that simply calls `func` with `x`"
def __init__(self, func):
super().__init__()
self.func=func
def forward(self, x): return self.func(x)
class SubsetSampler(torch.utils.data.SubsetRandomSampler):
def __init__(self, indices):
super().__init__(indices)
def __iter__(self):
return (self.indices[i] for i in range(len(self.indices)))
def __len__(self):
return len(self.indices)
def sharpness(img, factor):
sharpness_factor = random.uniform(1, factor)
sharp = ImageEnhance.Sharpness(img)
sharped = sharp.enhance(sharpness_factor)
return sharped
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
return metric_logger.loss.global_avg, confmat
def evaluate(model, criterion, data_loader, device):
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Test:'
missed = []
with torch.no_grad():
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
image, target = image.to(device), target.to(device)
output = model(image)
loss = criterion(output, target)
if target.item() != output.topk(1)[1].item():
missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
confmat.update(target.flatten(), output.argmax(1).flatten())
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
return metric_logger.loss.global_avg, missed, confmat
def get_train_valid_loader(args, augment, random_seed, train_size=0.5, test_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
"""
Utility function for loading and returning train and valid
multi-process iterators over the CIFAR-10 dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Params
------
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- augment: whether to apply the data augmentation scheme
mentioned in the paper. Only applied on the train split.
- random_seed: fix seed for reproducibility.
- valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
error_msg = "[!] test_size should be in the range [0, 1]."
assert ((test_size >= 0) and (test_size <= 1)), error_msg
# normalize = transforms.Normalize(
# mean=[0.4914, 0.4822, 0.4465],
# std=[0.2023, 0.1994, 0.2010],
# )
# define transforms
if augment:
train_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.Lambda(lambda img: sharpness(img, 5)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
# transforms.ColorJitter(brightness=0.3),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# normalize,
])
else:
train_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
valid_transform = transforms.Compose([
transforms.ToTensor(),
# normalize,
])
# load the dataset
train_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=train_transform
)
test_dataset = torchvision.datasets.ImageFolder(
root=args.data_path, transform=valid_transform
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(test_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, test_idx = indices[split:], indices[:split]
train_idx, valid_idx = train_idx[:int(len(train_idx)*train_size)], train_idx[int(len(train_idx)*train_size):]
print("\nTrain", len(train_idx), "\nValid", len(valid_idx), "\nTest", len(test_idx))
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) if not args.test_only else SubsetSampler(valid_idx)
test_sampler = SubsetSampler(test_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size=1, sampler=test_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
imgs = np.asarray(train_dataset.imgs)
# print('Train')
# print(imgs[train_idx])
#print('Valid')
#print(imgs[valid_idx])
return (train_loader, valid_loader, test_loader)
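# Usage sketch (illustrative, assuming `args` comes from parse_args() below): with
# test_size=0.2 and train_size=0.99, 20% of the ImageFolder images form the test
# split and the remaining 80% is split 99%/1% between train and validation.
#   train_loader, valid_loader, test_loader = get_train_valid_loader(
#       args, augment=False, random_seed=999, train_size=0.99, test_size=0.2)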
def main(args):
print(args)
device = torch.device(args.device)
torch.backends.cudnn.benchmark = True
#augment = True if not args.test_only else False
augment = False
data_loader, dl_val, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
num_workers=args.workers, train_size=0.99, test_size=0.2, random_seed=999)
print("Creating model")
model = torchvision.models.__dict__[args.model](pretrained=True)
flat = list(model.children())
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
model = nn.Sequential(body, head)
# model.fc = nn.Linear(model.fc.in_features, 2)
# import ipdb; ipdb.set_trace()
criterion = nn.CrossEntropyLoss().to(device)
# optimizer = torch.optim.SGD(
# model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
'''
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
'''
es = utils.EarlyStopping()
if args.test_only:
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
model = model.to(device)
print('TEST')
_, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
print(missed)
print('TRAIN')
_, missed, _ = evaluate(model, criterion, data_loader, device=device)
print(missed)
return
model = model.to(device)
print("Start training")
start_time = time.time()
mb = master_bar(range(args.epochs))
"""
for epoch in mb:
_, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb)
lr_scheduler.step( (epoch+1)*len(data_loader) )
val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(val_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
print('Valid')
print(valid_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
"""
#######
inner_it = args.inner_it
dataug_epoch_start=0
print_freq=1
KLdiv=False
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
#model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
val_loss=torch.tensor(0) #Needed if no meta step is performed during an epoch
dl_val_it = iter(dl_val)
countcopy=0
#if inner_it!=0:
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=args.lr) #lr=1e-2
#inner_opt = torch.optim.SGD(model['model'].parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) #lr=1e-2 / momentum=0.9
inner_opt = torch.optim.Adam(model['model'].parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
inner_opt,
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
high_grad_track = True
if inner_it == 0:
high_grad_track=False
model.train()
model.augment(mode=False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel,track_higher_grads=high_grad_track)
i=0
for epoch in mb:
metric_logger = utils.MetricLogger(delimiter=" ")
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
header = 'Epoch: {}'.format(epoch)
t0 = time.process_time()
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=mb):
#for i, (xs, ys) in enumerate(dl_train):
#print_torch_mem("it"+str(i))
i+=1
image, target = image.to(device), target.to(device)
if(not KLdiv):
#Uniform method
logits = fmodel(image) # modified `params` can also be passed as a kwarg
output = F.log_softmax(logits, dim=1)
loss = F.cross_entropy(logits, target, reduction='none') # per-sample loss; no need to call loss.backward(), diffopt.step handles it
if fmodel._data_augmentation: #weight the loss of each augmented sample
w_loss = fmodel['data_aug'].loss_weight()#.to(device)
loss = loss * w_loss
loss = loss.mean()
else:
#KL-divergence method
fmodel.augment(mode=False)
sup_logits = fmodel(image)
log_sup=F.log_softmax(sup_logits, dim=1)
fmodel.augment(mode=True)
loss = F.cross_entropy(sup_logits, target)
if fmodel._data_augmentation:
aug_logits = fmodel(image)
log_aug=F.log_softmax(aug_logits, dim=1)
aug_loss=0
if epoch>50: #delayed start?
#KL div w/ logits - similarity of the predicted distributions
aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
aug_loss=aug_loss.sum(dim=-1)
#aug_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
w_loss = fmodel['data_aug'].loss_weight() #weight the loss
aug_loss = (w_loss * aug_loss).mean()
aug_loss += (F.cross_entropy(aug_logits, target, reduction='none') * w_loss).mean()
#print(aug_loss)
unsupp_coeff = 1
loss += aug_loss * unsupp_coeff
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
if(high_grad_track and i%inner_it==0): #Perform Meta step
#print("meta")
#Of little use if high_grad_track = False
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) + fmodel['data_aug'].reg_loss()
#print_graph(val_loss)
val_loss.backward()
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
#if epoch>50:
meta_opt.step()
model['data_aug'].adjust_param(soft=False) #Constraint: sum(proba) = 1
#model['data_aug'].next_TF_set()
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
acc1 = utils.accuracy(output, target)[0]
batch_size = image.shape[0]
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
metric_logger.update(loss=loss.item())
confmat.update(target.flatten(), output.argmax(1).flatten())
if(not high_grad_track and (torch.cuda.memory_cached()/1024.0**2)>20000):
countcopy+=1
print_torch_mem("copy")
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
#Needed to reset higher (fast params accumulate even with track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
print_torch_mem("copy")
if(not high_grad_track):
countcopy+=1
print_torch_mem("end copy")
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
#Needed to reset higher (fast params accumulate even with track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
print_torch_mem("end copy")
tf = time.process_time()
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d'%(epoch))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',model['data_aug']['prob'].grad)
print('TF Mag :', model['data_aug']['mag'].data)
#print('Mag grad',model['data_aug']['mag'].grad)
#print('Reg loss:', model['data_aug'].reg_loss().item())
#print('Aug loss', aug_loss.item())
#############
#### Log ####
#print(type(model['data_aug']) is dataug.Data_augV5)
'''
param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": param #if isinstance(model['data_aug'], Data_augV5)
#else [p.item() for p in model['data_aug']['prob']],
}
log.append(data)
'''
#############
train_confmat=confmat
lr_scheduler.step( (epoch+1)*len(data_loader) )
test_loss, _, test_confmat = evaluate(model, criterion, data_loader_test, device=device)
es(test_loss, model)
# print('Valid Missed')
# print(valid_missed)
# print('Train')
# print(train_confmat)
print('Test')
print(test_confmat)
# if es.early_stop:
# break
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
def parse_args():
import argparse
parser = argparse.ArgumentParser(description='PyTorch Classification Training')
parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
parser.add_argument('--device', default='cuda:0', help='device')
parser.add_argument('-b', '--batch-size', default=8, type=int)
parser.add_argument('--epochs', default=3, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
help='number of data loading workers (default: 0)')
parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
metavar='W', help='weight decay (default: 4e-5)',
dest='weight_decay')
parser.add_argument(
"--test-only",
dest="test_only",
help="Only test the model",
action="store_true",
)
parser.add_argument('--in_it', '--inner_it', default=0, type=int,
metavar='N', help='higher inner_it',
dest='inner_it')
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
main(args)

View file

@@ -1,346 +0,0 @@
import torch
import kornia
import random
### Available TF for Dataug ###
'''
TF_dict={ #Dataugv4
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=torch.tensor([rand_int(mag, maxval=30)for _ in x], device=x.device))),
'TranslateX': (lambda x, mag: translate(x, translation=torch.tensor([[rand_int(mag, maxval=20), 0] for _ in x], device=x.device))),
'TranslateY': (lambda x, mag: translate(x, translation=torch.tensor([[0, rand_int(mag, maxval=20)] for _ in x], device=x.device))),
'ShearX': (lambda x, mag: shear(x, shear=torch.tensor([[rand_float(mag, maxval=0.3), 0] for _ in x], device=x.device))),
'ShearY': (lambda x, mag: shear(x, shear=torch.tensor([[0, rand_float(mag, maxval=0.3)] for _ in x], device=x.device))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Color':(lambda x, mag: color(x, color_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
'Posterize': (lambda x, mag: posterize(x, bits=torch.tensor([rand_int(mag, minval=4, maxval=8) for _ in x], device=x.device))),
'Solarize': (lambda x, mag: solarize(x, thresholds=torch.tensor([rand_int(mag,minval=1, maxval=256)/256. for _ in x], device=x.device))), #=>Image in [0, 1] #Not optimized for batches
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
'''
'''
TF_dict={ #Dataugv5 #AutoAugment
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
'''
TF_dict={ #Dataugv5
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
#Color TF (Common mag scale)
'+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
'=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=>Image in [0, 1]
'BRotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30*3))),
'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=0))),
'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=1))),
'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=0))),
'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=1))),
'BadTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=0))),
'BadTranslateX_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=0))),
'BadTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=1))),
'BadTranslateY_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=1))),
'BadColor':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadSharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
#Not functional
#'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
#'Equalize': (lambda mag: None),
}
TF_no_mag={'Identity', 'FlipUD', 'FlipLR'}
TF_ignore_mag= TF_no_mag | {'Solarize', 'Posterize'}
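# Illustrative use (assumes `x` is a float image batch in [0, 1] and `mag` a scalar
# tensor level in [0, PARAMETER_MAX], defined further below):
#   x = torch.rand(8, 3, 32, 32)
#   mag = torch.tensor(0.5)
#   x_rot = TF_dict['Rotate'](x, mag)   # random rotations, here up to +/- 15 degrees
# Transforms in TF_no_mag ignore `mag`; those in TF_ignore_mag use it but do not
# propagate a gradient through it.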
def int_image(float_image): #WARNING: slight loss of information (granularity: 1/256 = 0.0039)
return (float_image*255.).type(torch.uint8)
def float_image(int_image):
return int_image.type(torch.float)/255.
#def rand_inverse(value):
# return value if random.random() < 0.5 else -value
#def rand_int(mag, maxval, minval=None): #[(-maxval,minval), maxval]
# real_max = int_parameter(mag, maxval=maxval)
# if not minval : minval = -real_max
# return random.randint(minval, real_max)
#def rand_float(mag, maxval, minval=None): #[(-maxval,minval), maxval]
# real_max = float_parameter(mag, maxval=maxval)
# if not minval : minval = -real_max
# return random.uniform(minval, real_max)
def rand_floats(size, mag, maxval, minval=None): #[(-maxval,minval), maxval]
real_mag = float_parameter(mag, maxval=maxval)
if not minval : minval = -real_mag
#return random.uniform(minval, real_max)
return minval + (real_mag-minval) * torch.rand(size, device=mag.device) #[min_val, real_mag]
def invScale_rand_floats(size, mag, maxval, minval):
#Mag=[0,PARAMETER_MAX] => [PARAMETER_MAX, 0] = [maxval, minval]
real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval-minval)+minval
return real_mag + (maxval-real_mag) * torch.rand(size, device=mag.device) #[real_mag, max_val]
def zero_stack(tensor, zero_pos):
if zero_pos==0:
return torch.stack((tensor, torch.zeros((tensor.shape[0],), device=tensor.device)), dim=1)
if zero_pos==1:
return torch.stack((torch.zeros((tensor.shape[0],), device=tensor.device), tensor), dim=1)
else:
raise Exception("Invalid zero_pos : ", zero_pos)
#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
PARAMETER_MAX = 1 # What is the max 'level' a transform could be predicted
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
#return float(level) * maxval / PARAMETER_MAX
return (level * maxval / PARAMETER_MAX)#.to(torch.float)
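# Worked example: with PARAMETER_MAX == 1, float_parameter(0.5, maxval=30) returns
# 0.5 * 30 / 1 = 15.0, i.e. the level linearly rescales the maximum magnitude.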
#def int_parameter(level, maxval): #Gradient is lost
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
#return int(level * maxval / PARAMETER_MAX)
# return (level * maxval / PARAMETER_MAX)
def flipLR(x):
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def flipUD(x):
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
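# Note (illustrative): both flips are expressed as 3x3 homographies applied with a
# single kornia.warp_perspective call over the whole batch; flipLR maps column x to
# (w-1) - x and flipUD maps row y to (h-1) - y.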
def rotate(x, angle):
return kornia.rotate(x, angle=angle.type(torch.float)) #Kornia does not support ints
def translate(x, translation):
#print(translation)
return kornia.translate(x, translation=translation.type(torch.float)) #Kornia does not support ints
def shear(x, shear):
return kornia.shear(x, shear=shear)
def contrast(x, contrast_factor):
return kornia.adjust_contrast(x, contrast_factor=contrast_factor) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
(batch_size, channels, h, w) = x.shape
gray_x = kornia.rgb_to_grayscale(x)
gray_x = gray_x.repeat_interleave(channels, dim=1)
return blend(gray_x, x, color_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def brightness(x, brightness_factor):
device = x.device
return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def sharpeness(x, sharpness_factor):
device = x.device
(batch_size, channels, h, w) = x.shape
k = torch.tensor([[[ 1., 1., 1.],
[ 1., 5., 1.],
[ 1., 1., 1.]]], device=device) #Smooth Filter : https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) #The alpha channel may need to be handled separately
return blend(smooth_x, x, sharpness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
bits = bits.type(torch.uint8) #Gradient is lost
x = int_image(x) #Expect image in the range of [0, 1]
mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)
(batch_size, channels, h, w) = x.shape
mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
return float_image(x & mask)
def auto_contrast(x): #NOT OPTIMIZED FOR BATCHES #VERY SLOW
# Optimization: efficient LUT application / per-batch/per-channel histogram computation
print("Warning: not checked yet!")
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
# find lowest/highest samples after preprocessing
for lo in range(256):
if hist[lo]:
break
for hi in range(255, -1, -1):
if hist[hi]:
break
if hi <= lo:
# don't bother
pass
else:
scale = 255.0 / (hi - lo)
offset = -lo * scale
for ix in range(256):
n_ix = int(ix * scale + offset)
if n_ix < 0: n_ix = 0
elif n_ix > 255: n_ix = 255
chan[chan==ix]=n_ix
x[im_idx, chan_idx]=chan
#print('End',x[0])
return float_image(x)
def equalize(x): #NOT OPTIMIZED FOR BATCHES
raise NotImplementedError("equalize is not implemented")
# Optimization: efficient LUT application / per-batch/per-channel histogram computation
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Per-image operation
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Per-channel operation
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #NOT DIFFERENTIABLE
return float_image(x)
def solarize(x, thresholds):
batch_size, channels, h, w = x.shape
#imgs=[]
#for idx, t in enumerate(thresholds): #Per-image operation
# mask = x[idx] > t #Gradient is lost
#In place
# inv_x = 1-x[idx][mask]
# x[idx][mask]=inv_x
#
#Out of place
# im = x[idx]
# inv_x = 1-im[mask]
# imgs.append(im.masked_scatter(mask,inv_x))
#idxs=torch.tensor(range(x.shape[0]), device=x.device)
#idxs=idxs.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
#x=x.scatter(dim=0, index=idxs, src=torch.stack(imgs))
#
thresholds = thresholds.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
#print(thresholds.grad_fn)
x=torch.where(x>thresholds,1-x, x)
#print(mask.grad_fn)
#x=x.min(thresholds)
#inv_x = 1-x[mask]
#x=x.where(x<thresholds,1-x)
#x[mask]=inv_x
#x=x.masked_scatter(mask, inv_x)
return x
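# e.g. with a threshold of 0.5, a pixel value of 0.8 becomes 1 - 0.8 = 0.2 while 0.3
# is left unchanged; thresholds are broadcast per-sample over channels and spatial
# dimensions before the torch.where above.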
#https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
def blend(x,y,alpha): #out = image1 * (1.0 - alpha) + image2 * alpha
#return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0) #out=src1*alpha+src2*beta+gamma #Does not work with batched alpha
if not isinstance(x, torch.Tensor):
raise TypeError("x should be a tensor. Got {}".format(type(x)))
if not isinstance(y, torch.Tensor):
raise TypeError("y should be a tensor. Got {}".format(type(y)))
(batch_size, channels, h, w) = x.shape
alpha = alpha.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #There is surely a simpler way ...
res = x*(1-alpha) + y*alpha
return res
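# e.g. blend(x, y, alpha) = x*(1-alpha) + y*alpha returns x when alpha == 0 and y
# when alpha == 1; alpha is a per-sample tensor broadcast over channels and spatial
# dimensions, so color() above blends the grayscale image with the original.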

View file

@@ -1,202 +0,0 @@
from __future__ import print_function
from collections import defaultdict, deque
import datetime
import math
import time
import torch
import numpy as np
import os
from fastprogress import progress_bar
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size=20, fmt=None):
if fmt is None:
fmt = "{global_avg:.4f}"
self.deque = deque(maxlen=window_size)
self.total = 0.0
self.count = 0
self.fmt = fmt
def update(self, value, n=1):
self.deque.append(value)
self.count += n
self.total += value * n
@property
def median(self):
d = torch.tensor(list(self.deque))
return d.median().item()
@property
def avg(self):
d = torch.tensor(list(self.deque), dtype=torch.float32)
return d.mean().item()
@property
def global_avg(self):
return self.total / self.count
@property
def max(self):
return max(self.deque)
@property
def value(self):
return self.deque[-1]
def __str__(self):
return self.fmt.format(
median=self.median,
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
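# Usage sketch (illustrative):
#   v = SmoothedValue(window_size=20)
#   v.update(0.5); v.update(0.7)
#   v.global_avg  # -> 0.6, average over everything seen so far
#   v.avg         # average over the last 20 values only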
class ConfusionMatrix(object):
def __init__(self, num_classes):
self.num_classes = num_classes
self.mat = None
def update(self, a, b):
n = self.num_classes
if self.mat is None:
self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
with torch.no_grad():
k = (a >= 0) & (a < n)
inds = n * a[k].to(torch.int64) + b[k]
self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)
def reset(self):
self.mat.zero_()
def compute(self):
h = self.mat.float()
acc_global = torch.diag(h).sum() / h.sum()
acc = torch.diag(h) / h.sum(1)
return acc_global, acc
def __str__(self):
acc_global, acc = self.compute()
return (
'global correct: {:.1f}\n'
'average row correct: {}').format(
acc_global.item() * 100,
['{:.1f}'.format(i) for i in (acc * 100).tolist()])
class MetricLogger(object):
def __init__(self, delimiter="\t"):
self.meters = defaultdict(SmoothedValue)
self.delimiter = delimiter
def update(self, **kwargs):
for k, v in kwargs.items():
if isinstance(v, torch.Tensor):
v = v.item()
assert isinstance(v, (float, int))
self.meters[k].update(v)
def __getattr__(self, attr):
if attr in self.meters:
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
return self.delimiter.join(loss_str)
def add_meter(self, name, meter):
self.meters[name] = meter
def log_every(self, iterable, parent, header=None, **kwargs):
if not header:
header = ''
log_msg = self.delimiter.join([
'{meters}'
])
progress = progress_bar(iterable, parent=parent, **kwargs)
for idx, obj in enumerate(progress):
yield idx, obj
progress.comment = log_msg.format(
meters=str(self))
print('{header} {meters}'.format(header=header, meters=str(self)))
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target[None])
res = []
for k in topk:
correct_k = correct[:k].flatten().sum(dtype=torch.float32)
res.append(correct_k * (100.0 / batch_size))
return res
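# Usage sketch (illustrative): for a (batch, num_classes) logits tensor,
#   acc1 = accuracy(output, target)[0]                  # top-1 accuracy in %
#   acc1, acc5 = accuracy(output, target, topk=(1, 5))  # needs >= 5 classes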
class EarlyStopping:
"""Early stops the training if validation loss doesn't improve after a given patience."""
def __init__(self, patience=7, verbose=False, delta=0, augmented_model=False):
"""
Args:
patience (int): How long to wait after last time validation loss improved.
Default: 7
verbose (bool): If True, prints a message for each validation loss improvement.
Default: False
delta (float): Minimum change in the monitored quantity to qualify as an improvement.
Default: 0
augmented_model (bool): If True, only the inner 'model' sub-module of an
augmented model is checkpointed.
Default: False
"""
self.patience = patience
self.verbose = verbose
self.counter = 0
self.best_score = None
self.early_stop = False
self.val_loss_min = np.Inf
self.delta = delta
self.augmented_model = augmented_model
def __call__(self, val_loss, model):
score = -val_loss
if self.best_score is None:
self.best_score = score
self.save_checkpoint(val_loss, model)
elif score < self.best_score - self.delta:
self.counter += 1
# print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
# if self.counter >= self.patience:
# self.early_stop = True
else:
self.best_score = score
self.save_checkpoint(val_loss, model)
self.counter = 0
def save_checkpoint(self, val_loss, model):
'''Saves model when validation loss decrease.'''
if self.verbose:
print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
torch.save(model.state_dict(), 'checkpoint.pt') if not self.augmented_model else torch.save(model['model'].state_dict(), 'checkpoint.pt')
self.val_loss_min = val_loss
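# Usage sketch (illustrative): call the object once per epoch with the current
# validation loss; it checkpoints to 'checkpoint.pt' whenever the loss improves.
#   es = EarlyStopping(patience=7, verbose=True)
#   for epoch in range(epochs):
#       val_loss = validate(model)   # hypothetical validation routine
#       es(val_loss, model)          # note: the patience-based early_stop flag is
#                                    # disabled above (counter check commented out)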