Mirror of https://github.com/AntoineHX/smart_augmentation.git (synced 2025-06-27 15:35:24 +02:00)

Commit 431252992c: Remove Old folder
Parent commit: 18be4d85ca

38 changed files with 0 additions and 7821 deletions

@@ -1,456 +0,0 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Transforms used in the Augmentation Policies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import inspect
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import


IMAGE_SIZE = 28
# Dataset mean and std of the images on the training set
MEANS = [0.49139968, 0.48215841, 0.44653091]
STDS = [0.24703223, 0.24348513, 0.26158784]
PARAMETER_MAX = 10  # The max 'level' a transform could be predicted


def random_flip(x):
  """Flip the input x horizontally with 50% probability."""
  if np.random.rand(1)[0] > 0.5:
    return np.fliplr(x)
  return x


def zero_pad_and_crop(img, amount=4):
  """Zero pad by `amount` zero pixels on each side then take a random crop.

  Args:
    img: numpy image that will be zero padded and cropped.
    amount: amount of zeros to pad `img` with horizontally and vertically.

  Returns:
    The cropped zero padded img. The returned numpy array will be of the same
    shape as `img`.
  """
  padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
                         img.shape[2]))
  padded_img[amount:img.shape[0] + amount, amount:img.shape[1] + amount, :] = img
  top = np.random.randint(low=0, high=2 * amount)
  left = np.random.randint(low=0, high=2 * amount)
  new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
  return new_img


def create_cutout_mask(img_height, img_width, num_channels, size):
  """Creates a zero mask used for cutout of shape `img_height` x `img_width`.

  Args:
    img_height: Height of image cutout mask will be applied to.
    img_width: Width of image cutout mask will be applied to.
    num_channels: Number of channels in the image.
    size: Size of the zeros mask.

  Returns:
    A mask of shape `img_height` x `img_width` with all ones except for a
    square of zeros of shape `size` x `size`. This mask is meant to be
    elementwise multiplied with the original image. Additionally returns
    the `upper_coord` and `lower_coord` which specify where the cutout mask
    will be applied.
  """
  assert img_height == img_width

  # Sample center where cutout mask will be applied
  height_loc = np.random.randint(low=0, high=img_height)
  width_loc = np.random.randint(low=0, high=img_width)

  # Determine upper left and lower right corners of patch
  upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
  lower_coord = (min(img_height, height_loc + size // 2),
                 min(img_width, width_loc + size // 2))
  mask_height = lower_coord[0] - upper_coord[0]
  mask_width = lower_coord[1] - upper_coord[1]
  assert mask_height > 0
  assert mask_width > 0

  mask = np.ones((img_height, img_width, num_channels))
  zeros = np.zeros((mask_height, mask_width, num_channels))
  mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
      zeros)
  return mask, upper_coord, lower_coord


def cutout_numpy(img, size=16):
  """Apply cutout with mask of shape `size` x `size` to `img`.

  The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
  This operation applies a `size`x`size` mask of zeros to a random location
  within `img`.

  Args:
    img: Numpy image that cutout will be applied to.
    size: Height/width of the cutout mask that will be applied to `img`.

  Returns:
    A numpy tensor that is the result of applying the cutout mask to `img`.
  """
  img_height, img_width, num_channels = (img.shape[0], img.shape[1],
                                         img.shape[2])
  assert len(img.shape) == 3
  mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
  return img * mask


def float_parameter(level, maxval):
  """Helper function to scale `val` between 0 and maxval.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    A float that results from scaling `maxval` according to `level`.
  """
  return float(level) * maxval / PARAMETER_MAX


def int_parameter(level, maxval):
  """Helper function to scale `val` between 0 and maxval.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    An int that results from scaling `maxval` according to `level`.
  """
  return int(level * maxval / PARAMETER_MAX)


def pil_wrap(img):
  """Convert the `img` numpy tensor to a PIL Image."""
  return Image.fromarray(
      np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')


def pil_unwrap(pil_img):
  """Converts the PIL img to a numpy array."""
  pic_array = (np.array(pil_img.getdata()).reshape((IMAGE_SIZE, IMAGE_SIZE, 4)) / 255.0)
  i1, i2 = np.where(pic_array[:, :, 3] == 0)
  pic_array = (pic_array[:, :, :3] - MEANS) / STDS
  pic_array[i1, i2] = [0, 0, 0]
  return pic_array


def apply_policy(policy, img):
  """Apply the `policy` to the numpy `img`.

  Args:
    policy: A list of tuples with the form (name, probability, level) where
      `name` is the name of the augmentation operation to apply, `probability`
      is the probability of applying the operation and `level` is the strength
      with which to apply the operation.
    img: Numpy image that will have `policy` applied to it.

  Returns:
    The result of applying `policy` to `img`.
  """
  #print('img shape :',img.shape)
  #print('Policy len :',len(policy))
  pil_img = pil_wrap(img)
  for xform in policy:
    #print('xform :', len(xform))
    assert len(xform) == 3
    name, probability, level = xform
    #xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level)
    xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability.eval(), level)
    pil_img = xform_fn(pil_img)
  return pil_unwrap(pil_img)


class TransformFunction(object):
  """Wraps the Transform function for pretty printing options."""

  def __init__(self, func, name):
    self.f = func
    self.name = name

  def __repr__(self):
    return '<' + self.name + '>'

  def __call__(self, pil_img):
    return self.f(pil_img)


class TransformT(object):
  """Each instance of this class represents a specific transform."""

  def __init__(self, name, xform_fn):
    self.name = name
    self.xform = xform_fn

  def pil_transformer(self, probability, level):

    def return_function(im):
      if random.random() < probability:
        im = self.xform(im, level)
      return im

    name = self.name + '({:.1f},{})'.format(probability, level)
    return TransformFunction(return_function, name)

  def do_transform(self, image, level):
    f = self.pil_transformer(PARAMETER_MAX, level)
    return pil_unwrap(f(pil_wrap(image)))


################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level: pil_img)
flip_lr = TransformT(
    'FlipLR',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
    'FlipUD',
    lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
    'AutoContrast',
    lambda pil_img, level: ImageOps.autocontrast(
        pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
    'Equalize',
    lambda pil_img, level: ImageOps.equalize(
        pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
    'Invert',
    lambda pil_img, level: ImageOps.invert(
        pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
    'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
    'Smooth',
    lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH))


def _rotate_impl(pil_img, level):
  """Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
  degrees = int_parameter(level, 30)
  if random.random() > 0.5:
    degrees = -degrees
  return pil_img.rotate(degrees)


rotate = TransformT('Rotate', _rotate_impl)


def _posterize_impl(pil_img, level):
  """Applies PIL Posterize to `pil_img`."""
  level = int_parameter(level, 4)
  return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')


posterize = TransformT('Posterize', _posterize_impl)


def _shear_x_impl(pil_img, level):
  """Applies PIL ShearX to `pil_img`.

  The ShearX operation shears the image along the horizontal axis with `level`
  magnitude.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had ShearX applied to it.
  """
  level = float_parameter(level, 0.3)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, level, 0, 0, 1, 0))


shear_x = TransformT('ShearX', _shear_x_impl)


def _shear_y_impl(pil_img, level):
  """Applies PIL ShearY to `pil_img`.

  The ShearY operation shears the image along the vertical axis with `level`
  magnitude.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had ShearY applied to it.
  """
  level = float_parameter(level, 0.3)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, level, 1, 0))


shear_y = TransformT('ShearY', _shear_y_impl)


def _translate_x_impl(pil_img, level):
  """Applies PIL TranslateX to `pil_img`.

  Translate the image in the horizontal direction by `level`
  number of pixels.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had TranslateX applied to it.
  """
  level = int_parameter(level, 10)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, level, 0, 1, 0))


translate_x = TransformT('TranslateX', _translate_x_impl)


def _translate_y_impl(pil_img, level):
  """Applies PIL TranslateY to `pil_img`.

  Translate the image in the vertical direction by `level`
  number of pixels.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had TranslateY applied to it.
  """
  level = int_parameter(level, 10)
  if random.random() > 0.5:
    level = -level
  return pil_img.transform((IMAGE_SIZE, IMAGE_SIZE), Image.AFFINE, (1, 0, 0, 0, 1, level))


translate_y = TransformT('TranslateY', _translate_y_impl)


def _crop_impl(pil_img, level, interpolation=Image.BILINEAR):
  """Applies a crop to `pil_img` with the size depending on the `level`."""
  cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level))
  resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation)
  return resized


crop_bilinear = TransformT('CropBilinear', _crop_impl)


def _solarize_impl(pil_img, level):
  """Applies PIL Solarize to `pil_img`.

  Inverts all pixel values above a threshold that decreases as `level` grows.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had Solarize applied to it.
  """
  level = int_parameter(level, 256)
  return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')


solarize = TransformT('Solarize', _solarize_impl)


def _cutout_pil_impl(pil_img, level):
  """Apply cutout to pil_img at the specified level."""
  size = int_parameter(level, 20)
  if size <= 0:
    return pil_img
  img_height, img_width, num_channels = (IMAGE_SIZE, IMAGE_SIZE, 3)
  _, upper_coord, lower_coord = (
      create_cutout_mask(img_height, img_width, num_channels, size))
  pixels = pil_img.load()  # create the pixel map
  for i in range(upper_coord[0], lower_coord[0]):  # for every col:
    for j in range(upper_coord[1], lower_coord[1]):  # for every row
      pixels[i, j] = (125, 122, 113, 0)  # set the colour accordingly
  return pil_img


cutout = TransformT('Cutout', _cutout_pil_impl)


def _enhancer_impl(enhancer):
  """Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
  def impl(pil_img, level):
    v = float_parameter(level, 1.8) + .1  # going to 0 just destroys it
    return enhancer(pil_img).enhance(v)
  return impl


color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
    ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))

ALL_TRANSFORMS = [
    flip_lr,
    flip_ud,
    auto_contrast,
    equalize,
    invert,
    rotate,
    posterize,
    crop_bilinear,
    solarize,
    color,
    contrast,
    brightness,
    sharpness,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
    cutout,
    blur,
    smooth
]

NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()

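The `apply_policy` helper above takes a policy as a list of `(name, probability, level)` tuples; in this copy, `probability` is evaluated with `.eval()`, so it is expected to be a TensorFlow scalar with an active default session. A minimal usage sketch (the input image, the session setup and the module name `augmentation_transforms` are assumptions, not part of the deleted file):

```python
# Hypothetical sketch: apply a two-operation policy to one normalized 28x28x3 image.
import numpy as np
import tensorflow as tf
import augmentation_transforms  # assumed module name for the file above

img = np.random.randn(28, 28, 3).astype(np.float32)  # stand-in for a normalized image
with tf.Session().as_default():
    prob = tf.constant(0.8)  # a TF scalar, since apply_policy calls probability.eval()
    policy = [('FlipLR', prob, 5), ('Rotate', prob, 7)]
    augmented = augmentation_transforms.apply_policy(policy, img)  # numpy (28, 28, 3)
```
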
@@ -1,131 +0,0 @@
import matplotlib.pyplot as plt
from far_ho.examples.datasets import Datasets, Dataset

import os
import numpy as np
import tensorflow as tf

import augmentation_transforms as augmentation_transforms  ##### WARNING: DUPLICATE FILE => SHOULD BE HANDLED BETTER ####


def viz_data(dataset, fig_name='data_sample', aug_policy=None):

    plt.figure(figsize=(10, 10))
    for i in range(25):
        plt.subplot(5, 5, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        img = dataset.data[i][:, :, 0]
        if aug_policy:
            img = augment_img(img, aug_policy)
        #print('im shape',img.shape)
        plt.imshow(img, cmap=plt.cm.binary)
        plt.xlabel(np.nonzero(dataset.target[i])[0].item())

    plt.savefig(fig_name)


def augment_img(data, policy):

    #print('Im shape',data.shape)
    data = np.stack((data,) * 3, axis=-1)  # Quick hack: just to force 3 channels
    #print('Im shape',data.shape)
    final_img = augmentation_transforms.apply_policy(policy, data)
    #final_img = augmentation_transforms.random_flip(augmentation_transforms.zero_pad_and_crop(final_img, 4))
    # Apply cutout
    #final_img = augmentation_transforms.cutout_numpy(final_img)

    im_rgb = np.array(final_img, np.float32)
    im_gray = np.dot(im_rgb[..., :3], [0.2989, 0.5870, 0.1140])  # Just to go back to 1 channel

    return im_gray


### https://www.kaggle.com/raoulma/mnist-image-class-tensorflow-cnn-99-51-test-acc#5.-Build-the-neural-network-with-tensorflow-
## build the neural network class
# weight initialization
def weight_variable(shape, name=None):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

# bias initialization
def bias_variable(shape, name=None):
    initial = tf.constant(0.1, shape=shape)  # positive bias
    return tf.Variable(initial, name=name)

# 2D convolution
def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)

# max pooling
def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

def cnn(x_data_tf, y_data_tf, name='model'):
    # tunable hyperparameters for nn architecture
    s_f_conv1 = 3   # filter size of first convolution layer (default = 3)
    n_f_conv1 = 36  # number of features of first convolution layer (default = 36)
    s_f_conv2 = 3   # filter size of second convolution layer (default = 3)
    n_f_conv2 = 36  # number of features of second convolution layer (default = 36)
    s_f_conv3 = 3   # filter size of third convolution layer (default = 3)
    n_f_conv3 = 36  # number of features of third convolution layer (default = 36)
    n_n_fc1 = 576   # number of neurons of first fully connected layer (default = 576)

    # 1st layer: convolution + max pooling
    W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, 1, n_f_conv1], name='W_conv1_tf')  # (5,5,1,32)
    b_conv1_tf = bias_variable([n_f_conv1], name='b_conv1_tf')  # (32)
    h_conv1_tf = tf.nn.relu(conv2d(x_data_tf, W_conv1_tf) + b_conv1_tf, name='h_conv1_tf')  # (.,28,28,32)
    h_pool1_tf = max_pool_2x2(h_conv1_tf, name='h_pool1_tf')  # (.,14,14,32)

    # 2nd layer: convolution + max pooling
    W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name='W_conv2_tf')
    b_conv2_tf = bias_variable([n_f_conv2], name='b_conv2_tf')
    h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name='h_conv2_tf')  # (.,14,14,32)
    h_pool2_tf = max_pool_2x2(h_conv2_tf, name='h_pool2_tf')  # (.,7,7,32)

    # 3rd layer: convolution + max pooling
    W_conv3_tf = weight_variable([s_f_conv3, s_f_conv3, n_f_conv2, n_f_conv3], name='W_conv3_tf')
    b_conv3_tf = bias_variable([n_f_conv3], name='b_conv3_tf')
    h_conv3_tf = tf.nn.relu(conv2d(h_pool2_tf, W_conv3_tf) + b_conv3_tf, name='h_conv3_tf')  # (.,7,7,32)
    h_pool3_tf = max_pool_2x2(h_conv3_tf, name='h_pool3_tf')  # (.,4,4,32)

    # 4th layer: fully connected
    W_fc1_tf = weight_variable([4 * 4 * n_f_conv3, n_n_fc1], name='W_fc1_tf')  # (4*4*32, 1024)
    b_fc1_tf = bias_variable([n_n_fc1], name='b_fc1_tf')  # (1024)
    h_pool3_flat_tf = tf.reshape(h_pool3_tf, [-1, 4 * 4 * n_f_conv3], name='h_pool3_flat_tf')  # (.,1024)
    h_fc1_tf = tf.nn.relu(tf.matmul(h_pool3_flat_tf, W_fc1_tf) + b_fc1_tf, name='h_fc1_tf')  # (.,1024)

    # add dropout
    #keep_prob_tf = tf.placeholder(dtype=tf.float32, name='keep_prob_tf')
    #h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name='h_fc1_drop_tf')

    # 5th layer: fully connected
    W_fc2_tf = weight_variable([n_n_fc1, 10], name='W_fc2_tf')
    b_fc2_tf = bias_variable([10], name='b_fc2_tf')
    z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name='z_pred_tf')  # => (.,10)
    # predicted probabilities in one-hot encoding
    y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')

    # tensor of correct predictions
    y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
                                 tf.argmax(y_data_tf, 1),
                                 name='y_pred_correct_tf')
    return y_pred_proba_tf

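`augment_img` above round-trips a single-channel image through the 3-channel policy pipeline and back to grayscale. A short sketch under the same assumption that probabilities are TensorFlow scalars with a default session available (the input array is a stand-in):

```python
# Hypothetical sketch: augment one normalized 28x28 grayscale digit with a fixed policy.
import numpy as np
import tensorflow as tf

gray = np.zeros((28, 28), dtype=np.float32)  # stand-in for one normalized MNIST sample
with tf.Session().as_default():
    policy = [('TranslateX', tf.constant(0.5), 8)]
    out = augment_img(gray, policy)  # tiled to 3 channels, augmented, back to (28, 28)
```
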
@@ -1,166 +0,0 @@
#https://github.com/arcelien/pba/blob/master/autoaugment/train_cifar.py
from __future__ import absolute_import, print_function, division

import os
import contextlib
import numpy as np
import tensorflow as tf
#import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
#import pprint

import autoaugment.augmentation_transforms as augmentation_transforms
#import autoaugment.policies as found_policies
from autoaugment.wrn import build_wrn_model
# NOTE: setup_arg_scopes, helper_utils, data_utils and the shake_drop/shake_shake
# builders used below are assumed to come from the autoaugment package, like wrn
# above; they are not defined in this file.


def build_model(inputs, num_classes, is_training, hparams):
  """Constructs the vision model being trained/evaled.

  Args:
    inputs: input features/images being fed to the image model being built.
    num_classes: number of output classes being predicted.
    is_training: is the model training or not.
    hparams: additional hyperparameters associated with the image model.

  Returns:
    The logits of the image model.
  """
  scopes = setup_arg_scopes(is_training)
  with contextlib.nested(*scopes):
    if hparams.model_name == 'pyramid_net':
      logits = build_shake_drop_model(
          inputs, num_classes, is_training)
    elif hparams.model_name == 'wrn':
      logits = build_wrn_model(
          inputs, num_classes, hparams.wrn_size)
    elif hparams.model_name == 'shake_shake':
      logits = build_shake_shake_model(
          inputs, num_classes, hparams, is_training)
  return logits


class CifarModel(object):
  """Builds an image model for Cifar10/Cifar100."""

  def __init__(self, hparams):
    self.hparams = hparams

  def build(self, mode):
    """Construct the cifar model."""
    assert mode in ['train', 'eval']
    self.mode = mode
    self._setup_misc(mode)
    self._setup_images_and_labels()
    self._build_graph(self.images, self.labels, mode)

    self.init = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())

  def _setup_misc(self, mode):
    """Sets up miscellaneous in the cifar model constructor."""
    self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
    self.reuse = None if (mode == 'train') else True
    self.batch_size = self.hparams.batch_size
    if mode == 'eval':
      self.batch_size = 25

  def _setup_images_and_labels(self):
    """Sets up image and label placeholders for the cifar model."""
    if FLAGS.dataset == 'cifar10':
      self.num_classes = 10
    else:
      self.num_classes = 100
    self.images = tf.placeholder(tf.float32, [self.batch_size, 32, 32, 3])
    self.labels = tf.placeholder(tf.float32,
                                 [self.batch_size, self.num_classes])

  def assign_epoch(self, session, epoch_value):
    session.run(self._epoch_update, feed_dict={self._new_epoch: epoch_value})

  def _build_graph(self, images, labels, mode):
    """Constructs the TF graph for the cifar model.

    Args:
      images: A 4-D image Tensor
      labels: A 2-D labels Tensor.
      mode: string indicating training mode (e.g., 'train', 'valid', 'test').
    """
    is_training = 'train' in mode
    if is_training:
      self.global_step = tf.train.get_or_create_global_step()

    logits = build_model(
        images,
        self.num_classes,
        is_training,
        self.hparams)
    self.predictions, self.cost = helper_utils.setup_loss(
        logits, labels)
    self.accuracy, self.eval_op = tf.metrics.accuracy(
        tf.argmax(labels, 1), tf.argmax(self.predictions, 1))
    self._calc_num_trainable_params()

    # Adds L2 weight decay to the cost
    self.cost = helper_utils.decay_weights(self.cost,
                                           self.hparams.weight_decay_rate)
    #### Warning: differs from the original implementation

    self.init = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())


########################################################

######## PBA ############

# Parallel Cifar model trainer
tf.flags.DEFINE_string('model_name', 'wrn',
                       'wrn, shake_shake_32, shake_shake_96, shake_shake_112, '
                       'pyramid_net')
tf.flags.DEFINE_string('checkpoint_dir', '/tmp/training', 'Training Directory.')
tf.flags.DEFINE_string('data_path', '/tmp/data',
                       'Directory where dataset is located.')
tf.flags.DEFINE_string('dataset', 'cifar10',
                       'Dataset to train with. Either cifar10 or cifar100')
tf.flags.DEFINE_integer('use_cpu', 1, '1 if use CPU, else GPU.')
## ???

FLAGS = tf.flags.FLAGS
FLAGS.dataset
FLAGS.data_path
FLAGS.model_name = 'wrn'

hparams = tf.contrib.training.HParams(
    train_size=50000,
    validation_size=0,
    eval_test=1,
    dataset=FLAGS.dataset,
    data_path=FLAGS.data_path,
    batch_size=128,
    gradient_clipping_by_global_norm=5.0)
if FLAGS.model_name == 'wrn':
    hparams.add_hparam('model_name', 'wrn')
    hparams.add_hparam('num_epochs', 200)
    hparams.add_hparam('wrn_size', 160)
    hparams.add_hparam('lr', 0.1)
    hparams.add_hparam('weight_decay_rate', 5e-4)

data_loader = data_utils.DataSet(hparams)
data_loader.reset()

with tf.Graph().as_default():  #, tf.device('/cpu:0' if FLAGS.use_cpu else '/gpu:0'):
    """Builds the image models for train and eval."""
    # Determine if we should build the train and eval model. When using
    # distributed training we only want to build one or the other and not both.
    with tf.variable_scope('model', use_resource=False):
        m = CifarModel(hparams)
        m.build('train')
        #self._num_trainable_params = m.num_trainable_params
        #self._saver = m.saver
    #with tf.variable_scope('model', reuse=True, use_resource=False):
    #    meval = CifarModel(self.hparams)
    #    meval.build('eval')


    ##### FAR-HO ####

    for _ in range(n_hyper_iterations):
        pass  # (left unfinished in the removed file)

@@ -1,92 +0,0 @@
import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex
import matplotlib.pyplot as plt

sess = tf.InteractiveSession()


def get_data():
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=(.1, .1,))
    return datasets.train, datasets.validation


def g_logits(x, y):
    with tf.variable_scope('model'):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits


x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
train_set, validation_set = get_data()

lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas) * ce)
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
rev_it = 10
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

T = 20  # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()

print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)
n_hyper_iterations = 200
inner_losses = []
outer_losses = []
train_accs = []
val_accs = []

for _ in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)

    inner_obj = L.eval(train_set_supplier())
    outer_obj = E.eval(validation_set_supplier())
    inner_losses.append(inner_obj)
    outer_losses.append(outer_obj)
    print('inner:', inner_obj)
    print('outer:', outer_obj)

    train_acc = accuracy.eval(train_set_supplier())
    val_acc = accuracy.eval(validation_set_supplier())
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    print('training accuracy', train_acc)
    print('validation accuracy', val_acc)

    print('learning rate', lr.eval())
    print('norm of examples weight', tf.norm(lambdas).eval())
    print('-'*50)

plt.subplot(211)
plt.plot(inner_losses, label='training loss')
plt.plot(outer_losses, label='validation loss')
plt.legend(loc=0, frameon=True)
#plt.xlim(0, 19)
plt.subplot(212)
plt.plot(train_accs, label='training accuracy')
plt.plot(val_accs, label='validation accuracy')
plt.legend(loc=0, frameon=True)

plt.savefig('H%d - I%d - R%d' % (n_hyper_iterations, T, rev_it))

@@ -1,126 +0,0 @@
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
import blue_utils as butil

# Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

# Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 500
T = 20  # Number of inner iterations
rev_it = 10
hp_lr = 1.e-3
##########################

# MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)

# CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='y')

logits = butil.cnn(x, y)

train_set, validation_set, test_set = get_data(data_split=(.05, .05,))

butil.viz_data(train_set)
print('Data sampled !')

# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, .1), 1.e-7))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))
lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))
rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.00001), 0.00001))

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce) + rho * tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()])  # Remove the second part of the loss when these HPs are not used
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.MomentumOptimizer(lr, mu)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

train_set_supplier = train_set.create_supplier(x, y, batch_size=256)  # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

his_params = []

tf.global_variables_initializer().run()

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)
    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]
    his_params.append(res)

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', L.eval(train_set_supplier()))
    print('outer:', E.eval(validation_set_supplier()))
    print('training accuracy:', res[5])
    print('validation accuracy:', res[6])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])

#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])

ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])

plt.savefig('res_cnn_H{}_I{}'.format(n_hyper_iterations, T))

@@ -1,141 +0,0 @@
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
import blue_utils as butil

# Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=False)
    print("Data shape : ", datasets.train.dim_data, "/ Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

# Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 10
T = 10  # Number of inner iterations
rev_it = 10
hp_lr = 0.02
##########################

# MNIST
#x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
#y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
#logits = g_logits(x, y)

# CNN : reshape = False
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='y')

logits = butil.cnn(x, y)

train_set, validation_set, test_set = get_data(data_split=(.1, .1,))

# NOTE: the min/max bounds below look inverted; as written the constraint clamps the value to 0.9
probX = far.get_hyperparameter('probX', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9))
probY = far.get_hyperparameter('probY', initializer=0.1, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 0.9))

#lr = far.get_hyperparameter('lr', initializer=1e-4, constraint=lambda t: tf.maximum(tf.minimum(t, 1e-4), 1e-4))
#mu = far.get_hyperparameter('mu', initializer=0.9, constraint=lambda t: tf.maximum(tf.minimum(t, 0.9), 0.9))

#probX, probY = 0.5, 0.5
#policy = [('TranslateX', probX, 8), ('TranslateY', probY, 8)]
policy = [('TranslateX', probX, 8), ('FlipUD', probY, 8)]
print('Hyp :', far.utils.hyperparameters(scope=None))

#butil.viz_data(train_set, aug_policy= policy)
#print('Data sampled !')

# Artificially add the transforms to the loss just so they are accounted for in the graph dynamics
probX_loss = tf.sigmoid(probX)
probY_loss = tf.sigmoid(probY)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(probX_loss * probY_loss * ce)
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.AdamOptimizer()
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)

train_set_supplier = train_set.create_supplier(x, y, batch_size=256, aug_policy=policy)  # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

#print(train_set.dim_data,validation_set.dim_data)

his_params = []

tf.global_variables_initializer().run()

butil.viz_data(train_set, fig_name='Start_sample', aug_policy=policy)
print('Data sampled !')

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier,
               _skip_hyper_ts=True)
    res = sess.run(far.hyperparameters()) + [L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]
    his_params.append(res)

    butil.viz_data(train_set, fig_name='Train_sample_{}'.format(hyt), aug_policy=policy)
    print('Data sampled !')

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', L.eval(train_set_supplier()))
    print('outer:', E.eval(validation_set_supplier()))
    print('training accuracy:', res[4])
    print('validation accuracy:', res[5])
    print('Transformation : ProbX -', res[0], '/ProbY -', res[1])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('ProbX')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('ProbY')
ax[1].plot([e[1] for e in his_params])

ax[2].set_title('Tr. and val. errors')
ax[2].plot([e[2] for e in his_params])
ax[2].plot([e[3] for e in his_params])

ax[3].set_title('Tr. and val. acc')
ax[3].plot([e[4] for e in his_params])
ax[3].plot([e[5] for e in his_params])

plt.savefig('res_cnn_aug_H{}_I{}'.format(n_hyper_iterations, T))

@@ -1,133 +0,0 @@
#https://github.com/lucfra/FAR-HO/blob/master/far_ho/examples/autoMLDemos/Far-HO%20Demo%2C%20AutoML%202018%2C%20ICML%20workshop.ipynb
import warnings
warnings.filterwarnings("ignore")

import os
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import far_ho as far
import far_ho.examples as far_ex

tf.logging.set_verbosity(tf.logging.ERROR)

import matplotlib.pyplot as plt
#import blue_utils as butil

# Reset
try:
    sess.close()
except: pass
rnd = np.random.RandomState(1)
tf.reset_default_graph()
sess = tf.InteractiveSession()

def get_data(data_split):
    # load a small portion of mnist data
    datasets = far_ex.mnist(data_root_folder=os.path.join(os.getcwd(), 'MNIST_DATA'), partitions=data_split, reshape=True)
    print("Data shape : ", datasets.train.dim_data, " / Label shape : ", datasets.train.dim_target)
    [print("Nb samples : ", d.num_examples) for d in datasets]
    return datasets.train, datasets.validation, datasets.test

# Model
# FC : reshape = True
def g_logits(x, y, name='model'):
    with tf.variable_scope(name):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits

#### Hyper-parameters ####
n_hyper_iterations = 90
T = 20  # Number of inner iterations
rev_it = 10
hp_lr = 0.1
epochs = 10
batch_size = 256
##########################

# MNIST
x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)

# CNN : reshape = False
#x = tf.placeholder(dtype=tf.float32, shape=[None,28,28,1], name='x')
#y = tf.placeholder(dtype=tf.float32, shape=[None,10], name='y')

#logits = butil.cnn(x,y)

train_set, validation_set, test_set = get_data(data_split=(.6, .3,))

#butil.viz_data(train_set)

# lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=1e-2, constraint=lambda t: tf.maximum(tf.minimum(t, 0.1), 1.e-7))
mu = far.get_hyperparameter('mu', initializer=0.95, constraint=lambda t: tf.maximum(tf.minimum(t, .99), 1.e-5))
#rho = far.get_hyperparameter('rho', initializer=0.00001, constraint=lambda t: tf.maximum(tf.minimum(t, 0.01), 0.))

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(ce)  #+ rho*tf.add_n([tf.reduce_sum(w**2) for w in tf.trainable_variables()])  # Remove the second part of the loss when these HPs are not used
E = tf.reduce_mean(ce)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(logits, 1)), tf.float32))

inner_optimizer = far.MomentumOptimizer(lr, mu)
#inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer(hp_lr)
hyper_method = far.ReverseHG().truncated(reverse_iterations=rev_it)
hyper_step = far.HyperOptimizer(hyper_method).minimize(E, outer_optimizer, L, inner_optimizer)  #, global_step=tf.train.get_or_create_step())

train_set_supplier = train_set.create_supplier(x, y, batch_size=batch_size)  #, epochs=1) # stochastic GD
validation_set_supplier = validation_set.create_supplier(x, y)

print('Hyper iterations per epoch', int(train_set.num_examples / batch_size * epochs / T))

his_params = []

tf.global_variables_initializer().run()

for hyt in range(n_hyper_iterations):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier,
               _skip_hyper_ts=False)
    res = sess.run(far.hyperparameters()) + [0, L.eval(train_set_supplier()),
                                             E.eval(validation_set_supplier()),
                                             accuracy.eval(train_set_supplier()),
                                             accuracy.eval(validation_set_supplier())]

    his_params.append(res)

    print('Hyper-it :', hyt, '/', n_hyper_iterations)
    print('inner:', res[3])
    print('outer:', res[4])
    print('training accuracy:', res[5])
    print('validation accuracy:', res[6])
    #print('learning rate', lr.eval(), 'momentum', mu.eval(), 'l2 coefficient', rho.eval())
    print('-'*50)

test_set_supplier = test_set.create_supplier(x, y)
print('Test accuracy:', accuracy.eval(test_set_supplier()))

fig, ax = plt.subplots(ncols=4, figsize=(15, 3))
ax[0].set_title('Learning rate')
ax[0].plot([e[0] for e in his_params])

ax[1].set_title('Momentum factor')
ax[1].plot([e[1] for e in his_params])

#ax[2].set_title('L2 regulariz.')
#ax[2].plot([e[2] for e in his_params])
ax[2].set_title('Tr. and val. acc')
ax[2].plot([e[5] for e in his_params])
ax[2].plot([e[6] for e in his_params])

ax[3].set_title('Tr. and val. errors')
ax[3].plot([e[3] for e in his_params])
ax[3].plot([e[4] for e in his_params])

plt.savefig('resultats/res_fc_H{}_I{}'.format(n_hyper_iterations, T))
#plt.savefig('resultats/res_fc_H{}_I{}_noHyp'.format(n_hyper_iterations, T))

@@ -1,5 +0,0 @@
venv/
__pycache__
data/
log/
.vscode/

Binary file not shown.

@@ -1,33 +0,0 @@
# Gradient Descent: The Ultimate Optimizer

[](https://github.com/ambv/black)

| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |

This repository contains the paper and code to the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).

I couldn't find the code (which is found in the appendix at the end of the
paper) anywhere on the web. What I present here is the code of the paper with
instructions on how to set it up.

Getting the code in a runnable state required some fixes on my part so the
code might be slightly different than that presented in the paper.

## Set up

```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```

When you are done you can exit the virtualenv with

```shell
deactivate
```

@ -1,244 +0,0 @@
|
||||||
from hyperopt import *
|
|
||||||
#from hyperopt_v2 import *
|
|
||||||
|
|
||||||
import torchvision.transforms.functional as TF
|
|
||||||
import torchvision.transforms as T
|
|
||||||
|
|
||||||
#from scipy import ndimage
|
|
||||||
import kornia
|
|
||||||
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
class MNIST_FullyConnected_Augmented(Optimizable):
|
|
||||||
"""
|
|
||||||
A fully-connected NN for the MNIST task. This is Optimizable but not itself
|
|
||||||
an optimizer.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
|
|
||||||
self.device = device
|
|
||||||
#print(self.device)
|
|
||||||
parameters = {
|
|
||||||
"w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
|
|
||||||
"b1": torch.zeros(num_hid, device=self.device).t(),
|
|
||||||
"w2": torch.zeros(num_hid, num_out, device=self.device).t(),
|
|
||||||
"b2": torch.zeros(num_out, device=self.device).t(),
|
|
||||||
|
|
||||||
#Data augmentation
|
|
||||||
"prob": torch.tensor(0.5, device=self.device),
|
|
||||||
"mag": torch.tensor(180.0, device=self.device),
|
|
||||||
}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
|
||||||
self.optimizer.initialize()
|
|
||||||
#print(self.device)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
"""Compute a prediction."""
|
|
||||||
#print("Prob:",self.parameters["prob"].item())
|
|
||||||
if random.random() < self.parameters["prob"]:
|
|
||||||
#angle = 45
|
|
||||||
#x = TF.rotate(x, angle)
|
|
||||||
#print(self.device)
|
|
||||||
#x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
|
|
||||||
x = x + self.parameters["mag"]
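# NOTE: the PIL-based rotation above (TF.rotate) is not differentiable w.r.t.
# the angle, so this additive shift appears to serve as a placeholder
# augmentation; it keeps "mag" inside the autograd graph so a hyper-gradient
# can be computed for it end-to-end.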
|
|
||||||
|
|
||||||
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
|
|
||||||
x = torch.tanh(x)
|
|
||||||
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
|
|
||||||
x = torch.tanh(x)
|
|
||||||
x = F.log_softmax(x, dim=1)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "mnist_FC_augmented / " + str(self.optimizer)
|
|
||||||
|
|
||||||
class LeNet(Optimizable, nn.Module):
|
|
||||||
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
|
|
||||||
nn.Module.__init__(self)
|
|
||||||
self.device = device
|
|
||||||
parameters = {
|
|
||||||
"w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
|
|
||||||
"b1": torch.zeros(20, device=self.device),
|
|
||||||
"w2": torch.zeros(50, 20, 5, 5, device=self.device),
|
|
||||||
"b2": torch.zeros(50, device=self.device),
|
|
||||||
"w3": torch.zeros(500,4*4*50, device=self.device),
|
|
||||||
"b3": torch.zeros(500, device=self.device),
|
|
||||||
"w4": torch.zeros(10, 500, device=self.device),
|
|
||||||
"b4": torch.zeros(10, device=self.device),
|
|
||||||
|
|
||||||
#Data augmentation
|
|
||||||
"prob": torch.tensor(1.0, device=self.device),
|
|
||||||
"mag": torch.tensor(180.0, device=self.device),
|
|
||||||
}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
|
|
||||||
self.optimizer.initialize()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
|
|
||||||
if random.random() < self.parameters["prob"]:
|
|
||||||
|
|
||||||
batch_size = x.shape[0]
|
|
||||||
# create transformation (rotation)
|
|
||||||
alpha = self.parameters["mag"] # in degrees
|
|
||||||
angle = torch.ones(batch_size, device=self.device) * alpha
|
|
||||||
|
|
||||||
# define the rotation center
|
|
||||||
center = torch.ones(batch_size, 2, device=self.device)
|
|
||||||
center[..., 0] = x.shape[3] / 2 # x
|
|
||||||
center[..., 1] = x.shape[2] / 2 # y
|
|
||||||
|
|
||||||
#print(x.shape, center)
|
|
||||||
# define the scale factor
|
|
||||||
scale = torch.ones(batch_size, device=self.device)
|
|
||||||
|
|
||||||
# compute the transformation matrix
|
|
||||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
|
||||||
|
|
||||||
# apply the transformation to original image
|
|
||||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
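# NOTE: kornia.get_rotation_matrix2d builds one 2x3 affine matrix per sample
# from (center, angle, scale), and warp_affine applies it with differentiable
# grid sampling, so gradients with respect to the rotation magnitude "mag"
# flow back through the warp.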
|
|
||||||
|
|
||||||
#print("Start Shape ", x.shape)
|
|
||||||
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = out.view(out.size(0), -1)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
return F.log_softmax(out, dim=1)
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "mnist_CNN_augmented / " + str(self.optimizer)
|
|
||||||
|
|
||||||
class LeNet_v2(Optimizable, nn.Module):
|
|
||||||
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
|
|
||||||
|
|
||||||
nn.Module.__init__(self)
|
|
||||||
self.device = device
|
|
||||||
self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
|
|
||||||
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
|
||||||
#self.fc1 = nn.Linear(4*4*50, 500)
|
|
||||||
self.fc1 = nn.Linear(1250, 500)
|
|
||||||
self.fc2 = nn.Linear(500, 10)
|
|
||||||
|
|
||||||
#print(self.conv1.weight)
|
|
||||||
parameters = {
|
|
||||||
"w1": self.conv1.weight,
|
|
||||||
"b1": self.conv1.bias,
|
|
||||||
"w2": self.conv2.weight,
|
|
||||||
"b2": self.conv2.bias,
|
|
||||||
"w3": self.fc1.weight,
|
|
||||||
"b3": self.fc1.bias,
|
|
||||||
"w4": self.fc2.weight,
|
|
||||||
"b4": self.fc2.bias,
|
|
||||||
|
|
||||||
#Data augmentation
|
|
||||||
"prob": torch.tensor(0.5, device=self.device),
|
|
||||||
"mag": torch.tensor(1.0, device=self.device),
|
|
||||||
}
|
|
||||||
Optimizable.__init__(self, parameters, optimizer)
|
|
||||||
|
|
||||||
'''
|
|
||||||
def forward(self, x): #Saturates the memory ???
|
|
||||||
x = F.relu(self.conv1(x))
|
|
||||||
x = F.max_pool2d(x, 2, 2)
|
|
||||||
x = F.relu(self.conv2(x))
|
|
||||||
x = F.max_pool2d(x, 2, 2)
|
|
||||||
#x = x.view(-1, 4*4*50)
|
|
||||||
x = x.view(x.size(0), -1)
|
|
||||||
x = F.relu(self.fc1(x))
|
|
||||||
x = self.fc2(x)
|
|
||||||
return F.log_softmax(x, dim=1)
|
|
||||||
'''
|
|
||||||
def forward(self, x):
|
|
||||||
|
|
||||||
if random.random() < self.parameters["prob"].item():
|
|
||||||
#print(self.parameters["prob"])
|
|
||||||
#x = [T.ToTensor()(
|
|
||||||
# TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
|
|
||||||
# for im in torch.unbind(x,dim=0)]
|
|
||||||
#x = torch.stack(x,dim=0)
|
|
||||||
|
|
||||||
#x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
|
|
||||||
# for im in torch.unbind(x,dim=0)]
|
|
||||||
#x = torch.stack(x,dim=0)
|
|
||||||
|
|
||||||
#x = [im + self.parameters["mag"]
|
|
||||||
# for im in torch.unbind(x,dim=0)]
|
|
||||||
#x = torch.stack(x,dim=0)
|
|
||||||
|
|
||||||
batch_size = x.shape[0]
|
|
||||||
# create transformation (rotation)
|
|
||||||
alpha = self.parameters["mag"] * 180 # in degrees
|
|
||||||
angle = torch.ones(batch_size, device=self.device) * alpha
|
|
||||||
|
|
||||||
# define the rotation center
|
|
||||||
center = torch.ones(batch_size, 2, device=self.device)
|
|
||||||
center[..., 0] = x.shape[3] / 2 # x
|
|
||||||
center[..., 1] = x.shape[2] / 2 # y
|
|
||||||
|
|
||||||
#print(x.shape, center)
|
|
||||||
# define the scale factor
|
|
||||||
scale = torch.ones(batch_size, device=self.device)
|
|
||||||
|
|
||||||
# compute the transformation matrix
|
|
||||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
|
||||||
|
|
||||||
# apply the transformation to original image
|
|
||||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
|
||||||
|
|
||||||
#print("Start Shape ", x.shape)
|
|
||||||
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = out.view(out.size(0), -1)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
return F.log_softmax(out, dim=1)
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
self.optimizer.initialize()
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
|
|
||||||
def adjust_val(self):
|
|
||||||
self.optimizer.adjust_val(self.parameters)
|
|
||||||
|
|
||||||
def eval(self):
|
|
||||||
self.parameters['prob']=torch.tensor(0.0, device=self.device)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "mnist_CNN_augmented / " + str(self.optimizer)
|
|
|
@ -1,52 +0,0 @@
|
||||||
import torch
|
|
||||||
from torch.utils.data import Dataset, DataLoader
|
|
||||||
from torchvision import transforms
|
|
||||||
import torchvision.transforms.functional as TF
|
|
||||||
|
|
||||||
class MNIST_aug(Dataset):
|
|
||||||
|
|
||||||
training_file = 'training.pt'
|
|
||||||
test_file = 'test.pt'
|
|
||||||
classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
|
|
||||||
'5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
|
|
||||||
self.set_stage(0) # initial stage
|
|
||||||
|
|
||||||
def __getitem__(self, index):
|
|
||||||
image = self.images[index]
|
|
||||||
|
|
||||||
# Just apply your transformations here
|
|
||||||
image = self.crop(image)
|
|
||||||
x = TF.to_tensor(image)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def set_stage(self, stage):
|
|
||||||
if stage == 0:
|
|
||||||
print('Using (32, 32) crops')
|
|
||||||
self.crop = transforms.RandomCrop((32, 32))
|
|
||||||
elif stage == 1:
|
|
||||||
print('Using (28, 28) crops')
|
|
||||||
self.crop = transforms.RandomCrop((28, 28))
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.images)
|
|
||||||
|
|
||||||
|
|
||||||
dataset = MyData()
|
|
||||||
loader = DataLoader(dataset,
|
|
||||||
batch_size=2,
|
|
||||||
num_workers=2,
|
|
||||||
shuffle=True)
|
|
||||||
|
|
||||||
for batch_idx, data in enumerate(loader):
|
|
||||||
print('Batch idx {}, data shape {}'.format(
|
|
||||||
batch_idx, data.shape))
|
|
||||||
|
|
||||||
loader.dataset.set_stage(1)
|
|
||||||
|
|
||||||
for batch_idx, data in enumerate(loader):
|
|
||||||
print('Batch idx {}, data shape {}'.format(
|
|
||||||
batch_idx, data.shape))
|
|
||||||
|
|
|
@ -1,150 +0,0 @@
|
||||||
#from hyperopt import *
|
|
||||||
from hyperopt_v2 import *
|
|
||||||
|
|
||||||
import torchvision.transforms.functional as TF
|
|
||||||
import torchvision.transforms as T
|
|
||||||
|
|
||||||
#from scipy import ndimage
|
|
||||||
import kornia
|
|
||||||
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
class LeNet_v3(nn.Module):
|
|
||||||
def __init__(self, num_inp, num_out):
|
|
||||||
super(LeNet_v3, self).__init__()
|
|
||||||
self.params = nn.ParameterDict({
|
|
||||||
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
|
|
||||||
'b1': nn.Parameter(torch.zeros(20)),
|
|
||||||
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
|
|
||||||
'b2': nn.Parameter(torch.zeros(50)),
|
|
||||||
'w3': nn.Parameter(torch.zeros(500,4*4*50)),
|
|
||||||
'b3': nn.Parameter(torch.zeros(500)),
|
|
||||||
'w4': nn.Parameter(torch.zeros(10, 500)),
|
|
||||||
'b4': nn.Parameter(torch.zeros(10))
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
#print("Start Shape ", x.shape)
|
|
||||||
out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.max_pool2d(out, 2)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = out.view(out.size(0), -1)
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
out = F.linear(out, self.params["w4"], self.params["b4"])
|
|
||||||
#print("Shape ", out.shape)
|
|
||||||
return F.log_softmax(out, dim=1)
|
|
||||||
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
for n, p in self.params.items():
|
|
||||||
print(n, p.grad_fn)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "mnist_CNN_augmented / "
|
|
||||||
|
|
||||||
class Data_aug(nn.Module):
|
|
||||||
def __init__(self):
|
|
||||||
super(Data_aug, self).__init__()
|
|
||||||
self.data_augmentation = True
|
|
||||||
self.params = nn.ParameterDict({
|
|
||||||
"prob": nn.Parameter(torch.tensor(0.5)),
|
|
||||||
"mag": nn.Parameter(torch.tensor(180.0))
|
|
||||||
})
|
|
||||||
|
|
||||||
#self.params["mag"].register_hook(print)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
|
|
||||||
if self.data_augmentation and self.training and random.random() < self.params["prob"]:
|
|
||||||
#print('Aug')
|
|
||||||
batch_size = x.shape[0]
|
|
||||||
# create transformation (rotation)
|
|
||||||
alpha = self.params["mag"] # in degrees
|
|
||||||
angle = torch.ones(batch_size, device=x.device) * alpha
|
|
||||||
|
|
||||||
# define the rotation center
|
|
||||||
center = torch.ones(batch_size, 2, device=x.device)
|
|
||||||
center[..., 0] = x.shape[3] / 2 # x
|
|
||||||
center[..., 1] = x.shape[2] / 2 # y
|
|
||||||
|
|
||||||
#print(x.shape, center)
|
|
||||||
# define the scale factor
|
|
||||||
scale = torch.ones(batch_size, device=x.device)
|
|
||||||
|
|
||||||
# compute the transformation matrix
|
|
||||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
|
||||||
|
|
||||||
# apply the transformation to original image
|
|
||||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
|
||||||
|
|
||||||
return x
|
|
||||||
|
|
||||||
def eval(self):
|
|
||||||
self.params['prob']=torch.tensor(0.0, device=self.device)
|
|
||||||
nn.Module.eval(self)
|
|
||||||
|
|
||||||
def data_augmentation(self, mode=True):
|
|
||||||
self.data_augmentation=mode
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
for n, p in self.params.items():
|
|
||||||
print(n, p.grad_fn)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "Data_Augmenter / "
|
|
||||||
|
|
||||||
class Augmented_model(nn.Module):
|
|
||||||
def __init__(self, model, data_augmenter):
|
|
||||||
#self.model = model
|
|
||||||
#self.data_aug = data_augmenter
|
|
||||||
super(Augmented_model, self).__init__()#nn.Module.__init__(self)
|
|
||||||
#super().__init__()
|
|
||||||
self.mods = nn.ModuleDict({
|
|
||||||
'data_aug': data_augmenter,
|
|
||||||
'model': model
|
|
||||||
})
|
|
||||||
#for name, param in self.mods.named_parameters():
|
|
||||||
# print(name, type(param.data), param.size())
|
|
||||||
|
|
||||||
#params = self.mods.named_parameters() #self.parameters()
|
|
||||||
#parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
|
|
||||||
#Optimizable.__init__(self, params, optimizer)
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
self.mods['model'].initialize()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
return self.mods['model'](self.mods['data_aug'](x))
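# NOTE: the augmenter runs inside the forward pass, so its parameters ("prob",
# "mag") live in the same autograd graph as the model weights and receive
# gradients from the same backward call.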
|
|
||||||
|
|
||||||
#def adjust(self):
|
|
||||||
# self.optimizer.adjust(self) #Parametres des dict
|
|
||||||
|
|
||||||
def data_augmentation(self, mode=True):
|
|
||||||
self.mods['data_aug'].data_augmentation=mode
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
for param in self.parameters():
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
for n, m in self.mods.items():
|
|
||||||
m.print_grad_fn()
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer)
|
|
|
@ -1,5 +0,0 @@
|
||||||
digraph {
|
|
||||||
graph [size="12,12"]
|
|
||||||
node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
|
|
||||||
94296775052080 [label=NoneType fillcolor=darkolivegreen1]
|
|
||||||
}
|
|
|
@ -1,19 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
|
||||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
|
||||||
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
|
|
||||||
-->
|
|
||||||
<!-- Title: %3 Pages: 1 -->
|
|
||||||
<svg width="75pt" height="30pt"
|
|
||||||
viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
|
||||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
|
|
||||||
<title>%3</title>
|
|
||||||
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
|
|
||||||
<!-- 94296775052080 -->
|
|
||||||
<g id="node1" class="node">
|
|
||||||
<title>94296775052080</title>
|
|
||||||
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
|
|
||||||
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
Before Width: | Height: | Size: 937 B |
|
@ -1,345 +0,0 @@
|
||||||
import math
|
|
||||||
import torch
|
|
||||||
import torchvision
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.optim as optim
|
|
||||||
|
|
||||||
|
|
||||||
class Optimizable():#nn.Module):
|
|
||||||
"""
|
|
||||||
This is the interface for anything that has parameters that need to be
|
|
||||||
optimized, somewhat like torch.nn.Model but with the right plumbing for
|
|
||||||
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
|
|
||||||
interface which does not give us enough control about the detachments.)
|
|
||||||
Nominal operation of an Optimizable at the lowest level is as follows:
|
|
||||||
o = MyOptimizable(…)
|
|
||||||
o.initialize()
|
|
||||||
loop {
|
|
||||||
o.begin()
|
|
||||||
o.zero_grad()
|
|
||||||
loss = –compute loss function from parameters–
|
|
||||||
loss.backward()
|
|
||||||
o.adjust()
|
|
||||||
}
|
|
||||||
Optimizables recursively handle updates to their optimiz*ers*.
|
|
||||||
"""
|
|
||||||
#def __init__(self):
|
|
||||||
# super(Optimizable, self).__init__()
|
|
||||||
# self.parameters = nn.Parameter(torch.zeros(()))
|
|
||||||
|
|
||||||
def __init__(self, parameters, optimizer):
|
|
||||||
#super(Optimizable, self).__init__()
|
|
||||||
self.parameters = parameters # a dict mapping names to tensors
|
|
||||||
self.optimizer = optimizer # which must itself be Optimizable!
|
|
||||||
self.all_params_with_gradients = []
|
|
||||||
#self.device = device
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
"""Initialize parameters, e.g. with a Kaiming initializer."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
"""Enable gradient tracking on current parameters."""
|
|
||||||
self.all_params_with_gradients = [] #Reinitialize to avoid overloading the memory
|
|
||||||
for name, param in self.parameters.items():
|
|
||||||
#for param in self.parameters:
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
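# NOTE: retain_grad() matters here because after the first adjust() the
# parameters are outputs of the previous update (non-leaf tensors); without
# it their .grad field would not be populated on later backward passes.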
|
|
||||||
#param.to(self.device)
|
|
||||||
#if param.device == torch.device('cuda:0'):
|
|
||||||
# print(name, param.device)
|
|
||||||
self.all_params_with_gradients.append(param)
|
|
||||||
self.optimizer.begin()
|
|
||||||
|
|
||||||
def zero_grad(self):
|
|
||||||
""" Set all gradients to zero. """
|
|
||||||
for param in self.all_params_with_gradients:
|
|
||||||
#param = param.to(self.device)
|
|
||||||
param.grad = torch.zeros(param.shape, device=param.device)
|
|
||||||
self.optimizer.zero_grad()
|
|
||||||
|
|
||||||
""" Note: at this point you would probably call .backwards() on the loss
|
|
||||||
function. """
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
""" Update parameters """
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
self.optimizer.print_grad_fn()
|
|
||||||
for n, p in self.parameters.items():
|
|
||||||
print(n," - ", p.grad_fn)
|
|
||||||
|
|
||||||
def param_grad(self):
|
|
||||||
return self.all_params_with_gradients
|
|
||||||
|
|
||||||
def param(self, param_name):
|
|
||||||
return self.parameters[param_name].item()
|
|
||||||
|
|
||||||
|
|
||||||
class MNIST_FullyConnected(Optimizable):
|
|
||||||
"""
|
|
||||||
A fully-connected NN for the MNIST task. This is Optimizable but not itself
|
|
||||||
an optimizer.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, num_inp, num_hid, num_out, optimizer):
|
|
||||||
parameters = {
|
|
||||||
"w1": torch.zeros(num_inp, num_hid).t(),
|
|
||||||
"b1": torch.zeros(num_hid).t(),
|
|
||||||
"w2": torch.zeros(num_hid, num_out).t(),
|
|
||||||
"b2": torch.zeros(num_out).t(),
|
|
||||||
}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
|
||||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
|
||||||
self.optimizer.initialize()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
"""Compute a prediction."""
|
|
||||||
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
|
|
||||||
x = torch.tanh(x)
|
|
||||||
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
|
|
||||||
x = torch.tanh(x)
|
|
||||||
x = F.log_softmax(x, dim=1)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "mnist / " + str(self.optimizer)
|
|
||||||
|
|
||||||
|
|
||||||
class NoOpOptimizer(Optimizable):#, nn.Module):
|
|
||||||
"""
|
|
||||||
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
#super(Optimizable, self).__init__()
|
|
||||||
pass
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def zero_grad(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def adjust(self, params):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def adjust_val(self, params):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "static"
|
|
||||||
|
|
||||||
class Adam(Optimizable):
|
|
||||||
"""
|
|
||||||
A fully hyperoptimizable Adam optimizer
|
|
||||||
"""
|
|
||||||
|
|
||||||
def clamp(x):
|
|
||||||
return (x.tanh() + 1.0) / 2.0
|
|
||||||
|
|
||||||
def unclamp(y):
|
|
||||||
z = y * 2.0 - 1.0
|
|
||||||
return ((1.0 + z) / (1.0 - z)).log() / 2.0
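# NOTE: clamp() squashes an unconstrained real into (0, 1) via tanh, and
# unclamp() is its inverse (atanh of 2y - 1); beta1/beta2 are stored in the
# unconstrained space so the meta-optimizer can update them freely while they
# always map back to valid momentum coefficients.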
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
alpha=0.001,
|
|
||||||
beta1=0.9,
|
|
||||||
beta2=0.999,
|
|
||||||
log_eps=-8.0,
|
|
||||||
optimizer=NoOpOptimizer(),
|
|
||||||
device = torch.device('cuda')
|
|
||||||
):
|
|
||||||
self.device = device
|
|
||||||
parameters = {
|
|
||||||
"alpha": torch.tensor(alpha, device=self.device),
|
|
||||||
"beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
|
|
||||||
"beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
|
|
||||||
"log_eps": torch.tensor(log_eps, device=self.device),
|
|
||||||
}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
self.num_adjustments = 0
|
|
||||||
self.num_adjustments_val = 0
|
|
||||||
self.cache = {}
|
|
||||||
|
|
||||||
for name, param in parameters.items():
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
|
|
||||||
#param.to(self.device)
|
|
||||||
#if param.device == torch.device('cuda:0'):
|
|
||||||
# print(name, param.device)
|
|
||||||
|
|
||||||
def adjust(self, params): #Update the learning (network) parameters
|
|
||||||
self.num_adjustments += 1
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
#print('Adam update')
|
|
||||||
t = self.num_adjustments
|
|
||||||
beta1 = Adam.clamp(self.parameters["beta1"])
|
|
||||||
beta2 = Adam.clamp(self.parameters["beta2"])
|
|
||||||
for name, param in params.items():
|
|
||||||
if name == "mag": continue
|
|
||||||
if name not in self.cache:
|
|
||||||
self.cache[name] = {
|
|
||||||
"m": torch.zeros(param.shape, device=self.device),
|
|
||||||
"v": torch.zeros(param.shape, device=self.device)
|
|
||||||
+ 10.0 ** self.parameters["log_eps"].data
|
|
||||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
|
||||||
# differentiable at exactly zero
|
|
||||||
}
|
|
||||||
#print(name, param.device)
|
|
||||||
g = param.grad.detach()
|
|
||||||
self.cache[name]["m"] = m = (
|
|
||||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
|
||||||
)
|
|
||||||
self.cache[name]["v"] = v = (
|
|
||||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
|
||||||
)
|
|
||||||
self.all_params_with_gradients.append(m)
|
|
||||||
self.all_params_with_gradients.append(v)
|
|
||||||
m_hat = m / (1.0 - beta1 ** float(t))
|
|
||||||
v_hat = v / (1.0 - beta2 ** float(t))
|
|
||||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
|
|
||||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
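# NOTE: the old value is detached but alpha, beta1/beta2 and log_eps enter
# the update with their graph intact, so the new parameters still depend
# differentiably on the optimizer's hyperparameters; this is what allows the
# optimizer one level up to adjust them at the next step.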
|
|
||||||
#print(name)
|
|
||||||
|
|
||||||
def adjust_val(self, params): #Update the transformation (data augmentation) parameters
|
|
||||||
self.num_adjustments_val += 1
|
|
||||||
self.optimizer.adjust_val(self.parameters)
|
|
||||||
#print('Adam update')
|
|
||||||
t = self.num_adjustments_val
|
|
||||||
beta1 = Adam.clamp(self.parameters["beta1"])
|
|
||||||
beta2 = Adam.clamp(self.parameters["beta2"])
|
|
||||||
for name, param in params.items():
|
|
||||||
if name != "mag": continue
|
|
||||||
if name not in self.cache:
|
|
||||||
self.cache[name] = {
|
|
||||||
"m": torch.zeros(param.shape, device=self.device),
|
|
||||||
"v": torch.zeros(param.shape, device=self.device)
|
|
||||||
+ 10.0 ** self.parameters["log_eps"].data
|
|
||||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
|
||||||
# differentiable at exactly zero
|
|
||||||
}
|
|
||||||
#print(name, param.device)
|
|
||||||
g = param.grad.detach()
|
|
||||||
self.cache[name]["m"] = m = (
|
|
||||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
|
||||||
)
|
|
||||||
self.cache[name]["v"] = v = (
|
|
||||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
|
||||||
)
|
|
||||||
self.all_params_with_gradients.append(m)
|
|
||||||
self.all_params_with_gradients.append(v)
|
|
||||||
m_hat = m / (1.0 - beta1 ** float(t))
|
|
||||||
v_hat = v / (1.0 - beta2 ** float(t))
|
|
||||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
|
|
||||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
|
||||||
#print(name)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
|
|
||||||
'''
|
|
||||||
class SGD(Optimizable):
|
|
||||||
"""
|
|
||||||
A hyperoptimizable SGD
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
|
|
||||||
parameters = {"alpha": torch.tensor(alpha)}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
|
|
||||||
def adjust(self, params):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
for name, param in params.items():
|
|
||||||
g = param.grad.detach()
|
|
||||||
params[name] = param.detach() - g * self.parameters["alpha"]
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
|
|
||||||
|
|
||||||
class SGDPerParam(Optimizable):
|
|
||||||
"""
|
|
||||||
Like above, but can be taught a separate step size for each parameter it
|
|
||||||
tunes.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
|
|
||||||
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
|
|
||||||
def adjust(self, params):
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
for name, param in params.items():
|
|
||||||
g = param.grad.detach()
|
|
||||||
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "sgd(%s) / " % str(
|
|
||||||
{k: t.item() for k, t in self.parameters.items()}
|
|
||||||
) + str(self.optimizer)
|
|
||||||
'''
|
|
||||||
'''
|
|
||||||
class AdamBaydin(Optimizable):
|
|
||||||
""" Same as above, but only optimizes the learning rate, treating the
|
|
||||||
remaining hyperparameters as constants. """
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
alpha=0.001,
|
|
||||||
beta1=0.9,
|
|
||||||
beta2=0.999,
|
|
||||||
log_eps=-8.0,
|
|
||||||
optimizer=NoOpOptimizer(),
|
|
||||||
):
|
|
||||||
parameters = {"alpha": torch.tensor(alpha)}
|
|
||||||
self.beta1 = beta1
|
|
||||||
self.beta2 = beta2
|
|
||||||
self.log_eps = log_eps
|
|
||||||
super().__init__(parameters, optimizer)
|
|
||||||
self.num_adjustments = 0
|
|
||||||
self.cache = {}
|
|
||||||
|
|
||||||
def adjust(self, params):
|
|
||||||
self.num_adjustments += 1
|
|
||||||
self.optimizer.adjust(self.parameters)
|
|
||||||
t = self.num_adjustments
|
|
||||||
beta1 = self.beta1
|
|
||||||
beta2 = self.beta2
|
|
||||||
for name, param in params.items():
|
|
||||||
if name not in self.cache:
|
|
||||||
self.cache[name] = {
|
|
||||||
"m": torch.zeros(param.shape),
|
|
||||||
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
|
|
||||||
}
|
|
||||||
g = param.grad.detach()
|
|
||||||
self.cache[name]["m"] = m = (
|
|
||||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
|
||||||
)
|
|
||||||
self.cache[name]["v"] = v = (
|
|
||||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
|
||||||
)
|
|
||||||
self.all_params_with_gradients.append(m)
|
|
||||||
self.all_params_with_gradients.append(v)
|
|
||||||
m_hat = m / (1.0 - beta1 ** float(t))
|
|
||||||
v_hat = v / (1.0 - beta2 ** float(t))
|
|
||||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
|
|
||||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
|
|
||||||
'''
|
|
|
@ -1,296 +0,0 @@
|
||||||
import math
|
|
||||||
import torch
|
|
||||||
import torchvision
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
from torch.optim.optimizer import Optimizer
|
|
||||||
|
|
||||||
class Optimizable():
|
|
||||||
"""
|
|
||||||
This is the interface for anything that has parameters that need to be
|
|
||||||
optimized, somewhat like torch.nn.Model but with the right plumbing for
|
|
||||||
hyperoptimizability. (Specifically, torch.nn.Model uses the Parameter
|
|
||||||
interface which does not give us enough control about the detachments.)
|
|
||||||
Nominal operation of an Optimizable at the lowest level is as follows:
|
|
||||||
o = MyOptimizable(…)
|
|
||||||
o.initialize()
|
|
||||||
loop {
|
|
||||||
o.begin()
|
|
||||||
o.zero_grad()
|
|
||||||
loss = –compute loss function from parameters–
|
|
||||||
loss.backward()
|
|
||||||
o.adjust()
|
|
||||||
}
|
|
||||||
Optimizables recursively handle updates to their optimiz*ers*.
|
|
||||||
"""
|
|
||||||
#def __init__(self):
|
|
||||||
# super(Optimizable, self).__init__()
|
|
||||||
# self.parameters = nn.Parameter(torch.zeros(()))
|
|
||||||
|
|
||||||
def __init__(self, parameters, optimizer):
|
|
||||||
self.params = parameters # a dict mapping names to tensors
|
|
||||||
self.optimizer = optimizer # which must itself be Optimizable!
|
|
||||||
self.all_params_with_gradients = []
|
|
||||||
#self.device = device
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
"""Initialize parameters, e.g. with a Kaiming initializer."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
"""Enable gradient tracking on current parameters."""
|
|
||||||
self.all_params_with_gradients = nn.ParameterList() #Reinitialize to avoid overloading the memory
|
|
||||||
print("Opti param :", type(self.params))
|
|
||||||
#for name, param in self.params:
|
|
||||||
if isinstance(self.params,dict): #Dict
|
|
||||||
for name, param in self.params.items():
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
|
|
||||||
self.all_params_with_gradients.append(param)
|
|
||||||
if isinstance(self.params,list): #List
|
|
||||||
for param in self.params:
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
|
|
||||||
self.all_params_with_gradients.append(param)
|
|
||||||
self.optimizer.begin()
|
|
||||||
|
|
||||||
def zero_grad(self):
|
|
||||||
""" Set all gradients to zero. """
|
|
||||||
for param in self.all_params_with_gradients:
|
|
||||||
param.grad = torch.zeros(param.shape, device=param.device)
|
|
||||||
self.optimizer.zero_grad()
|
|
||||||
|
|
||||||
""" Note: at this point you would probably call .backwards() on the loss
|
|
||||||
function. """
|
|
||||||
|
|
||||||
def adjust(self):
|
|
||||||
""" Update parameters """
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class NoOpOptimizer(Optimizable):#, nn.Module):
|
|
||||||
"""
|
|
||||||
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
#super(Optimizable, self).__init__()
|
|
||||||
pass
|
|
||||||
|
|
||||||
def initialize(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
#print("NoOpt begin")
|
|
||||||
pass
|
|
||||||
|
|
||||||
def zero_grad(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def adjust(self, params):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def step(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "static"
|
|
||||||
|
|
||||||
|
|
||||||
class SGD(Optimizer, nn.Module): #Avoid inheriting from Optimizer
|
|
||||||
"""
|
|
||||||
A hyperoptimizable SGD
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, params, lr=0.01, height=0):
|
|
||||||
self.height=height
|
|
||||||
#params: the parameters to optimize
|
|
||||||
#the rest (defaults) are the optimizer's own hyperparameters
|
|
||||||
print('SGD - H', height)
|
|
||||||
nn.Module.__init__(self)
|
|
||||||
|
|
||||||
optim_keys = ('lr','') #Move into Optimizable? #'' avoids iterating over the characters of the string...
|
|
||||||
'''
|
|
||||||
self_params = {"lr": torch.tensor(lr),
|
|
||||||
"momentum": 0,
|
|
||||||
"dampening":0,
|
|
||||||
"weight_decay":0,
|
|
||||||
"nesterov": False}
|
|
||||||
'''
|
|
||||||
#self_params = dict(lr=torch.tensor(lr),
|
|
||||||
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
|
|
||||||
|
|
||||||
self_params = nn.ParameterDict({
|
|
||||||
"lr": nn.Parameter(torch.tensor(lr)),
|
|
||||||
"momentum": nn.Parameter(torch.tensor(0.0)),
|
|
||||||
"dampening": nn.Parameter(torch.tensor(0.0)),
|
|
||||||
"weight_decay": nn.Parameter(torch.tensor(0.0)),
|
|
||||||
})
|
|
||||||
|
|
||||||
for k in self_params.keys() & optim_keys:
|
|
||||||
self_params[k].requires_grad_() # keep gradient information…
|
|
||||||
self_params[k].retain_grad() # even if not a leaf…
|
|
||||||
#self_params[k].register_hook(print)
|
|
||||||
|
|
||||||
if height==0:
|
|
||||||
optimizer = NoOpOptimizer()
|
|
||||||
else:
|
|
||||||
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
|
|
||||||
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) #Should work
|
|
||||||
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
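# NOTE: the hyper-optimizer stack is built recursively: the inner SGD
# (height - 1) receives this SGD's own hyperparameters (the keys listed in
# optim_keys) as the parameters it optimizes, and the recursion bottoms out
# with a NoOpOptimizer at height 0.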
|
|
||||||
#optimizer.register_backward_hook(print)
|
|
||||||
|
|
||||||
self.optimizer = optimizer
|
|
||||||
#if(height==0):
|
|
||||||
# for n,p in params.items():
|
|
||||||
# print(n,p)
|
|
||||||
|
|
||||||
#Optimizable.__init__(self, self_params, optimizer)
|
|
||||||
|
|
||||||
#print(type(params))
|
|
||||||
#for p in params:
|
|
||||||
# print(type(p))
|
|
||||||
Optimizer.__init__(self, params, self_params)
|
|
||||||
|
|
||||||
for group in self.param_groups:
|
|
||||||
for p in group['params']:
|
|
||||||
print(type(p.data), p.size())
|
|
||||||
print('End SGD-H', height)
|
|
||||||
|
|
||||||
def begin(self):
|
|
||||||
for group in self.param_groups:
|
|
||||||
for p in group['params']:
|
|
||||||
#print(type(p.data), p.size())
|
|
||||||
p.requires_grad_() # keep gradient information…
|
|
||||||
p.retain_grad() # even if not a leaf…
|
|
||||||
#p.register_hook(lambda x: print(self.height, x.grad_fn))
|
|
||||||
|
|
||||||
self.optimizer.begin()
|
|
||||||
|
|
||||||
def print_grad_fn(self):
|
|
||||||
self.optimizer.print_grad_fn()
|
|
||||||
for group in self.param_groups:
|
|
||||||
for i, p in enumerate(group['params']):
|
|
||||||
print(self.height," - ", i, p.grad_fn)
|
|
||||||
|
|
||||||
#def adjust(self, params):
|
|
||||||
# self.optimizer.adjust(self.params)
|
|
||||||
# for name, param in params.items():
|
|
||||||
# g = param.grad.detach()
|
|
||||||
# params[name] = param.detach() - g * self.params["lr"]
|
|
||||||
|
|
||||||
def step(self):
|
|
||||||
"""Performs a single optimization step.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
closure (callable, optional): A closure that reevaluates the model
|
|
||||||
and returns the loss.
|
|
||||||
"""
|
|
||||||
print('SGD start')
|
|
||||||
self.optimizer.step()
|
|
||||||
|
|
||||||
for group in self.param_groups:
|
|
||||||
for i, p in enumerate(group['params']):
|
|
||||||
if p.grad is None:
|
|
||||||
continue
|
|
||||||
#d_p = p.grad.data
|
|
||||||
d_p = p.grad.detach()
|
|
||||||
|
|
||||||
#print(group['lr'])
|
|
||||||
p.data.add_(-group['lr'].item(), d_p)
|
|
||||||
#group['params'][i] = p.detach() - d_p * group['lr']
|
|
||||||
p.data -= group['lr']*d_p #Do not use .data: it loses gradient information
|
|
||||||
|
|
||||||
for p in group['params']:
|
|
||||||
if p.grad is None:
|
|
||||||
print(p, p.grad)
|
|
||||||
continue
|
|
||||||
|
|
||||||
print("SGD end")
|
|
||||||
#return loss
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
|
|
||||||
|
|
||||||
|
|
||||||
class Adam(Optimizable, nn.Module):
|
|
||||||
"""
|
|
||||||
A fully hyperoptimizable Adam optimizer
|
|
||||||
"""
|
|
||||||
|
|
||||||
def clamp(x):
|
|
||||||
return (x.tanh() + 1.0) / 2.0
|
|
||||||
|
|
||||||
def unclamp(y):
|
|
||||||
z = y * 2.0 - 1.0
|
|
||||||
return ((1.0 + z) / (1.0 - z)).log() / 2.0
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
alpha=0.001,
|
|
||||||
beta1=0.9,
|
|
||||||
beta2=0.999,
|
|
||||||
log_eps=-8.0,
|
|
||||||
optimizer=NoOpOptimizer(),
|
|
||||||
device = torch.device('cuda')
|
|
||||||
):
|
|
||||||
#super(Adam, self).__init__()
|
|
||||||
nn.Module.__init__(self)
|
|
||||||
self.device = device
|
|
||||||
params = nn.ParameterDict({
|
|
||||||
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
|
|
||||||
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
|
|
||||||
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
|
|
||||||
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
|
|
||||||
})
|
|
||||||
Optimizable.__init__(self, params, optimizer)
|
|
||||||
self.num_adjustments = 0
|
|
||||||
self.cache = {}
|
|
||||||
|
|
||||||
for name, param in params.items():
|
|
||||||
param.requires_grad_() # keep gradient information…
|
|
||||||
param.retain_grad() # even if not a leaf…
|
|
||||||
|
|
||||||
def adjust(self, params, pytorch_mod=False):
|
|
||||||
self.num_adjustments += 1
|
|
||||||
self.optimizer.adjust(self.params)
|
|
||||||
t = self.num_adjustments
|
|
||||||
beta1 = Adam.clamp(self.params["beta1"])
|
|
||||||
beta2 = Adam.clamp(self.params["beta2"])
|
|
||||||
|
|
||||||
updated_param = {}
|
|
||||||
if pytorch_mod:
|
|
||||||
params = params.named_parameters(prefix='') #Rename this input argument...
|
|
||||||
|
|
||||||
for name, param in params:
|
|
||||||
if name not in self.cache:
|
|
||||||
self.cache[name] = {
|
|
||||||
"m": torch.zeros(param.shape, device=self.device),
|
|
||||||
"v": torch.zeros(param.shape, device=self.device)
|
|
||||||
+ 10.0 ** self.params["log_eps"].data
|
|
||||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
|
||||||
# differentiable at exactly zero
|
|
||||||
}
|
|
||||||
#print(name, param.device)
|
|
||||||
g = param.grad.detach()
|
|
||||||
self.cache[name]["m"] = m = (
|
|
||||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
|
||||||
)
|
|
||||||
self.cache[name]["v"] = v = (
|
|
||||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
|
||||||
)
|
|
||||||
self.all_params_with_gradients.append(nn.Parameter(m)) #Risk of overloading the memory => would a dict be better?
|
|
||||||
self.all_params_with_gradients.append(nn.Parameter(v))
|
|
||||||
m_hat = m / (1.0 - beta1 ** float(t))
|
|
||||||
v_hat = v / (1.0 - beta2 ** float(t))
|
|
||||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
|
|
||||||
updated_param[name] = param.detach() - self.params["alpha"] * dparam
|
|
||||||
|
|
||||||
if pytorch_mod: params.update(updated_param) #Rename this input argument...
|
|
||||||
else: params = updated_param
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "adam(" + str(self.params) + ") / " + str(self.optimizer)
|
|
|
@ -1,182 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import json, math, time, os
|
|
||||||
from hyperopt import *
|
|
||||||
import gc
|
|
||||||
|
|
||||||
BATCH_SIZE = 300
|
|
||||||
|
|
||||||
mnist_train = torchvision.datasets.MNIST(
|
|
||||||
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
|
|
||||||
)
|
|
||||||
|
|
||||||
mnist_test = torchvision.datasets.MNIST(
|
|
||||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
|
||||||
)
|
|
||||||
|
|
||||||
dl_train = torch.utils.data.DataLoader(
|
|
||||||
mnist_train, batch_size=BATCH_SIZE, shuffle=False
|
|
||||||
)
|
|
||||||
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
|
|
||||||
|
|
||||||
|
|
||||||
def test(model):
|
|
||||||
for i, (features_, labels_) in enumerate(dl_test):
|
|
||||||
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
|
|
||||||
pred = model.forward(features)
|
|
||||||
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
|
|
||||||
|
|
||||||
|
|
||||||
def train(model, epochs=3, height=1):
|
|
||||||
stats = []
|
|
||||||
for epoch in range(epochs):
|
|
||||||
for i, (features_, labels_) in enumerate(dl_train):
|
|
||||||
t0 = time.process_time()
|
|
||||||
model.begin()
|
|
||||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
|
||||||
pred = model.forward(
|
|
||||||
features
|
|
||||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
|
||||||
loss = F.nll_loss(pred, labels)
|
|
||||||
model.zero_grad()
|
|
||||||
loss.backward(create_graph=True)
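# NOTE: create_graph=True keeps the graph of this backward pass, so the
# gradients themselves stay differentiable and the optimizer's
# hyperparameters can receive gradients when adjust() reuses them in the
# parameter update.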
|
|
||||||
model.adjust()
|
|
||||||
tf = time.process_time()
|
|
||||||
data = {
|
|
||||||
"time": tf - t0,
|
|
||||||
"iter": epoch * len(dl_train) + i,
|
|
||||||
"loss": loss.item(),
|
|
||||||
"params": {
|
|
||||||
k: v.item()
|
|
||||||
for k, v in model.optimizer.parameters.items()
|
|
||||||
if "." not in k
|
|
||||||
},
|
|
||||||
}
|
|
||||||
stats.append(data)
|
|
||||||
return stats
|
|
||||||
|
|
||||||
|
|
||||||
def run(opt, name="out", usr={}, epochs=3, height=1):
|
|
||||||
torch.manual_seed(0x42)
|
|
||||||
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
|
||||||
print("Running...", str(model))
|
|
||||||
model.initialize()
|
|
||||||
log = train(model, epochs, height)
|
|
||||||
acc = test(model)
|
|
||||||
out = {"acc": acc, "log": log, "usr": usr}
|
|
||||||
with open("log/%s.json" % name, "w+") as f:
|
|
||||||
json.dump(out, f, indent=True)
|
|
||||||
times = [x["time"] for x in log]
|
|
||||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
|
||||||
print("Final accuracy:", acc)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def sgd_experiments():
|
|
||||||
run(SGD(0.01), "sgd", epochs=1)
|
|
||||||
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
|
|
||||||
alpha = out["log"][-1]["params"]["alpha"]
|
|
||||||
print(alpha)
|
|
||||||
run(SGD(alpha), "sgd-final", epochs=1)
|
|
||||||
|
|
||||||
|
|
||||||
def adam_experiments():
|
|
||||||
run(Adam(), "adam", epochs=1)
|
|
||||||
print()
|
|
||||||
mo = SGDPerParam(
|
|
||||||
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
|
|
||||||
)
|
|
||||||
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
|
|
||||||
p = out["log"][-1]["params"]
|
|
||||||
alpha = p["alpha"]
|
|
||||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
|
||||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
|
||||||
log_eps = p["log_eps"]
|
|
||||||
print(alpha, beta1, beta2, log_eps)
|
|
||||||
print(mo)
|
|
||||||
run(
|
|
||||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
|
||||||
"adam+sgd-final",
|
|
||||||
epochs=1,
|
|
||||||
)
|
|
||||||
print()
|
|
||||||
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
|
|
||||||
p = out["log"][-1]["params"]
|
|
||||||
alpha = p["alpha"]
|
|
||||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
|
||||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
|
||||||
log_eps = p["log_eps"]
|
|
||||||
print(alpha, beta1, beta2, log_eps)
|
|
||||||
run(
|
|
||||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
|
||||||
"adam2-final",
|
|
||||||
epochs=1,
|
|
||||||
)
|
|
||||||
print()
|
|
||||||
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
|
|
||||||
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
|
|
||||||
p = out["log"][-1]["params"]
|
|
||||||
alpha = p["alpha"]
|
|
||||||
print(alpha)
|
|
||||||
print(mo)
|
|
||||||
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
|
|
||||||
print()
|
|
||||||
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
|
|
||||||
p = out["log"][-1]["params"]
|
|
||||||
alpha = p["alpha"]
|
|
||||||
print(alpha)
|
|
||||||
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
|
|
||||||
|
|
||||||
|
|
||||||
def surface():
|
|
||||||
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
|
|
||||||
for log_alpha in np.linspace(-3, 2, 10):
|
|
||||||
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
|
|
||||||
|
|
||||||
|
|
||||||
def make_sgd_stack(height, top):
|
|
||||||
if height == 0:
|
|
||||||
return SGD(alpha=top)
|
|
||||||
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
|
|
||||||
|
|
||||||
|
|
||||||
def make_adam_stack(height, top=0.0000001):
|
|
||||||
if height == 0:
|
|
||||||
return Adam(alpha=top)
|
|
||||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
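# NOTE: make_sgd_stack / make_adam_stack build a chain of height + 1
# optimizers in which each level's hyperparameters are tuned by the optimizer
# built by the recursive call below it; the deepest one keeps the default
# NoOpOptimizer, so its own hyperparameters stay fixed.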
|
|
||||||
|
|
||||||
|
|
||||||
def stack_test():
|
|
||||||
for top in np.linspace(-7, 3, 20):
|
|
||||||
for height in range(6):
|
|
||||||
print("height =", height, "to p=", top)
|
|
||||||
opt = make_sgd_stack(height, 10 ** top)
|
|
||||||
run(
|
|
||||||
opt,
|
|
||||||
"metasgd3-%d@%+.2f" % (height, top),
|
|
||||||
{"height": height, "top": top},
|
|
||||||
epochs=1,
|
|
||||||
height=height,
|
|
||||||
)
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
|
|
||||||
def perf_test():
|
|
||||||
for h in range(51):
|
|
||||||
print("height:", h)
|
|
||||||
# opt = make_sgd_stack(h, 0.01)
|
|
||||||
opt = make_adam_stack(h)
|
|
||||||
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
try:
|
|
||||||
os.mkdir("log")
|
|
||||||
except:
|
|
||||||
print("log/ exists already")
|
|
||||||
|
|
||||||
surface()
|
|
||||||
sgd_experiments()
|
|
||||||
adam_experiments()
|
|
||||||
stack_test()
|
|
||||||
perf_test()
|
|
|
@ -1,5 +0,0 @@
|
||||||
numpy==1.17.2
|
|
||||||
Pillow==6.2.0
|
|
||||||
six==1.12.0
|
|
||||||
torch==1.2.0
|
|
||||||
torchvision==0.4.0
|
|
|
@ -1,344 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import json, math, time, os
|
|
||||||
from data_aug import *
|
|
||||||
#from data_aug_v2 import *
|
|
||||||
import gc
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from torchviz import make_dot, make_dot_from_trace
|
|
||||||
|
|
||||||
from torch.utils.data import SubsetRandomSampler
|
|
||||||
|
|
||||||
BATCH_SIZE = 300
|
|
||||||
#TEST_SIZE = 10000
|
|
||||||
TEST_SIZE = 300
|
|
||||||
DATA_LIMIT = 10
|
|
||||||
|
|
||||||
'''
|
|
||||||
data_train = torchvision.datasets.MNIST(
|
|
||||||
"./data", train=True, download=True,
|
|
||||||
transform=torchvision.transforms.Compose([
|
|
||||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
|
||||||
torchvision.transforms.ToTensor()
|
|
||||||
])
|
|
||||||
)
|
|
||||||
data_test = torchvision.datasets.MNIST(
|
|
||||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
|
||||||
)
|
|
||||||
|
|
||||||
'''
|
|
||||||
data_train = torchvision.datasets.CIFAR10(
|
|
||||||
"./data", train=True, download=True,
|
|
||||||
transform=torchvision.transforms.Compose([
|
|
||||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
|
||||||
torchvision.transforms.ToTensor()
|
|
||||||
])
|
|
||||||
)
|
|
||||||
|
|
||||||
data_test = torchvision.datasets.CIFAR10(
|
|
||||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
|
||||||
)
|
|
||||||
|
|
||||||
train_subset_indices=range(int(len(data_train)/2))
|
|
||||||
val_subset_indices=range(int(len(data_train)/2),len(data_train))
|
|
||||||
|
|
||||||
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
|
|
||||||
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
|
|
||||||
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
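# NOTE: the training set is split into two halves: dl_train drives the weight
# updates (model.adjust) while dl_val serves as a validation stream on which
# only the augmentation parameters are updated (model.adjust_val); see
# train_one_epoch(train_data=False) below.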
|
|
||||||
|
|
||||||
def test(model, reshape_in=True, device = torch.device('cuda')):
|
|
||||||
for i, (features_, labels_) in enumerate(dl_test):
|
|
||||||
if reshape_in :
|
|
||||||
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
|
|
||||||
else:
|
|
||||||
features, labels =features_, labels_
|
|
||||||
|
|
||||||
features, labels = features.to(device), labels.to(device)
|
|
||||||
|
|
||||||
pred = model.forward(features)
|
|
||||||
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
|
|
||||||
|
|
||||||
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
|
|
||||||
if train_data: dl = dl_train
|
|
||||||
else: dl = dl_val
|
|
||||||
for i, (features_, labels_) in enumerate(dl):
|
|
||||||
if i > DATA_LIMIT : break
|
|
||||||
#t0 = time.process_time()
|
|
||||||
|
|
||||||
if reshape_in :
|
|
||||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
|
||||||
else:
|
|
||||||
features, labels =features_, labels_
|
|
||||||
|
|
||||||
features, labels = features.to(device), labels.to(device)
|
|
||||||
|
|
||||||
#optimizer.begin()
|
|
||||||
#optimizer.zero_grad()
|
|
||||||
model.begin()
|
|
||||||
model.zero_grad()
|
|
||||||
pred = model.forward(features)
|
|
||||||
|
|
||||||
#loss = F.nll_loss(pred, labels)
|
|
||||||
loss = F.cross_entropy(pred,labels)
|
|
||||||
|
|
||||||
#model.print_grad_fn()
|
|
||||||
#optimizer.print_grad_fn()
|
|
||||||
#print('-'*50)
|
|
||||||
|
|
||||||
loss.backward(create_graph=True)
|
|
||||||
|
|
||||||
#optimizer.step()
|
|
||||||
if train_data: model.adjust()
|
|
||||||
else: model.adjust_val()
|
|
||||||
|
|
||||||
#tf = time.process_time()
|
|
||||||
#data = {
|
|
||||||
# "time": tf - t0,
|
|
||||||
# "iter": epoch * len(dl_train) + i,
|
|
||||||
# "loss": loss.item(),
|
|
||||||
# "params": {
|
|
||||||
# k: v.item()
|
|
||||||
# for k, v in model.optimizer.parameters.items()
|
|
||||||
# if "." not in k
|
|
||||||
# },
|
|
||||||
#}
|
|
||||||
#stats.append(data)
|
|
||||||
|
|
||||||
#print_torch_mem(i)
|
|
||||||
return loss.item()
|
|
||||||
|
|
||||||
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
|
|
||||||
log = []
|
|
||||||
for epoch in range(epochs):
|
|
||||||
|
|
||||||
#dl_train.dataset.transform=torchvision.transforms.Compose([
|
|
||||||
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
|
||||||
# torchvision.transforms.ToTensor()
|
|
||||||
#])
|
|
||||||
viz_data(fig_name='res/data_sample')
|
|
||||||
t0 = time.process_time()
|
|
||||||
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
|
|
||||||
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
|
|
||||||
|
|
||||||
#acc = test(model=model, reshape_in=reshape_in, device=device)
|
|
||||||
acc = 0
|
|
||||||
|
|
||||||
|
|
||||||
tf = time.process_time()
|
|
||||||
data = {
|
|
||||||
"time": tf - t0,
|
|
||||||
"epoch": epoch,
|
|
||||||
"loss": loss,
|
|
||||||
"acc": acc,
|
|
||||||
"params": {
|
|
||||||
k: v.item()
|
|
||||||
for k, v in model.optimizer.parameters.items()
|
|
||||||
#for k, v in model.mods.data_aug.params.named_parameters()
|
|
||||||
if "." not in k
|
|
||||||
|
|
||||||
},
|
|
||||||
}
|
|
||||||
log.append(data)
|
|
||||||
|
|
||||||
|
|
||||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
|
|
||||||
param = [p for p in model.param_grad() if p.grad is not None]
|
|
||||||
if(len(param)!=0):
|
|
||||||
print(param[-2],' / ', param[-2].grad)
|
|
||||||
print(param[-1],' / ', param[-1].grad)
|
|
||||||
return log
|
|
||||||
|
|
||||||
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
|
|
||||||
stats = []
|
|
||||||
for epoch in range(epochs):
|
|
||||||
for i, (features_, labels_) in enumerate(dl_train):
|
|
||||||
t0 = time.process_time()
|
|
||||||
model.begin()
|
|
||||||
if reshape_in :
|
|
||||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
|
||||||
else:
|
|
||||||
features, labels =features_, labels_
|
|
||||||
|
|
||||||
features, labels = features.to(device), labels.to(device)
|
|
||||||
|
|
||||||
pred = model.forward(
|
|
||||||
features
|
|
||||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
|
||||||
#loss = F.nll_loss(pred, labels)
|
|
||||||
loss = F.cross_entropy(pred,labels)
|
|
||||||
|
|
||||||
#print('-'*50)
|
|
||||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
|
||||||
#if(len(param)!=0):
|
|
||||||
# print(param[-2],' / ', param[-2].grad)
|
|
||||||
# print(param[-1],' / ', param[-1].grad)
|
|
||||||
|
|
||||||
model.zero_grad()
|
|
||||||
loss.backward(create_graph=True)
|
|
||||||
model.adjust()
|
|
||||||
tf = time.process_time()
|
|
||||||
data = {
|
|
||||||
"time": tf - t0,
|
|
||||||
"iter": epoch * len(dl_train) + i,
|
|
||||||
"loss": loss.item(),
|
|
||||||
"params": {
|
|
||||||
k: v.item()
|
|
||||||
for k, v in model.optimizer.parameters.items()
|
|
||||||
if "." not in k
|
|
||||||
},
|
|
||||||
}
|
|
||||||
stats.append(data)
|
|
||||||
|
|
||||||
print('-'*50)
|
|
||||||
i=0
|
|
||||||
for obj in gc.get_objects():
|
|
||||||
try:
|
|
||||||
if (torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data))) and len(obj.size())>1:
|
|
||||||
print(i, type(obj), obj.size())
|
|
||||||
i+=1
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
|
|
||||||
param = [p for p in model.param_grad() if p.grad is not None]
|
|
||||||
if(len(param)!=0):
|
|
||||||
print(param[-2],' / ', param[-2].grad)
|
|
||||||
print(param[-1],' / ', param[-1].grad)
|
|
||||||
return stats
|
|
||||||
|
|
||||||
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
|
|
||||||
torch.manual_seed(0x42)
|
|
||||||
if not cnn:
|
|
||||||
reshape_in = True
|
|
||||||
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
|
||||||
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
|
|
||||||
|
|
||||||
else:
|
|
||||||
reshape_in = False
|
|
||||||
#model = LeNet(1, 10,opt, device)
|
|
||||||
#model = LeNet_v2(1, 10,opt, device).to(device=device)
|
|
||||||
model = LeNet_v2(3, 10,opt, device).to(device=device)
|
|
||||||
optimizer=None
|
|
||||||
'''
|
|
||||||
m = LeNet_v3(1, 10)
|
|
||||||
a = Data_aug()
|
|
||||||
model = Augmented_model(model=m,
|
|
||||||
data_augmenter=a,
|
|
||||||
optimizer=opt).to(device) # same optimizer used twice?...
|
|
||||||
'''
|
|
||||||
'''
|
|
||||||
m = LeNet_v3(1, 10)
|
|
||||||
a = Data_aug()
|
|
||||||
model = Augmented_model(model=m, data_augmenter=a).to(device)
|
|
||||||
#optimizer = SGD(model.parameters())
|
|
||||||
optimizer = SGD(model.parameters(), lr=0.01, height=1)
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
#for idx, m in enumerate(model.modules()):
|
|
||||||
# print(idx, '->', m)
|
|
||||||
print("Running...", str(model))
|
|
||||||
model.initialize()
|
|
||||||
#print_model(model)
|
|
||||||
#model.data_augmentation(False)
|
|
||||||
#model.eval()
|
|
||||||
|
|
||||||
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
|
|
||||||
model.eval()
|
|
||||||
acc = test(model, reshape_in, device=device)
|
|
||||||
|
|
||||||
|
|
||||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
|
||||||
#if(len(param)!=0):
|
|
||||||
# print(param[-2],' / ', param[-2].grad)
|
|
||||||
# print(param[-1],' / ', param[-1].grad)
|
|
||||||
|
|
||||||
out = {"acc": acc, "log": log, "usr": usr}
|
|
||||||
with open("log/%s.json" % name, "w+") as f:
|
|
||||||
json.dump(out, f, indent=True)
|
|
||||||
times = [x["time"] for x in log]
|
|
||||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
|
||||||
print("Final accuracy:", acc)
|
|
||||||
|
|
||||||
#plot_res(log, fig_name='res/'+name)
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
|
|
||||||
#print(height,device)
|
|
||||||
if height == 0:
|
|
||||||
return Adam(alpha=top, device=device)
|
|
||||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
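# Note (added for clarity, not part of the original file): make_adam_stack builds a nested
# "hyperoptimizer" chain with the custom Adam class used in this script. For example,
#   make_adam_stack(height=1, top=0.001)
# expands to
#   Adam(alpha=0.001, optimizer=Adam(alpha=0.001))
# where each level's `optimizer` argument appears to be used to adjust that level's own
# hyperparameters during model.adjust(); `top` sets the initial alpha at every level.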
|
|
||||||
|
|
||||||
def plot_res(log, fig_name='res'):
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
|
|
||||||
ax[0].set_title('Loss')
|
|
||||||
ax[0].plot([x["loss"] for x in log])
|
|
||||||
|
|
||||||
ax[1].set_title('Acc')
|
|
||||||
ax[1].plot([x["acc"] for x in log])
|
|
||||||
|
|
||||||
ax[2].set_title('mag')
|
|
||||||
ax[2].plot([x["data_aug"] for x in log])
|
|
||||||
|
|
||||||
plt.savefig(fig_name)
|
|
||||||
|
|
||||||
def print_torch_mem(add_info=''):
|
|
||||||
|
|
||||||
nb=0
|
|
||||||
max_size=0
|
|
||||||
for obj in gc.get_objects():
|
|
||||||
try:
|
|
||||||
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
|
|
||||||
#print(i, type(obj), obj.size())
|
|
||||||
size = np.sum(obj.size())
|
|
||||||
if(size>max_size): max_size=size
|
|
||||||
nb+=1
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
print(add_info, "- PyTorch tensor count:", nb, " / Max dim:", max_size)
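# Note (added for clarity): the gc-based count above is only a rough proxy. For tensors on the
# GPU, torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated() report the allocated
# bytes directly, e.g.:
#   print(add_info, "- CUDA allocated (MB):", torch.cuda.memory_allocated() / 1024**2)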
|
|
||||||
|
|
||||||
def print_model(model, fig_name='graph/graph'): # Does not seem to work for models defined in a functional style
|
|
||||||
x = torch.randn(1,1,28,28, device=device)
|
|
||||||
dot=make_dot(model(x), params=dict(model.named_parameters()))
|
|
||||||
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
|
|
||||||
dot.render(fig_name)
|
|
||||||
print("Model graph generated !")
|
|
||||||
|
|
||||||
def viz_data(fig_name='data_sample'):
|
|
||||||
|
|
||||||
features_, labels_ = next(iter(dl_train))
|
|
||||||
plt.figure(figsize=(10,10))
|
|
||||||
#for i, (features_, labels_) in enumerate(dl_train):
|
|
||||||
for i in range(25):
|
|
||||||
if i==25: break
|
|
||||||
#print(features_.size(), labels_.size())
|
|
||||||
|
|
||||||
plt.subplot(5,5,i+1)
|
|
||||||
plt.xticks([])
|
|
||||||
plt.yticks([])
|
|
||||||
plt.grid(False)
|
|
||||||
|
|
||||||
img = features_[i,0,:,:]
|
|
||||||
|
|
||||||
#print('im shape',img.shape)
|
|
||||||
plt.imshow(img, cmap=plt.cm.binary)
|
|
||||||
plt.xlabel(labels_[i].item())
|
|
||||||
|
|
||||||
plt.savefig(fig_name)
|
|
||||||
|
|
||||||
##########################################
|
|
||||||
if __name__ == "__main__":
|
|
||||||
try:
|
|
||||||
os.mkdir("log")
|
|
||||||
except:
|
|
||||||
print("log/ exists already")
|
|
||||||
|
|
||||||
device = torch.device('cuda')
|
|
||||||
|
|
||||||
run(make_adam_stack(height=1, top=0.001, device=device),
|
|
||||||
"Augmented_MNIST",
|
|
||||||
epochs=100,
|
|
||||||
cnn=True,
|
|
||||||
device = device)
|
|
||||||
print()
|
|
|
@@ -1,73 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
## build the neural network class
|
|
||||||
# weight initialization
|
|
||||||
def weight_variable(shape, name = None):
|
|
||||||
initial = tf.truncated_normal(shape, stddev=0.1)
|
|
||||||
return tf.Variable(initial, name = name)
|
|
||||||
|
|
||||||
# bias initialization
|
|
||||||
def bias_variable(shape, name = None):
|
|
||||||
initial = tf.constant(0.1, shape=shape) # positive bias
|
|
||||||
return tf.Variable(initial, name = name)
|
|
||||||
|
|
||||||
# 2D convolution
|
|
||||||
def conv2d(x, W, name = None):
|
|
||||||
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
|
|
||||||
|
|
||||||
# max pooling
|
|
||||||
def max_pool_2x2(x, name = None):
|
|
||||||
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
|
|
||||||
padding='SAME', name = name)
|
|
||||||
|
|
||||||
def LeNet(images, num_classes):
|
|
||||||
# tunable hyperparameters for nn architecture
|
|
||||||
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
|
|
||||||
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
|
|
||||||
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
|
|
||||||
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
|
|
||||||
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
|
|
||||||
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
|
|
||||||
|
|
||||||
#print(images.shape)
|
|
||||||
# 1.layer: convolution + max pooling
|
|
||||||
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
|
|
||||||
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
|
|
||||||
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
|
|
||||||
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
|
|
||||||
#print(h_conv1_tf.shape)
|
|
||||||
#print(h_pool1_tf.shape)
|
|
||||||
# 2.layer: convolution + max pooling
|
|
||||||
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
|
|
||||||
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
|
|
||||||
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
|
|
||||||
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
|
|
||||||
|
|
||||||
#print(h_pool2_tf.shape)
|
|
||||||
|
|
||||||
# 4.layer: fully connected
|
|
||||||
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
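# Note (added for clarity): the 5*5 spatial size assumes 32x32 inputs (CIFAR-10):
# 32 -> 28 (5x5 VALID conv) -> 14 (2x2 pool) -> 10 (5x5 VALID conv) -> 5 (2x2 pool).
# With 28x28 MNIST inputs this would be 4*4 instead, which is what the stale
# "(4*4*32, 1024)" comment refers to.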
|
|
||||||
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
|
|
||||||
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
|
|
||||||
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
|
|
||||||
name = 'h_fc1_tf') # (.,1024)
|
|
||||||
|
|
||||||
# add dropout
|
|
||||||
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
|
|
||||||
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
|
|
||||||
print(h_fc1_tf.shape)
|
|
||||||
|
|
||||||
# 5.layer: fully connected
|
|
||||||
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
|
|
||||||
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
|
|
||||||
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
|
|
||||||
# predicted probabilities in one-hot encoding
|
|
||||||
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
|
|
||||||
|
|
||||||
# tensor of correct predictions
|
|
||||||
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
|
|
||||||
# tf.argmax(y_data_tf, 1),
|
|
||||||
# name = 'y_pred_correct_tf')
|
|
||||||
logits = z_pred_tf
|
|
||||||
return logits #y_pred_proba_tf
|
|
Old/PBA/model.py
|
@@ -1,353 +0,0 @@
|
||||||
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""PBA & AutoAugment Train/Eval module.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
import autoaugment.custom_ops as ops
|
|
||||||
from autoaugment.shake_drop import build_shake_drop_model
|
|
||||||
from autoaugment.shake_shake import build_shake_shake_model
|
|
||||||
import pba.data_utils as data_utils
|
|
||||||
import pba.helper_utils as helper_utils
|
|
||||||
from pba.wrn import build_wrn_model
|
|
||||||
from pba.resnet import build_resnet_model
|
|
||||||
|
|
||||||
from pba.LeNet import LeNet
|
|
||||||
|
|
||||||
arg_scope = tf.contrib.framework.arg_scope
|
|
||||||
|
|
||||||
|
|
||||||
def setup_arg_scopes(is_training):
|
|
||||||
"""Sets up the argscopes that will be used when building an image model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
is_training: Is the model training or not.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Arg scopes to be put around the model being constructed.
|
|
||||||
"""
|
|
||||||
|
|
||||||
batch_norm_decay = 0.9
|
|
||||||
batch_norm_epsilon = 1e-5
|
|
||||||
batch_norm_params = {
|
|
||||||
# Decay for the moving averages.
|
|
||||||
'decay': batch_norm_decay,
|
|
||||||
# epsilon to prevent 0s in variance.
|
|
||||||
'epsilon': batch_norm_epsilon,
|
|
||||||
'scale': True,
|
|
||||||
# collection containing the moving mean and moving variance.
|
|
||||||
'is_training': is_training,
|
|
||||||
}
|
|
||||||
|
|
||||||
scopes = []
|
|
||||||
|
|
||||||
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
|
|
||||||
return scopes
|
|
||||||
|
|
||||||
|
|
||||||
def build_model(inputs, num_classes, is_training, hparams):
|
|
||||||
"""Constructs the vision model being trained/evaled.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inputs: input features/images being fed to the image model being built.
|
|
||||||
num_classes: number of output classes being predicted.
|
|
||||||
is_training: is the model training or not.
|
|
||||||
hparams: additional hyperparameters associated with the image model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The logits of the image model.
|
|
||||||
"""
|
|
||||||
scopes = setup_arg_scopes(is_training)
|
|
||||||
if len(scopes) != 1:
|
|
||||||
raise ValueError('Nested scopes deprecated in py3.')
|
|
||||||
with scopes[0]:
|
|
||||||
if hparams.model_name == 'pyramid_net':
|
|
||||||
logits = build_shake_drop_model(inputs, num_classes, is_training)
|
|
||||||
elif hparams.model_name == 'wrn':
|
|
||||||
logits = build_wrn_model(inputs, num_classes, hparams.wrn_size)
|
|
||||||
elif hparams.model_name == 'shake_shake':
|
|
||||||
logits = build_shake_shake_model(inputs, num_classes, hparams,
|
|
||||||
is_training)
|
|
||||||
elif hparams.model_name == 'resnet':
|
|
||||||
logits = build_resnet_model(inputs, num_classes, hparams,
|
|
||||||
is_training)
|
|
||||||
elif hparams.model_name == 'LeNet':
|
|
||||||
logits = LeNet(inputs, num_classes)
|
|
||||||
else:
|
|
||||||
raise ValueError("Unknown model name.")
|
|
||||||
return logits
|
|
||||||
|
|
||||||
|
|
||||||
class Model(object):
|
|
||||||
"""Builds an model."""
|
|
||||||
|
|
||||||
def __init__(self, hparams, num_classes, image_size):
|
|
||||||
self.hparams = hparams
|
|
||||||
self.num_classes = num_classes
|
|
||||||
self.image_size = image_size
|
|
||||||
|
|
||||||
def build(self, mode):
|
|
||||||
"""Construct the model."""
|
|
||||||
assert mode in ['train', 'eval']
|
|
||||||
self.mode = mode
|
|
||||||
self._setup_misc(mode)
|
|
||||||
self._setup_images_and_labels(self.hparams.dataset)
|
|
||||||
self._build_graph(self.images, self.labels, mode)
|
|
||||||
|
|
||||||
self.init = tf.group(tf.global_variables_initializer(),
|
|
||||||
tf.local_variables_initializer())
|
|
||||||
|
|
||||||
def _setup_misc(self, mode):
|
|
||||||
"""Sets up miscellaneous in the model constructor."""
|
|
||||||
self.lr_rate_ph = tf.Variable(0.0, name='lrn_rate', trainable=False)
|
|
||||||
self.reuse = None if (mode == 'train') else True
|
|
||||||
self.batch_size = self.hparams.batch_size
|
|
||||||
if mode == 'eval':
|
|
||||||
self.batch_size = self.hparams.test_batch_size
|
|
||||||
|
|
||||||
def _setup_images_and_labels(self, dataset):
|
|
||||||
"""Sets up image and label placeholders for the model."""
|
|
||||||
if dataset == 'cifar10' or dataset == 'cifar100' or self.mode == 'train':
|
|
||||||
self.images = tf.placeholder(tf.float32,
|
|
||||||
[self.batch_size, self.image_size, self.image_size, 3])
|
|
||||||
self.labels = tf.placeholder(tf.float32,
|
|
||||||
[self.batch_size, self.num_classes])
|
|
||||||
else:
|
|
||||||
self.images = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3])
|
|
||||||
self.labels = tf.placeholder(tf.float32, [None, self.num_classes])
|
|
||||||
|
|
||||||
def assign_epoch(self, session, epoch_value):
|
|
||||||
session.run(
|
|
||||||
self._epoch_update, feed_dict={self._new_epoch: epoch_value})
|
|
||||||
|
|
||||||
def _build_graph(self, images, labels, mode):
|
|
||||||
"""Constructs the TF graph for the model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
images: A 4-D image Tensor
|
|
||||||
labels: A 2-D labels Tensor.
|
|
||||||
mode: string indicating training mode (e.g., 'train', 'valid', 'test').
|
|
||||||
"""
|
|
||||||
is_training = 'train' in mode
|
|
||||||
if is_training:
|
|
||||||
self.global_step = tf.train.get_or_create_global_step()
|
|
||||||
|
|
||||||
logits = build_model(images, self.num_classes, is_training,
|
|
||||||
self.hparams)
|
|
||||||
self.predictions, self.cost = helper_utils.setup_loss(logits, labels)
|
|
||||||
|
|
||||||
self._calc_num_trainable_params()
|
|
||||||
|
|
||||||
# Adds L2 weight decay to the cost
|
|
||||||
self.cost = helper_utils.decay_weights(self.cost,
|
|
||||||
self.hparams.weight_decay_rate)
|
|
||||||
|
|
||||||
if is_training:
|
|
||||||
self._build_train_op()
|
|
||||||
|
|
||||||
# Setup checkpointing for this child model
|
|
||||||
# Keep 2 or more checkpoints around during training.
|
|
||||||
with tf.device('/cpu:0'):
|
|
||||||
self.saver = tf.train.Saver(max_to_keep=10)
|
|
||||||
|
|
||||||
self.init = tf.group(tf.global_variables_initializer(),
|
|
||||||
tf.local_variables_initializer())
|
|
||||||
|
|
||||||
def _calc_num_trainable_params(self):
|
|
||||||
self.num_trainable_params = np.sum([
|
|
||||||
np.prod(var.get_shape().as_list())
|
|
||||||
for var in tf.trainable_variables()
|
|
||||||
])
|
|
||||||
tf.logging.info('number of trainable params: {}'.format(
|
|
||||||
self.num_trainable_params))
|
|
||||||
|
|
||||||
def _build_train_op(self):
|
|
||||||
"""Builds the train op for the model."""
|
|
||||||
hparams = self.hparams
|
|
||||||
tvars = tf.trainable_variables()
|
|
||||||
grads = tf.gradients(self.cost, tvars)
|
|
||||||
if hparams.gradient_clipping_by_global_norm > 0.0:
|
|
||||||
grads, norm = tf.clip_by_global_norm(
|
|
||||||
grads, hparams.gradient_clipping_by_global_norm)
|
|
||||||
tf.summary.scalar('grad_norm', norm)
|
|
||||||
|
|
||||||
# Setup the initial learning rate
|
|
||||||
initial_lr = self.lr_rate_ph
|
|
||||||
optimizer = tf.train.MomentumOptimizer(
|
|
||||||
initial_lr, 0.9, use_nesterov=True)
|
|
||||||
|
|
||||||
self.optimizer = optimizer
|
|
||||||
apply_op = optimizer.apply_gradients(
|
|
||||||
zip(grads, tvars), global_step=self.global_step, name='train_step')
|
|
||||||
train_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
|
||||||
with tf.control_dependencies([apply_op]):
|
|
||||||
self.train_op = tf.group(*train_ops)
|
|
||||||
|
|
||||||
|
|
||||||
class ModelTrainer(object):
|
|
||||||
"""Trains an instance of the Model class."""
|
|
||||||
|
|
||||||
def __init__(self, hparams):
|
|
||||||
self._session = None
|
|
||||||
self.hparams = hparams
|
|
||||||
|
|
||||||
# Set the random seed to be sure the same validation set
|
|
||||||
# is used for each model
|
|
||||||
np.random.seed(0)
|
|
||||||
self.data_loader = data_utils.DataSet(hparams)
|
|
||||||
np.random.seed() # Put the random seed back to random
|
|
||||||
self.data_loader.reset()
|
|
||||||
|
|
||||||
# extra stuff for ray
|
|
||||||
self._build_models()
|
|
||||||
self._new_session()
|
|
||||||
self._session.__enter__()
|
|
||||||
|
|
||||||
def save_model(self, checkpoint_dir, step=None):
|
|
||||||
"""Dumps model into the backup_dir.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
step: If provided, creates a checkpoint with the given step
|
|
||||||
number, instead of overwriting the existing checkpoints.
|
|
||||||
"""
|
|
||||||
model_save_name = os.path.join(checkpoint_dir,
|
|
||||||
'model.ckpt') + '-' + str(step)
|
|
||||||
save_path = self.saver.save(self.session, model_save_name)
|
|
||||||
tf.logging.info('Saved child model')
|
|
||||||
return model_save_name
|
|
||||||
|
|
||||||
def extract_model_spec(self, checkpoint_path):
|
|
||||||
"""Loads a checkpoint with the architecture structure stored in the name."""
|
|
||||||
self.saver.restore(self.session, checkpoint_path)
|
|
||||||
tf.logging.warning(
|
|
||||||
'Loaded child model checkpoint from {}'.format(checkpoint_path))
|
|
||||||
|
|
||||||
def eval_child_model(self, model, data_loader, mode):
|
|
||||||
"""Evaluate the child model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model: image model that will be evaluated.
|
|
||||||
data_loader: dataset object to extract eval data from.
|
|
||||||
mode: whether the model will be evaluated on train, val or test.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Accuracy of the model on the specified dataset.
|
|
||||||
"""
|
|
||||||
tf.logging.info('Evaluating child model in mode {}'.format(mode))
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
accuracy = helper_utils.eval_child_model(
|
|
||||||
self.session, model, data_loader, mode)
|
|
||||||
tf.logging.info(
|
|
||||||
'Eval child model accuracy: {}'.format(accuracy))
|
|
||||||
# If epoch trained without raising the below errors, break
|
|
||||||
# from loop.
|
|
||||||
break
|
|
||||||
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
|
|
||||||
tf.logging.info(
|
|
||||||
'Retryable error caught: {}. Retrying.'.format(e))
|
|
||||||
|
|
||||||
return accuracy
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def _new_session(self):
|
|
||||||
"""Creates a new session for model m."""
|
|
||||||
# Create a new session for this model, initialize
|
|
||||||
# variables, and save / restore from checkpoint.
|
|
||||||
sess_cfg = tf.ConfigProto(
|
|
||||||
allow_soft_placement=True, log_device_placement=False)
|
|
||||||
sess_cfg.gpu_options.allow_growth = True
|
|
||||||
self._session = tf.Session('', config=sess_cfg)
|
|
||||||
self._session.run([self.m.init, self.meval.init])
|
|
||||||
return self._session
|
|
||||||
|
|
||||||
def _build_models(self):
|
|
||||||
"""Builds the image models for train and eval."""
|
|
||||||
# Determine if we should build the train and eval model. When using
|
|
||||||
# distributed training we only want to build one or the other and not both.
|
|
||||||
with tf.variable_scope('model', use_resource=False):
|
|
||||||
m = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
|
|
||||||
m.build('train')
|
|
||||||
self._num_trainable_params = m.num_trainable_params
|
|
||||||
self._saver = m.saver
|
|
||||||
with tf.variable_scope('model', reuse=True, use_resource=False):
|
|
||||||
meval = Model(self.hparams, self.data_loader.num_classes, self.data_loader.image_size)
|
|
||||||
meval.build('eval')
|
|
||||||
self.m = m
|
|
||||||
self.meval = meval
|
|
||||||
|
|
||||||
def _run_training_loop(self, curr_epoch):
|
|
||||||
"""Trains the model `m` for one epoch."""
|
|
||||||
start_time = time.time()
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
train_accuracy = helper_utils.run_epoch_training(
|
|
||||||
self.session, self.m, self.data_loader, curr_epoch)
|
|
||||||
break
|
|
||||||
except (tf.errors.AbortedError, tf.errors.UnavailableError) as e:
|
|
||||||
tf.logging.info(
|
|
||||||
'Retryable error caught: {}. Retrying.'.format(e))
|
|
||||||
tf.logging.info('Finished epoch: {}'.format(curr_epoch))
|
|
||||||
tf.logging.info('Epoch time(min): {}'.format(
|
|
||||||
(time.time() - start_time) / 60.0))
|
|
||||||
return train_accuracy
|
|
||||||
|
|
||||||
def _compute_final_accuracies(self, iteration):
|
|
||||||
"""Run once training is finished to compute final test accuracy."""
|
|
||||||
if (iteration >= self.hparams.num_epochs - 1):
|
|
||||||
test_accuracy = self.eval_child_model(self.meval, self.data_loader,
|
|
||||||
'test')
|
|
||||||
else:
|
|
||||||
test_accuracy = 0
|
|
||||||
tf.logging.info('Test Accuracy: {}'.format(test_accuracy))
|
|
||||||
return test_accuracy
|
|
||||||
|
|
||||||
def run_model(self, epoch):
|
|
||||||
"""Trains and evalutes the image model."""
|
|
||||||
valid_accuracy = 0.
|
|
||||||
training_accuracy = self._run_training_loop(epoch)
|
|
||||||
if self.hparams.validation_size > 0:
|
|
||||||
valid_accuracy = self.eval_child_model(self.meval,
|
|
||||||
self.data_loader, 'val')
|
|
||||||
tf.logging.info('Train Acc: {}, Valid Acc: {}'.format(
|
|
||||||
training_accuracy, valid_accuracy))
|
|
||||||
return training_accuracy, valid_accuracy
|
|
||||||
|
|
||||||
def reset_config(self, new_hparams):
|
|
||||||
self.hparams = new_hparams
|
|
||||||
self.data_loader.reset_policy(new_hparams)
|
|
||||||
return
|
|
||||||
|
|
||||||
@property
|
|
||||||
def saver(self):
|
|
||||||
return self._saver
|
|
||||||
|
|
||||||
@property
|
|
||||||
def session(self):
|
|
||||||
return self._session
|
|
||||||
|
|
||||||
@property
|
|
||||||
def num_trainable_params(self):
|
|
||||||
return self._num_trainable_params
|
|
|
@@ -1,59 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
export PYTHONPATH="$(pwd)"
|
|
||||||
|
|
||||||
cifar10_LeNet_search() {
|
|
||||||
local_dir="$PWD/results/"
|
|
||||||
data_path="$PWD/datasets/cifar-10-batches-py"
|
|
||||||
|
|
||||||
python pba/search.py \
|
|
||||||
--local_dir "$local_dir" \
|
|
||||||
--model_name LeNet \
|
|
||||||
--data_path "$data_path" --dataset cifar10 \
|
|
||||||
--train_size 4000 --val_size 46000 \
|
|
||||||
--checkpoint_freq 0 \
|
|
||||||
--name "cifar10_search" --gpu 0.15 --cpu 2 \
|
|
||||||
--num_samples 16 --perturbation_interval 3 --epochs 150 \
|
|
||||||
--explore cifar10 --aug_policy cifar10 \
|
|
||||||
--lr 0.1 --wd 0.0005
|
|
||||||
}
|
|
||||||
|
|
||||||
cifar10_search() {
|
|
||||||
local_dir="$PWD/results/"
|
|
||||||
data_path="$PWD/datasets/cifar-10-batches-py"
|
|
||||||
|
|
||||||
python pba/search.py \
|
|
||||||
--local_dir "$local_dir" \
|
|
||||||
--model_name wrn_40_2 \
|
|
||||||
--data_path "$data_path" --dataset cifar10 \
|
|
||||||
--train_size 4000 --val_size 46000 \
|
|
||||||
--checkpoint_freq 0 \
|
|
||||||
--name "cifar10_search" --gpu 0.15 --cpu 2 \
|
|
||||||
--num_samples 16 --perturbation_interval 3 --epochs 200 \
|
|
||||||
--explore cifar10 --aug_policy cifar10 \
|
|
||||||
--lr 0.1 --wd 0.0005
|
|
||||||
}
|
|
||||||
|
|
||||||
svhn_search() {
|
|
||||||
local_dir="$PWD/results/"
|
|
||||||
data_path="$PWD/datasets/"
|
|
||||||
|
|
||||||
python pba/search.py \
|
|
||||||
--local_dir "$local_dir" --data_path "$data_path" \
|
|
||||||
--model_name wrn_40_2 --dataset svhn \
|
|
||||||
--train_size 1000 --val_size 7325 \
|
|
||||||
--checkpoint_freq 0 \
|
|
||||||
--name "svhn_search" --gpu 0.19 --cpu 2 \
|
|
||||||
--num_samples 16 --perturbation_interval 3 --epochs 160 \
|
|
||||||
--explore cifar10 --aug_policy cifar10 --no_cutout \
|
|
||||||
--lr 0.1 --wd 0.005
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ "$1" = "rcifar10" ]; then
|
|
||||||
cifar10_search
|
|
||||||
elif [ "$1" = "rsvhn" ]; then
|
|
||||||
svhn_search
|
|
||||||
elif [ "$1" = "LeNet" ]; then
|
|
||||||
cifar10_LeNet_search
|
|
||||||
else
|
|
||||||
echo "invalid args"
|
|
||||||
fi
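# Usage note (added; the original script name is not shown in this diff, "search.sh" is assumed):
#   bash search.sh LeNet       # PBA search on CIFAR-10 with the LeNet child model
#   bash search.sh rcifar10    # PBA search on CIFAR-10 with WRN-40-2
#   bash search.sh rsvhn       # PBA search on SVHN with WRN-40-2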
|
|
Old/PBA/setup.py
|
@@ -1,210 +0,0 @@
|
||||||
"""Parse flags and set up hyperparameters."""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import random
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
from pba.augmentation_transforms_hp import NUM_HP_TRANSFORM
|
|
||||||
|
|
||||||
|
|
||||||
def create_parser(state):
|
|
||||||
"""Create arg parser for flags."""
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument(
|
|
||||||
'--model_name',
|
|
||||||
default='wrn',
|
|
||||||
choices=('wrn_28_10', 'wrn_40_2', 'shake_shake_32', 'shake_shake_96',
|
|
||||||
'shake_shake_112', 'pyramid_net', 'resnet', 'LeNet'))
|
|
||||||
parser.add_argument(
|
|
||||||
'--data_path',
|
|
||||||
default='/tmp/datasets/',
|
|
||||||
help='Directory where dataset is located.')
|
|
||||||
parser.add_argument(
|
|
||||||
'--dataset',
|
|
||||||
default='cifar10',
|
|
||||||
choices=('cifar10', 'cifar100', 'svhn', 'svhn-full', 'test'))
|
|
||||||
parser.add_argument(
|
|
||||||
'--recompute_dset_stats',
|
|
||||||
action='store_true',
|
|
||||||
help='Instead of using hardcoded mean/std, recompute from dataset.')
|
|
||||||
parser.add_argument('--local_dir', type=str, default='/tmp/ray_results/', help='Ray directory.')
|
|
||||||
parser.add_argument('--restore', type=str, default=None, help='If specified, tries to restore from given path.')
|
|
||||||
parser.add_argument('--train_size', type=int, default=5000, help='Number of training examples.')
|
|
||||||
parser.add_argument('--val_size', type=int, default=45000, help='Number of validation examples.')
|
|
||||||
parser.add_argument('--checkpoint_freq', type=int, default=50, help='Checkpoint frequency.')
|
|
||||||
parser.add_argument(
|
|
||||||
'--cpu', type=float, default=4, help='Allocated by Ray')
|
|
||||||
parser.add_argument(
|
|
||||||
'--gpu', type=float, default=1, help='Allocated by Ray')
|
|
||||||
parser.add_argument(
|
|
||||||
'--aug_policy',
|
|
||||||
type=str,
|
|
||||||
default='cifar10',
|
|
||||||
help=
|
|
||||||
'which augmentation policy to use (in augmentation_transforms_hp.py)')
|
|
||||||
# search-use only
|
|
||||||
parser.add_argument(
|
|
||||||
'--explore',
|
|
||||||
type=str,
|
|
||||||
default='cifar10',
|
|
||||||
help='which explore function to use')
|
|
||||||
parser.add_argument(
|
|
||||||
'--epochs',
|
|
||||||
type=int,
|
|
||||||
default=0,
|
|
||||||
help='Number of epochs, or <=0 for default')
|
|
||||||
parser.add_argument(
|
|
||||||
'--no_cutout', action='store_true', help='turn off cutout')
|
|
||||||
parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
|
|
||||||
parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
|
|
||||||
parser.add_argument('--bs', type=int, default=128, help='batch size')
|
|
||||||
parser.add_argument('--test_bs', type=int, default=25, help='test batch size')
|
|
||||||
parser.add_argument('--num_samples', type=int, default=1, help='Number of Ray samples')
|
|
||||||
|
|
||||||
if state == 'train':
|
|
||||||
parser.add_argument(
|
|
||||||
'--use_hp_policy',
|
|
||||||
action='store_true',
|
|
||||||
help='otherwise use autoaug policy')
|
|
||||||
parser.add_argument(
|
|
||||||
'--hp_policy',
|
|
||||||
type=str,
|
|
||||||
default=None,
|
|
||||||
help='either a comma separated list of values or a file')
|
|
||||||
parser.add_argument(
|
|
||||||
'--hp_policy_epochs',
|
|
||||||
type=int,
|
|
||||||
default=200,
|
|
||||||
help='number of epochs/iterations policy trained for')
|
|
||||||
parser.add_argument(
|
|
||||||
'--no_aug',
|
|
||||||
action='store_true',
|
|
||||||
help=
|
|
||||||
'no additional augmentation at all (cutout is still applied unless --no_cutout is set)'
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--flatten',
|
|
||||||
action='store_true',
|
|
||||||
help='randomly select aug policy from schedule')
|
|
||||||
parser.add_argument('--name', type=str, default='autoaug')
|
|
||||||
|
|
||||||
elif state == 'search':
|
|
||||||
parser.add_argument('--perturbation_interval', type=int, default=10)
|
|
||||||
parser.add_argument('--name', type=str, default='autoaug_pbt')
|
|
||||||
else:
|
|
||||||
raise ValueError('unknown state')
|
|
||||||
args = parser.parse_args()
|
|
||||||
tf.logging.info(str(args))
|
|
||||||
return args
|
|
||||||
|
|
||||||
|
|
||||||
def create_hparams(state, FLAGS): # pylint: disable=invalid-name
|
|
||||||
"""Creates hyperparameters to pass into Ray config.
|
|
||||||
|
|
||||||
Different options depending on search or eval mode.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
state: a string, 'train' or 'search'.
|
|
||||||
FLAGS: parsed command line flags.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
tf.hparams object.
|
|
||||||
"""
|
|
||||||
epochs = 0
|
|
||||||
tf.logging.info('data path: {}'.format(FLAGS.data_path))
|
|
||||||
hparams = tf.contrib.training.HParams(
|
|
||||||
train_size=FLAGS.train_size,
|
|
||||||
validation_size=FLAGS.val_size,
|
|
||||||
dataset=FLAGS.dataset,
|
|
||||||
data_path=FLAGS.data_path,
|
|
||||||
batch_size=FLAGS.bs,
|
|
||||||
gradient_clipping_by_global_norm=5.0,
|
|
||||||
explore=FLAGS.explore,
|
|
||||||
aug_policy=FLAGS.aug_policy,
|
|
||||||
no_cutout=FLAGS.no_cutout,
|
|
||||||
recompute_dset_stats=FLAGS.recompute_dset_stats,
|
|
||||||
lr=FLAGS.lr,
|
|
||||||
weight_decay_rate=FLAGS.wd,
|
|
||||||
test_batch_size=FLAGS.test_bs)
|
|
||||||
|
|
||||||
if state == 'train':
|
|
||||||
hparams.add_hparam('no_aug', FLAGS.no_aug)
|
|
||||||
hparams.add_hparam('use_hp_policy', FLAGS.use_hp_policy)
|
|
||||||
if FLAGS.use_hp_policy:
|
|
||||||
if FLAGS.hp_policy == 'random':
|
|
||||||
tf.logging.info('RANDOM SEARCH')
|
|
||||||
parsed_policy = []
|
|
||||||
for i in range(NUM_HP_TRANSFORM * 4):
|
|
||||||
if i % 2 == 0:
|
|
||||||
parsed_policy.append(random.randint(0, 10))
|
|
||||||
else:
|
|
||||||
parsed_policy.append(random.randint(0, 9))
|
|
||||||
elif FLAGS.hp_policy.endswith('.txt') or FLAGS.hp_policy.endswith(
|
|
||||||
'.p'):
|
|
||||||
# will be loaded in in data_utils
|
|
||||||
parsed_policy = FLAGS.hp_policy
|
|
||||||
else:
|
|
||||||
# parse input into a fixed augmentation policy
|
|
||||||
parsed_policy = FLAGS.hp_policy.split(', ')
|
|
||||||
parsed_policy = [int(p) for p in parsed_policy]
|
|
||||||
hparams.add_hparam('hp_policy', parsed_policy)
|
|
||||||
hparams.add_hparam('hp_policy_epochs', FLAGS.hp_policy_epochs)
|
|
||||||
hparams.add_hparam('flatten', FLAGS.flatten)
|
|
||||||
elif state == 'search':
|
|
||||||
hparams.add_hparam('no_aug', False)
|
|
||||||
hparams.add_hparam('use_hp_policy', True)
|
|
||||||
# default start value of 0
|
|
||||||
hparams.add_hparam('hp_policy',
|
|
||||||
[0 for _ in range(4 * NUM_HP_TRANSFORM)])
|
|
||||||
else:
|
|
||||||
raise ValueError('unknown state')
|
|
||||||
|
|
||||||
if FLAGS.model_name == 'wrn_40_2':
|
|
||||||
hparams.add_hparam('model_name', 'wrn')
|
|
||||||
epochs = 200
|
|
||||||
hparams.add_hparam('wrn_size', 32)
|
|
||||||
hparams.add_hparam('wrn_depth', 40)
|
|
||||||
elif FLAGS.model_name == 'wrn_28_10':
|
|
||||||
hparams.add_hparam('model_name', 'wrn')
|
|
||||||
epochs = 200
|
|
||||||
hparams.add_hparam('wrn_size', 160)
|
|
||||||
hparams.add_hparam('wrn_depth', 28)
|
|
||||||
elif FLAGS.model_name == 'resnet':
|
|
||||||
hparams.add_hparam('model_name', 'resnet')
|
|
||||||
epochs = 200
|
|
||||||
hparams.add_hparam('resnet_size', 20)
|
|
||||||
hparams.add_hparam('num_filters', 32)
|
|
||||||
elif FLAGS.model_name == 'shake_shake_32':
|
|
||||||
hparams.add_hparam('model_name', 'shake_shake')
|
|
||||||
epochs = 1800
|
|
||||||
hparams.add_hparam('shake_shake_widen_factor', 2)
|
|
||||||
elif FLAGS.model_name == 'shake_shake_96':
|
|
||||||
hparams.add_hparam('model_name', 'shake_shake')
|
|
||||||
epochs = 1800
|
|
||||||
hparams.add_hparam('shake_shake_widen_factor', 6)
|
|
||||||
elif FLAGS.model_name == 'shake_shake_112':
|
|
||||||
hparams.add_hparam('model_name', 'shake_shake')
|
|
||||||
epochs = 1800
|
|
||||||
hparams.add_hparam('shake_shake_widen_factor', 7)
|
|
||||||
elif FLAGS.model_name == 'pyramid_net':
|
|
||||||
hparams.add_hparam('model_name', 'pyramid_net')
|
|
||||||
epochs = 1800
|
|
||||||
hparams.set_hparam('batch_size', 64)
|
|
||||||
|
|
||||||
elif FLAGS.model_name == 'LeNet':
|
|
||||||
hparams.add_hparam('model_name', 'LeNet')
|
|
||||||
epochs = 200
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError('Invalid model name: %s' % FLAGS.model_name)
|
|
||||||
if FLAGS.epochs > 0:
|
|
||||||
tf.logging.info('overwriting with custom epochs')
|
|
||||||
epochs = FLAGS.epochs
|
|
||||||
hparams.add_hparam('num_epochs', epochs)
|
|
||||||
tf.logging.info('epochs: {}, lr: {}, wd: {}'.format(
|
|
||||||
hparams.num_epochs, hparams.lr, hparams.weight_decay_rate))
|
|
||||||
return hparams
|
|
|
@@ -1,41 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
export PYTHONPATH="$(pwd)"
|
|
||||||
|
|
||||||
# args: [model name] [lr] [wd] #Learning rate / weight decay
|
|
||||||
eval_cifar10() {
|
|
||||||
hp_policy="$PWD/schedules/rcifar10_16_wrn.txt"
|
|
||||||
local_dir="$PWD/results/"
|
|
||||||
data_path="$PWD/datasets/cifar-10-batches-py"
|
|
||||||
|
|
||||||
size=50000
|
|
||||||
dataset="cifar10"
|
|
||||||
name="eval_cifar10_$1" # has 8 cutout size
|
|
||||||
|
|
||||||
python pba/train.py \
|
|
||||||
--local_dir "$local_dir" --data_path "$data_path" \
|
|
||||||
--model_name "$1" --dataset "$dataset" \
|
|
||||||
--train_size "$size" --val_size 0 \
|
|
||||||
--checkpoint_freq 25 --gpu 1 --cpu 4 \
|
|
||||||
--use_hp_policy --hp_policy "$hp_policy" \
|
|
||||||
--hp_policy_epochs 200 \
|
|
||||||
--aug_policy cifar10 --name "$name" \
|
|
||||||
--lr "$2" --wd "$3"
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ "$@" = "wrn_28_10" ]; then
|
|
||||||
eval_cifar10 wrn_28_10 0.1 0.0005
|
|
||||||
elif [ "$@" = "ss_32" ]; then
|
|
||||||
eval_cifar10 shake_shake_32 0.01 0.001
|
|
||||||
elif [ "$@" = "ss_96" ]; then
|
|
||||||
eval_cifar10 shake_shake_96 0.01 0.001
|
|
||||||
elif [ "$@" = "ss_112" ]; then
|
|
||||||
eval_cifar10 shake_shake_112 0.01 0.001
|
|
||||||
elif [ "$@" = "pyramid_net" ]; then
|
|
||||||
eval_cifar10 pyramid_net 0.05 0.00005
|
|
||||||
|
|
||||||
elif [ "$@" = "LeNet" ]; then
|
|
||||||
eval_cifar10 LeNet 0.05 0.0
|
|
||||||
|
|
||||||
else
|
|
||||||
echo "invalid args"
|
|
||||||
fi
|
|
|
@@ -1,73 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
## build the neural network class
|
|
||||||
# weight initialization
|
|
||||||
def weight_variable(shape, name = None):
|
|
||||||
initial = tf.truncated_normal(shape, stddev=0.1)
|
|
||||||
return tf.Variable(initial, name = name)
|
|
||||||
|
|
||||||
# bias initialization
|
|
||||||
def bias_variable(shape, name = None):
|
|
||||||
initial = tf.constant(0.1, shape=shape) # positive bias
|
|
||||||
return tf.Variable(initial, name = name)
|
|
||||||
|
|
||||||
# 2D convolution
|
|
||||||
def conv2d(x, W, name = None):
|
|
||||||
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name = name)
|
|
||||||
|
|
||||||
# max pooling
|
|
||||||
def max_pool_2x2(x, name = None):
|
|
||||||
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
|
|
||||||
padding='SAME', name = name)
|
|
||||||
|
|
||||||
def LeNet(images, num_classes):
|
|
||||||
# tunable hyperparameters for nn architecture
|
|
||||||
s_f_conv1 = 5; # filter size of first convolution layer (default = 3)
|
|
||||||
n_f_conv1 = 20; # number of features of first convolution layer (default = 36)
|
|
||||||
s_f_conv2 = 5; # filter size of second convolution layer (default = 3)
|
|
||||||
n_f_conv2 = 50; # number of features of second convolution layer (default = 36)
|
|
||||||
n_n_fc1 = 500; # number of neurons of first fully connected layer (default = 576)
|
|
||||||
n_n_fc2 = 500; # number of neurons of second fully connected layer (default = 576)
|
|
||||||
|
|
||||||
#print(images.shape)
|
|
||||||
# 1.layer: convolution + max pooling
|
|
||||||
W_conv1_tf = weight_variable([s_f_conv1, s_f_conv1, int(images.shape[3]), n_f_conv1], name = 'W_conv1_tf') # (5,5,1,32)
|
|
||||||
b_conv1_tf = bias_variable([n_f_conv1], name = 'b_conv1_tf') # (32)
|
|
||||||
h_conv1_tf = tf.nn.relu(conv2d(images, W_conv1_tf) + b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,32)
|
|
||||||
h_pool1_tf = max_pool_2x2(h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,32)
|
|
||||||
#print(h_conv1_tf.shape)
|
|
||||||
#print(h_pool1_tf.shape)
|
|
||||||
# 2.layer: convolution + max pooling
|
|
||||||
W_conv2_tf = weight_variable([s_f_conv2, s_f_conv2, n_f_conv1, n_f_conv2], name = 'W_conv2_tf')
|
|
||||||
b_conv2_tf = bias_variable([n_f_conv2], name = 'b_conv2_tf')
|
|
||||||
h_conv2_tf = tf.nn.relu(conv2d(h_pool1_tf, W_conv2_tf) + b_conv2_tf, name ='h_conv2_tf') #(.,14,14,32)
|
|
||||||
h_pool2_tf = max_pool_2x2(h_conv2_tf, name = 'h_pool2_tf') #(.,7,7,32)
|
|
||||||
|
|
||||||
#print(h_pool2_tf.shape)
|
|
||||||
|
|
||||||
# 4.layer: fully connected
|
|
||||||
W_fc1_tf = weight_variable([5*5*n_f_conv2,n_n_fc1], name = 'W_fc1_tf') # (4*4*32, 1024)
|
|
||||||
b_fc1_tf = bias_variable([n_n_fc1], name = 'b_fc1_tf') # (1024)
|
|
||||||
h_pool2_flat_tf = tf.reshape(h_pool2_tf, [int(h_pool2_tf.shape[0]), -1], name = 'h_pool3_flat_tf') # (.,1024)
|
|
||||||
h_fc1_tf = tf.nn.relu(tf.matmul(h_pool2_flat_tf, W_fc1_tf) + b_fc1_tf,
|
|
||||||
name = 'h_fc1_tf') # (.,1024)
|
|
||||||
|
|
||||||
# add dropout
|
|
||||||
#keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
|
|
||||||
#h_fc1_drop_tf = tf.nn.dropout(h_fc1_tf, keep_prob_tf, name = 'h_fc1_drop_tf')
|
|
||||||
print(h_fc1_tf.shape)
|
|
||||||
|
|
||||||
# 5.layer: fully connected
|
|
||||||
W_fc2_tf = weight_variable([n_n_fc1, num_classes], name = 'W_fc2_tf')
|
|
||||||
b_fc2_tf = bias_variable([num_classes], name = 'b_fc2_tf')
|
|
||||||
z_pred_tf = tf.add(tf.matmul(h_fc1_tf, W_fc2_tf), b_fc2_tf, name = 'z_pred_tf')# => (.,10)
|
|
||||||
# predicted probabilities in one-hot encoding
|
|
||||||
#y_pred_proba_tf = tf.nn.softmax(z_pred_tf, name='y_pred_proba_tf')
|
|
||||||
|
|
||||||
# tensor of correct predictions
|
|
||||||
#y_pred_correct_tf = tf.equal(tf.argmax(y_pred_proba_tf, 1),
|
|
||||||
# tf.argmax(y_data_tf, 1),
|
|
||||||
# name = 'y_pred_correct_tf')
|
|
||||||
logits = z_pred_tf
|
|
||||||
return logits #y_pred_proba_tf
|
|
Old/UDA/main.py
|
@@ -1,620 +0,0 @@
|
||||||
# coding=utf-8
|
|
||||||
# Copyright 2019 The Google UDA Team Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
"""UDA on CIFAR-10 and SVHN.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from absl import flags
|
|
||||||
import absl.logging as _logging # pylint: disable=unused-import
|
|
||||||
|
|
||||||
import tensorflow as tf
|
|
||||||
|
|
||||||
from randaugment import custom_ops as ops
|
|
||||||
import data
|
|
||||||
import utils
|
|
||||||
|
|
||||||
from randaugment.wrn import build_wrn_model
|
|
||||||
from randaugment.shake_drop import build_shake_drop_model
|
|
||||||
from randaugment.shake_shake import build_shake_shake_model
|
|
||||||
|
|
||||||
from randaugment.LeNet import LeNet
|
|
||||||
|
|
||||||
|
|
||||||
# TPU related
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"master", default=None,
|
|
||||||
help="the TPU address. This should be set when using Cloud TPU")
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"tpu", default=None,
|
|
||||||
help="The Cloud TPU to use for training. This should be either the name "
|
|
||||||
"used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.")
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"gcp_project", default=None,
|
|
||||||
help="Project name for the Cloud TPU-enabled project. If not specified, "
|
|
||||||
"we will attempt to automatically detect the GCE project from metadata.")
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"tpu_zone", default=None,
|
|
||||||
help="GCE zone where the Cloud TPU is located in. If not specified, we "
|
|
||||||
"will attempt to automatically detect the GCE project from metadata.")
|
|
||||||
flags.DEFINE_bool(
|
|
||||||
"use_tpu", default=False,
|
|
||||||
help="Use TPUs rather than GPU/CPU.")
|
|
||||||
flags.DEFINE_enum(
|
|
||||||
"task_name", "cifar10",
|
|
||||||
enum_values=["cifar10", "svhn"],
|
|
||||||
help="The task to use")
|
|
||||||
|
|
||||||
# UDA config:
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"sup_size", default=4000,
|
|
||||||
help="Number of supervised pairs to use. "
|
|
||||||
"-1: all training samples. 4000: 4000 supervised examples.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"aug_copy", default=0,
|
|
||||||
help="Number of different augmented data generated.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"unsup_ratio", default=0,
|
|
||||||
help="The ratio between batch size of unlabeled data and labeled data, "
|
|
||||||
"i.e., unsup_ratio * train_batch_size is the batch_size for unlabeled data."
|
|
||||||
"Do not use the unsupervised objective if set to 0.")
|
|
||||||
flags.DEFINE_enum(
|
|
||||||
"tsa", "",
|
|
||||||
enum_values=["", "linear_schedule", "log_schedule", "exp_schedule"],
|
|
||||||
help="anneal schedule of training signal annealing. "
|
|
||||||
"tsa='' means not using TSA. See the paper for other schedules.")
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"uda_confidence_thresh", default=-1,
|
|
||||||
help="The threshold on predicted probability on unsupervised data. If set,"
|
|
||||||
"UDA loss will only be calculated on unlabeled examples whose largest"
|
|
||||||
"probability is larger than the threshold")
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"uda_softmax_temp", -1,
|
|
||||||
help="The temperature of the Softmax when making prediction on unlabeled"
|
|
||||||
"examples. -1 means to use normal Softmax")
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"ent_min_coeff", default=0,
|
|
||||||
help="")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"unsup_coeff", default=1,
|
|
||||||
help="The coefficient on the UDA loss. "
|
|
||||||
"setting unsup_coeff to 1 works for most settings. "
|
|
||||||
"When you have extermely few samples, consider increasing unsup_coeff")
|
|
||||||
|
|
||||||
# Experiment (data/checkpoint/directory) config
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"data_dir", default=None,
|
|
||||||
help="Path to data directory containing `*.tfrecords`.")
|
|
||||||
flags.DEFINE_string(
|
|
||||||
"model_dir", default=None,
|
|
||||||
help="model dir of the saved checkpoints.")
|
|
||||||
flags.DEFINE_bool(
|
|
||||||
"do_train", default=True,
|
|
||||||
help="Whether to run training.")
|
|
||||||
flags.DEFINE_bool(
|
|
||||||
"do_eval", default=False,
|
|
||||||
help="Whether to run eval on the test set.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"dev_size", default=-1,
|
|
||||||
help="dev set size.")
|
|
||||||
flags.DEFINE_bool(
|
|
||||||
"verbose", default=False,
|
|
||||||
help="Whether to print additional information.")
|
|
||||||
|
|
||||||
# Training config
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"train_batch_size", default=32,
|
|
||||||
help="Size of train batch.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"eval_batch_size", default=8,
|
|
||||||
help="Size of evalation batch.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"train_steps", default=100000,
|
|
||||||
help="Total number of training steps.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"iterations", default=10000,
|
|
||||||
help="Number of iterations per repeat loop.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"save_steps", default=10000,
|
|
||||||
help="number of steps for model checkpointing.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"max_save", default=10,
|
|
||||||
help="Maximum number of checkpoints to save.")
|
|
||||||
|
|
||||||
# Model config
|
|
||||||
flags.DEFINE_enum(
|
|
||||||
"model_name", default="wrn",
|
|
||||||
enum_values=["wrn", "shake_shake_32", "shake_shake_96", "shake_shake_112", "pyramid_net", "LeNet"],
|
|
||||||
help="Name of the model")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"num_classes", default=10,
|
|
||||||
help="Number of categories for classification.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"wrn_size", default=32,
|
|
||||||
help="The size of WideResNet. It should be set to 32 for WRN-28-2"
|
|
||||||
"and should be set to 160 for WRN-28-10")
|
|
||||||
|
|
||||||
# Optimization config
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"learning_rate", default=0.03,
|
|
||||||
help="Maximum learning rate.")
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"weight_decay_rate", default=5e-4,
|
|
||||||
help="Weight decay rate.")
|
|
||||||
flags.DEFINE_float(
|
|
||||||
"min_lr_ratio", default=0.004,
|
|
||||||
help="Minimum ratio learning rate.")
|
|
||||||
flags.DEFINE_integer(
|
|
||||||
"warmup_steps", default=20000,
|
|
||||||
help="Number of steps for linear lr warmup.")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
FLAGS = tf.flags.FLAGS
|
|
||||||
|
|
||||||
arg_scope = tf.contrib.framework.arg_scope
|
|
||||||
|
|
||||||
|
|
||||||
def get_tsa_threshold(schedule, global_step, num_train_steps, start, end):
|
|
||||||
step_ratio = tf.to_float(global_step) / tf.to_float(num_train_steps)
|
|
||||||
if schedule == "linear_schedule":
|
|
||||||
coeff = step_ratio
|
|
||||||
elif schedule == "exp_schedule":
|
|
||||||
scale = 5
|
|
||||||
# [exp(-5), exp(0)] = [1e-2, 1]
|
|
||||||
coeff = tf.exp((step_ratio - 1) * scale)
|
|
||||||
elif schedule == "log_schedule":
|
|
||||||
scale = 5
|
|
||||||
# [1 - exp(0), 1 - exp(-5)] = [0, 0.99]
|
|
||||||
coeff = 1 - tf.exp((-step_ratio) * scale)
|
|
||||||
return coeff * (end - start) + start
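# Illustrative sketch (added, not part of the original file): the three TSA schedules above,
# reproduced with NumPy; the returned threshold is coeff * (end - start) + start.
#
#   import numpy as np
#   step_ratio = np.linspace(0.0, 1.0, 5)
#   linear = step_ratio                        # 0 -> 1
#   exp = np.exp((step_ratio - 1) * 5)         # ~0.0067 -> 1 (slow start)
#   log = 1 - np.exp(-step_ratio * 5)          # 0 -> ~0.993 (fast start)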
|
|
||||||
|
|
||||||
|
|
||||||
def setup_arg_scopes(is_training):
|
|
||||||
"""Sets up the argscopes that will be used when building an image model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
is_training: Is the model training or not.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Arg scopes to be put around the model being constructed.
|
|
||||||
"""
|
|
||||||
|
|
||||||
batch_norm_decay = 0.9
|
|
||||||
batch_norm_epsilon = 1e-5
|
|
||||||
batch_norm_params = {
|
|
||||||
# Decay for the moving averages.
|
|
||||||
"decay": batch_norm_decay,
|
|
||||||
# epsilon to prevent 0s in variance.
|
|
||||||
"epsilon": batch_norm_epsilon,
|
|
||||||
"scale": True,
|
|
||||||
# collection containing the moving mean and moving variance.
|
|
||||||
"is_training": is_training,
|
|
||||||
}
|
|
||||||
|
|
||||||
scopes = []
|
|
||||||
|
|
||||||
scopes.append(arg_scope([ops.batch_norm], **batch_norm_params))
|
|
||||||
return scopes
|
|
||||||
|
|
||||||
|
|
||||||
def build_model(inputs, num_classes, is_training, update_bn, hparams):
|
|
||||||
"""Constructs the vision model being trained/evaled.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inputs: input features/images being fed to the image model being built.
|
|
||||||
num_classes: number of output classes being predicted.
|
|
||||||
is_training: is the model training or not.
|
|
||||||
hparams: additional hyperparameters associated with the image model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The logits of the image model.
|
|
||||||
"""
|
|
||||||
scopes = setup_arg_scopes(is_training)
|
|
||||||
|
|
||||||
try:
|
|
||||||
from contextlib import nested
|
|
||||||
except ImportError:
|
|
||||||
from contextlib import ExitStack, contextmanager
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def nested(*contexts):
|
|
||||||
with ExitStack() as stack:
|
|
||||||
for ctx in contexts:
|
|
||||||
stack.enter_context(ctx)
|
|
||||||
yield contexts
|
|
||||||
|
|
||||||
with nested(*scopes):
|
|
||||||
if hparams.model_name == "pyramid_net":
|
|
||||||
logits = build_shake_drop_model(
|
|
||||||
inputs, num_classes, is_training)
|
|
||||||
elif hparams.model_name == "wrn":
|
|
||||||
logits = build_wrn_model(
|
|
||||||
inputs, num_classes, hparams.wrn_size, update_bn)
|
|
||||||
elif hparams.model_name == "shake_shake":
|
|
||||||
logits = build_shake_shake_model(
|
|
||||||
inputs, num_classes, hparams, is_training)
|
|
||||||
|
|
||||||
elif hparams.model_name == "LeNet":
|
|
||||||
logits = LeNet(inputs, num_classes)
|
|
||||||
|
|
||||||
return logits
|
|
||||||
|
|
||||||
|
|
||||||
def _kl_divergence_with_logits(p_logits, q_logits):
|
|
||||||
p = tf.nn.softmax(p_logits)
|
|
||||||
log_p = tf.nn.log_softmax(p_logits)
|
|
||||||
log_q = tf.nn.log_softmax(q_logits)
|
|
||||||
|
|
||||||
kl = tf.reduce_sum(p * (log_p - log_q), -1)
|
|
||||||
return kl
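# Illustrative sketch (added, not part of the original file): the same KL(p || q) computed
# from logits with NumPy, using a numerically stable log-softmax:
#
#   import numpy as np
#   def log_softmax(x):
#       x = x - x.max(axis=-1, keepdims=True)
#       return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))
#   def kl_with_logits(p_logits, q_logits):
#       log_p, log_q = log_softmax(p_logits), log_softmax(q_logits)
#       return (np.exp(log_p) * (log_p - log_q)).sum(axis=-1)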
|
|
||||||
|
|
||||||
|
|
||||||
def anneal_sup_loss(sup_logits, sup_labels, sup_loss, global_step, metric_dict):
|
|
||||||
tsa_start = 1. / FLAGS.num_classes
|
|
||||||
eff_train_prob_threshold = get_tsa_threshold(
|
|
||||||
FLAGS.tsa, global_step, FLAGS.train_steps,
|
|
||||||
tsa_start, end=1)
|
|
||||||
|
|
||||||
one_hot_labels = tf.one_hot(
|
|
||||||
sup_labels, depth=FLAGS.num_classes, dtype=tf.float32)
|
|
||||||
sup_probs = tf.nn.softmax(sup_logits, axis=-1)
|
|
||||||
correct_label_probs = tf.reduce_sum(
|
|
||||||
one_hot_labels * sup_probs, axis=-1)
|
|
||||||
larger_than_threshold = tf.greater(
|
|
||||||
correct_label_probs, eff_train_prob_threshold)
|
|
||||||
loss_mask = 1 - tf.cast(larger_than_threshold, tf.float32)
|
|
||||||
loss_mask = tf.stop_gradient(loss_mask)
|
|
||||||
sup_loss = sup_loss * loss_mask
|
|
||||||
avg_sup_loss = (tf.reduce_sum(sup_loss) /
|
|
||||||
tf.maximum(tf.reduce_sum(loss_mask), 1))
|
|
||||||
metric_dict["sup/sup_trained_ratio"] = tf.reduce_mean(loss_mask)
|
|
||||||
metric_dict["sup/eff_train_prob_threshold"] = eff_train_prob_threshold
|
|
||||||
return sup_loss, avg_sup_loss
|
|
||||||
|
|
||||||
|
|
||||||
def get_ent(logits, return_mean=True):
|
|
||||||
log_prob = tf.nn.log_softmax(logits, axis=-1)
|
|
||||||
prob = tf.exp(log_prob)
|
|
||||||
ent = tf.reduce_sum(-prob * log_prob, axis=-1)
|
|
||||||
if return_mean:
|
|
||||||
ent = tf.reduce_mean(ent)
|
|
||||||
return ent
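# Note (added for clarity): get_ent computes the Shannon entropy H(p) = -sum_i p_i * log p_i of
# the predicted class distribution; adding ent_min_coeff * H(p) to the loss (see below) pushes
# the model toward confident, low-entropy predictions on unlabeled examples.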
|
|
||||||
|
|
||||||
|
|
||||||
def get_model_fn(hparams):
|
|
||||||
def model_fn(features, labels, mode, params):
|
|
||||||
sup_labels = tf.reshape(features["label"], [-1])
|
|
||||||
|
|
||||||
#### Configuring the optimizer
|
|
||||||
global_step = tf.train.get_global_step()
|
|
||||||
metric_dict = {}
|
|
||||||
is_training = (mode == tf.estimator.ModeKeys.TRAIN)
|
|
||||||
if FLAGS.unsup_ratio > 0 and is_training:
|
|
||||||
all_images = tf.concat([features["image"],
|
|
||||||
features["ori_image"],
|
|
||||||
features["aug_image"]], 0)
|
|
||||||
else:
|
|
||||||
all_images = features["image"]
|
|
||||||
|
|
||||||
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
|
|
||||||
all_logits = build_model(
|
|
||||||
inputs=all_images,
|
|
||||||
num_classes=FLAGS.num_classes,
|
|
||||||
is_training=is_training,
|
|
||||||
update_bn=True and is_training,
|
|
||||||
hparams=hparams,
|
|
||||||
)
|
|
||||||
|
|
||||||
sup_bsz = tf.shape(features["image"])[0]
|
|
||||||
sup_logits = all_logits[:sup_bsz]
|
|
||||||
|
|
||||||
sup_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
|
|
||||||
labels=sup_labels,
|
|
||||||
logits=sup_logits)
|
|
||||||
sup_prob = tf.nn.softmax(sup_logits, axis=-1)
|
|
||||||
metric_dict["sup/pred_prob"] = tf.reduce_mean(
|
|
||||||
tf.reduce_max(sup_prob, axis=-1))
|
|
||||||
if FLAGS.tsa:
|
|
||||||
sup_loss, avg_sup_loss = anneal_sup_loss(sup_logits, sup_labels, sup_loss,
|
|
||||||
global_step, metric_dict)
|
|
||||||
else:
|
|
||||||
avg_sup_loss = tf.reduce_mean(sup_loss)
|
|
||||||
total_loss = avg_sup_loss
|
|
||||||
|
|
||||||
if FLAGS.unsup_ratio > 0 and is_training:
|
|
||||||
aug_bsz = tf.shape(features["ori_image"])[0]
|
|
||||||
|
|
||||||
ori_logits = all_logits[sup_bsz : sup_bsz + aug_bsz]
|
|
||||||
aug_logits = all_logits[sup_bsz + aug_bsz:]
|
|
||||||
if FLAGS.uda_softmax_temp != -1:
|
|
||||||
ori_logits_tgt = ori_logits / FLAGS.uda_softmax_temp
|
|
||||||
else:
|
|
||||||
ori_logits_tgt = ori_logits
|
|
||||||
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
|
|
||||||
aug_prob = tf.nn.softmax(aug_logits, axis=-1)
|
|
||||||
metric_dict["unsup/ori_prob"] = tf.reduce_mean(
|
|
||||||
tf.reduce_max(ori_prob, axis=-1))
|
|
||||||
metric_dict["unsup/aug_prob"] = tf.reduce_mean(
|
|
||||||
tf.reduce_max(aug_prob, axis=-1))
|
|
||||||
|
|
||||||
aug_loss = _kl_divergence_with_logits(
|
|
||||||
p_logits=tf.stop_gradient(ori_logits_tgt),
|
|
||||||
q_logits=aug_logits)
|
|
||||||
|
|
||||||
if FLAGS.uda_confidence_thresh != -1:
|
|
||||||
ori_prob = tf.nn.softmax(ori_logits, axis=-1)
|
|
||||||
largest_prob = tf.reduce_max(ori_prob, axis=-1)
|
|
||||||
loss_mask = tf.cast(tf.greater(
|
|
||||||
largest_prob, FLAGS.uda_confidence_thresh), tf.float32)
|
|
||||||
metric_dict["unsup/high_prob_ratio"] = tf.reduce_mean(loss_mask)
|
|
||||||
loss_mask = tf.stop_gradient(loss_mask)
|
|
||||||
aug_loss = aug_loss * loss_mask
|
|
||||||
metric_dict["unsup/high_prob_loss"] = tf.reduce_mean(aug_loss)
|
|
||||||
|
|
||||||
if FLAGS.ent_min_coeff > 0:
|
|
||||||
ent_min_coeff = FLAGS.ent_min_coeff
|
|
||||||
metric_dict["unsup/ent_min_coeff"] = ent_min_coeff
|
|
||||||
per_example_ent = get_ent(ori_logits)
|
|
||||||
ent_min_loss = tf.reduce_mean(per_example_ent)
|
|
||||||
total_loss = total_loss + ent_min_coeff * ent_min_loss
|
|
||||||
|
|
||||||
avg_unsup_loss = tf.reduce_mean(aug_loss)
|
|
||||||
total_loss += FLAGS.unsup_coeff * avg_unsup_loss
|
|
||||||
metric_dict["unsup/loss"] = avg_unsup_loss
|
|
||||||
|
|
||||||
total_loss = utils.decay_weights(
|
|
||||||
total_loss,
|
|
||||||
FLAGS.weight_decay_rate)
|
|
||||||
|
|
||||||
#### Check model parameters
|
|
||||||
num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
|
|
||||||
tf.logging.info("#params: {}".format(num_params))
|
|
||||||
|
|
||||||
if FLAGS.verbose:
|
|
||||||
format_str = "{{:<{0}s}}\t{{}}".format(
|
|
||||||
max([len(v.name) for v in tf.trainable_variables()]))
|
|
||||||
for v in tf.trainable_variables():
|
|
||||||
tf.logging.info(format_str.format(v.name, v.get_shape()))
|
|
||||||
|
|
||||||
#### Evaluation mode
|
|
||||||
if mode == tf.estimator.ModeKeys.EVAL:
|
|
||||||
#### Metric function for classification
|
|
||||||
def metric_fn(per_example_loss, label_ids, logits):
|
|
||||||
# classification loss & accuracy
|
|
||||||
loss = tf.metrics.mean(per_example_loss)
|
|
||||||
|
|
||||||
predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
|
|
||||||
accuracy = tf.metrics.accuracy(label_ids, predictions)
|
|
||||||
|
|
||||||
ret_dict = {
|
|
||||||
"eval/classify_loss": loss,
|
|
||||||
"eval/classify_accuracy": accuracy
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret_dict
|
|
||||||
|
|
||||||
eval_metrics = (metric_fn, [sup_loss, sup_labels, sup_logits])
|
|
||||||
|
|
||||||
#### Constructing evaluation TPUEstimatorSpec.
|
|
||||||
eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
|
|
||||||
mode=mode,
|
|
||||||
loss=total_loss,
|
|
||||||
eval_metrics=eval_metrics)
|
|
||||||
|
|
||||||
return eval_spec
|
|
||||||
|
|
||||||
# increase the learning rate linearly
|
|
||||||
if FLAGS.warmup_steps > 0:
|
|
||||||
warmup_lr = tf.to_float(global_step) / tf.to_float(FLAGS.warmup_steps) \
|
|
||||||
* FLAGS.learning_rate
|
|
||||||
else:
|
|
||||||
warmup_lr = 0.0
|
|
||||||
|
|
||||||
# decay the learning rate using the cosine schedule
|
|
||||||
decay_lr = tf.train.cosine_decay(
|
|
||||||
FLAGS.learning_rate,
|
|
||||||
global_step=global_step-FLAGS.warmup_steps,
|
|
||||||
decay_steps=FLAGS.train_steps-FLAGS.warmup_steps,
|
|
||||||
alpha=FLAGS.min_lr_ratio)
|
|
||||||
|
|
||||||
learning_rate = tf.where(global_step < FLAGS.warmup_steps,
|
|
||||||
warmup_lr, decay_lr)
|
|
||||||
|
|
||||||
optimizer = tf.train.MomentumOptimizer(
|
|
||||||
learning_rate=learning_rate,
|
|
||||||
momentum=0.9,
|
|
||||||
use_nesterov=True)
|
|
||||||
|
|
||||||
if FLAGS.use_tpu:
|
|
||||||
optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
|
|
||||||
|
|
||||||
grads_and_vars = optimizer.compute_gradients(total_loss)
|
|
||||||
gradients, variables = zip(*grads_and_vars)
|
|
||||||
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
|
||||||
with tf.control_dependencies(update_ops):
|
|
||||||
train_op = optimizer.apply_gradients(
|
|
||||||
zip(gradients, variables), global_step=tf.train.get_global_step())
|
|
||||||
|
|
||||||
#### Creating training logging hook
|
|
||||||
# compute accuracy
|
|
||||||
sup_pred = tf.argmax(sup_logits, axis=-1, output_type=sup_labels.dtype)
|
|
||||||
is_correct = tf.to_float(tf.equal(sup_pred, sup_labels))
|
|
||||||
acc = tf.reduce_mean(is_correct)
|
|
||||||
metric_dict["sup/sup_loss"] = avg_sup_loss
|
|
||||||
metric_dict["training/loss"] = total_loss
|
|
||||||
metric_dict["sup/acc"] = acc
|
|
||||||
metric_dict["training/lr"] = learning_rate
|
|
||||||
metric_dict["training/step"] = global_step
|
|
||||||
|
|
||||||
if not FLAGS.use_tpu:
|
|
||||||
log_info = ("step [{training/step}] lr {training/lr:.6f} "
|
|
||||||
"loss {training/loss:.4f} "
|
|
||||||
"sup/acc {sup/acc:.4f} sup/loss {sup/sup_loss:.6f} ")
|
|
||||||
if FLAGS.unsup_ratio > 0:
|
|
||||||
log_info += "unsup/loss {unsup/loss:.6f} "
|
|
||||||
formatter = lambda kwargs: log_info.format(**kwargs)
|
|
||||||
logging_hook = tf.train.LoggingTensorHook(
|
|
||||||
tensors=metric_dict,
|
|
||||||
every_n_iter=FLAGS.iterations,
|
|
||||||
formatter=formatter)
|
|
||||||
training_hooks = [logging_hook]
|
|
||||||
#### Constructing training TPUEstimatorSpec.
|
|
||||||
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
|
|
||||||
mode=mode, loss=total_loss, train_op=train_op,
|
|
||||||
training_hooks=training_hooks)
|
|
||||||
else:
|
|
||||||
#### Constructing training TPUEstimatorSpec.
|
|
||||||
host_call = utils.construct_scalar_host_call(
|
|
||||||
metric_dict=metric_dict,
|
|
||||||
model_dir=params["model_dir"],
|
|
||||||
prefix="",
|
|
||||||
reduce_fn=tf.reduce_mean)
|
|
||||||
train_spec = tf.contrib.tpu.TPUEstimatorSpec(
|
|
||||||
mode=mode, loss=total_loss, train_op=train_op,
|
|
||||||
host_call=host_call)
|
|
||||||
|
|
||||||
return train_spec
|
|
||||||
|
|
||||||
return model_fn
|
|
||||||
|
|
||||||
|
|
||||||
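# NOTE (hedged sketch, not part of the original file): a minimal NumPy
# re-implementation of the linear-warmup + cosine-decay schedule built inside
# model_fn above; base_lr, warmup_steps, train_steps and min_lr_ratio stand in
# for the corresponding FLAGS values.
def _lr_at(step, base_lr, warmup_steps, train_steps, min_lr_ratio):
  import numpy as np
  if warmup_steps > 0 and step < warmup_steps:
    return base_lr * step / warmup_steps  # linear warmup towards base_lr
  progress = (step - warmup_steps) / max(1, train_steps - warmup_steps)
  cosine = 0.5 * (1.0 + np.cos(np.pi * progress))  # decays from 1 to 0
  return base_lr * ((1.0 - min_lr_ratio) * cosine + min_lr_ratio)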
def train(hparams):
|
|
||||||
##### Create input function
|
|
||||||
if FLAGS.unsup_ratio == 0:
|
|
||||||
FLAGS.aug_copy = 0
|
|
||||||
if FLAGS.dev_size != -1:
|
|
||||||
FLAGS.do_train = True
|
|
||||||
FLAGS.do_eval = True
|
|
||||||
if FLAGS.do_train:
|
|
||||||
train_input_fn = data.get_input_fn(
|
|
||||||
data_dir=FLAGS.data_dir,
|
|
||||||
split="train",
|
|
||||||
task_name=FLAGS.task_name,
|
|
||||||
sup_size=FLAGS.sup_size,
|
|
||||||
unsup_ratio=FLAGS.unsup_ratio,
|
|
||||||
aug_copy=FLAGS.aug_copy,
|
|
||||||
)
|
|
||||||
|
|
||||||
if FLAGS.do_eval:
|
|
||||||
if FLAGS.dev_size != -1:
|
|
||||||
eval_input_fn = data.get_input_fn(
|
|
||||||
data_dir=FLAGS.data_dir,
|
|
||||||
split="dev",
|
|
||||||
task_name=FLAGS.task_name,
|
|
||||||
sup_size=FLAGS.dev_size,
|
|
||||||
unsup_ratio=0,
|
|
||||||
aug_copy=0)
|
|
||||||
eval_size = FLAGS.dev_size
|
|
||||||
else:
|
|
||||||
eval_input_fn = data.get_input_fn(
|
|
||||||
data_dir=FLAGS.data_dir,
|
|
||||||
split="test",
|
|
||||||
task_name=FLAGS.task_name,
|
|
||||||
sup_size=-1,
|
|
||||||
unsup_ratio=0,
|
|
||||||
aug_copy=0)
|
|
||||||
if FLAGS.task_name == "cifar10":
|
|
||||||
eval_size = 10000
|
|
||||||
elif FLAGS.task_name == "svhn":
|
|
||||||
eval_size = 26032
|
|
||||||
else:
|
|
||||||
assert False, "You need to specify the size of your test set."
|
|
||||||
eval_steps = eval_size // FLAGS.eval_batch_size
|
|
||||||
|
|
||||||
##### Get model function
|
|
||||||
model_fn = get_model_fn(hparams)
|
|
||||||
estimator = utils.get_TPU_estimator(FLAGS, model_fn)
|
|
||||||
|
|
||||||
#### Training
|
|
||||||
if FLAGS.dev_size != -1:
|
|
||||||
tf.logging.info("***** Running training and validation *****")
|
|
||||||
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
|
|
||||||
tf.logging.info(" Unsupervised batch size = %d",
|
|
||||||
FLAGS.train_batch_size * FLAGS.unsup_ratio)
|
|
||||||
tf.logging.info(" Num train steps = %d", FLAGS.train_steps)
|
|
||||||
curr_step = 0
|
|
||||||
while True:
|
|
||||||
if curr_step >= FLAGS.train_steps:
|
|
||||||
break
|
|
||||||
tf.logging.info("Current step {}".format(curr_step))
|
|
||||||
train_step = min(FLAGS.save_steps, FLAGS.train_steps - curr_step)
|
|
||||||
estimator.train(input_fn=train_input_fn, steps=train_step)
|
|
||||||
estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
|
|
||||||
curr_step += FLAGS.save_steps
|
|
||||||
else:
|
|
||||||
if FLAGS.do_train:
|
|
||||||
tf.logging.info("***** Running training *****")
|
|
||||||
tf.logging.info(" Supervised batch size = %d", FLAGS.train_batch_size)
|
|
||||||
tf.logging.info(" Unsupervised batch size = %d",
|
|
||||||
FLAGS.train_batch_size * FLAGS.unsup_ratio)
|
|
||||||
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
|
|
||||||
if FLAGS.do_eval:
|
|
||||||
tf.logging.info("***** Running evaluation *****")
|
|
||||||
results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
|
|
||||||
tf.logging.info(">> Results:")
|
|
||||||
for key in results.keys():
|
|
||||||
tf.logging.info(" %s = %s", key, str(results[key]))
|
|
||||||
results[key] = results[key].item()
|
|
||||||
acc = results["eval/classify_accuracy"]
|
|
||||||
with tf.gfile.Open("{}/results.txt".format(FLAGS.model_dir), "w") as ouf:
|
|
||||||
ouf.write(str(acc))
|
|
||||||
|
|
||||||
|
|
||||||
def main(_):
|
|
||||||
|
|
||||||
if FLAGS.do_train:
|
|
||||||
tf.gfile.MakeDirs(FLAGS.model_dir)
|
|
||||||
flags_dict = tf.app.flags.FLAGS.flag_values_dict()
|
|
||||||
with tf.gfile.Open(os.path.join(FLAGS.model_dir, "FLAGS.json"), "w") as ouf:
|
|
||||||
json.dump(flags_dict, ouf)
|
|
||||||
hparams = tf.contrib.training.HParams()
|
|
||||||
|
|
||||||
if FLAGS.model_name == "wrn":
|
|
||||||
hparams.add_hparam("model_name", "wrn")
|
|
||||||
hparams.add_hparam("wrn_size", FLAGS.wrn_size)
|
|
||||||
elif FLAGS.model_name == "shake_shake_32":
|
|
||||||
hparams.add_hparam("model_name", "shake_shake")
|
|
||||||
hparams.add_hparam("shake_shake_widen_factor", 2)
|
|
||||||
elif FLAGS.model_name == "shake_shake_96":
|
|
||||||
hparams.add_hparam("model_name", "shake_shake")
|
|
||||||
hparams.add_hparam("shake_shake_widen_factor", 6)
|
|
||||||
elif FLAGS.model_name == "shake_shake_112":
|
|
||||||
hparams.add_hparam("model_name", "shake_shake")
|
|
||||||
hparams.add_hparam("shake_shake_widen_factor", 7)
|
|
||||||
elif FLAGS.model_name == "pyramid_net":
|
|
||||||
hparams.add_hparam("model_name", "pyramid_net")
|
|
||||||
|
|
||||||
elif FLAGS.model_name == "LeNet":
|
|
||||||
hparams.add_hparam("model_name", "LeNet")
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError("Not Valid Model Name: %s" % FLAGS.model_name)
|
|
||||||
|
|
||||||
train(hparams)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
tf.logging.set_verbosity(tf.logging.INFO)
|
|
||||||
tf.app.run()
|
|
|
@ -1,31 +0,0 @@
|
||||||
# coding=utf-8
|
|
||||||
# Copyright 2019 The Google UDA Team Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
task_name=cifar10
|
|
||||||
|
|
||||||
python main.py \
|
|
||||||
--model_name="LeNet"\
|
|
||||||
--use_tpu=False \
|
|
||||||
--do_train=True \
|
|
||||||
--do_eval=True \
|
|
||||||
--task_name=${task_name} \
|
|
||||||
--sup_size=4000 \
|
|
||||||
--unsup_ratio=5 \
|
|
||||||
--train_batch_size=32 \
|
|
||||||
--data_dir=data/proc_data/${task_name} \
|
|
||||||
--model_dir=ckpt/cifar10_gpu \
|
|
||||||
--train_steps=400000 \
|
|
||||||
$@
|
|
|
@ -1,271 +0,0 @@
|
||||||
# code in this file is adapted from rpmcruz/autoaugment
|
|
||||||
# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
|
|
||||||
import random
|
|
||||||
|
|
||||||
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
def ShearX(img, v): # [-0.3, 0.3]
|
|
||||||
assert -0.3 <= v <= 0.3
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0))
|
|
||||||
|
|
||||||
|
|
||||||
def ShearY(img, v): # [-0.3, 0.3]
|
|
||||||
assert -0.3 <= v <= 0.3
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0))
|
|
||||||
|
|
||||||
|
|
||||||
def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
|
|
||||||
assert -0.45 <= v <= 0.45
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
v = v * img.size[0]
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
|
|
||||||
|
|
||||||
|
|
||||||
def TranslateXabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
|
|
||||||
assert 0 <= v
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
|
|
||||||
|
|
||||||
|
|
||||||
def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
|
|
||||||
assert -0.45 <= v <= 0.45
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
v = v * img.size[1]
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
|
|
||||||
|
|
||||||
|
|
||||||
def TranslateYabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
|
|
||||||
assert 0 <= v
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
|
|
||||||
|
|
||||||
|
|
||||||
def Rotate(img, v): # [-30, 30]
|
|
||||||
assert -30 <= v <= 30
|
|
||||||
if random.random() > 0.5:
|
|
||||||
v = -v
|
|
||||||
return img.rotate(v)
|
|
||||||
|
|
||||||
|
|
||||||
def AutoContrast(img, _):
|
|
||||||
return PIL.ImageOps.autocontrast(img)
|
|
||||||
|
|
||||||
|
|
||||||
def Invert(img, _):
|
|
||||||
return PIL.ImageOps.invert(img)
|
|
||||||
|
|
||||||
|
|
||||||
def Equalize(img, _):
|
|
||||||
return PIL.ImageOps.equalize(img)
|
|
||||||
|
|
||||||
|
|
||||||
def Flip(img, _): # not from the paper
|
|
||||||
return PIL.ImageOps.mirror(img)
|
|
||||||
|
|
||||||
def FlipLR(img, v):
|
|
||||||
return img.transpose(Image.FLIP_LEFT_RIGHT)
|
|
||||||
|
|
||||||
def FlipUD(img, v):
|
|
||||||
return img.transpose(Image.FLIP_TOP_BOTTOM)
|
|
||||||
|
|
||||||
def Solarize(img, v): # [0, 256]
|
|
||||||
assert 0 <= v <= 256
|
|
||||||
return PIL.ImageOps.solarize(img, v)
|
|
||||||
|
|
||||||
|
|
||||||
def SolarizeAdd(img, addition=0, threshold=128):
|
|
||||||
img_np = np.array(img).astype(int)  # np.int is deprecated; the builtin int works
|
|
||||||
img_np = img_np + addition
|
|
||||||
img_np = np.clip(img_np, 0, 255)
|
|
||||||
img_np = img_np.astype(np.uint8)
|
|
||||||
img = Image.fromarray(img_np)
|
|
||||||
return PIL.ImageOps.solarize(img, threshold)
|
|
||||||
|
|
||||||
|
|
||||||
def Posterize(img, v): # [4, 8]
|
|
||||||
v = int(v)
|
|
||||||
v = max(1, v)
|
|
||||||
return PIL.ImageOps.posterize(img, v)
|
|
||||||
|
|
||||||
|
|
||||||
def Contrast(img, v): # [0.1,1.9]
|
|
||||||
assert 0.1 <= v <= 1.9
|
|
||||||
return PIL.ImageEnhance.Contrast(img).enhance(v)
|
|
||||||
|
|
||||||
|
|
||||||
def Color(img, v): # [0.1,1.9]
|
|
||||||
assert 0.1 <= v <= 1.9
|
|
||||||
return PIL.ImageEnhance.Color(img).enhance(v)
|
|
||||||
|
|
||||||
|
|
||||||
def Brightness(img, v): # [0.1,1.9]
|
|
||||||
assert 0.1 <= v <= 1.9
|
|
||||||
return PIL.ImageEnhance.Brightness(img).enhance(v)
|
|
||||||
|
|
||||||
|
|
||||||
def Sharpness(img, v): # [0.1,1.9]
|
|
||||||
assert 0.1 <= v <= 1.9
|
|
||||||
return PIL.ImageEnhance.Sharpness(img).enhance(v)
|
|
||||||
|
|
||||||
|
|
||||||
def Cutout(img, v): # [0, 60] => percentage: [0, 0.2]
|
|
||||||
assert 0.0 <= v <= 0.2
|
|
||||||
if v <= 0.:
|
|
||||||
return img
|
|
||||||
|
|
||||||
v = v * img.size[0]
|
|
||||||
return CutoutAbs(img, v)
|
|
||||||
|
|
||||||
|
|
||||||
def CutoutAbs(img, v): # [0, 60] => percentage: [0, 0.2]
|
|
||||||
# assert 0 <= v <= 20
|
|
||||||
if v < 0:
|
|
||||||
return img
|
|
||||||
w, h = img.size
|
|
||||||
x0 = np.random.uniform(w)
|
|
||||||
y0 = np.random.uniform(h)
|
|
||||||
|
|
||||||
x0 = int(max(0, x0 - v / 2.))
|
|
||||||
y0 = int(max(0, y0 - v / 2.))
|
|
||||||
x1 = min(w, x0 + v)
|
|
||||||
y1 = min(h, y0 + v)
|
|
||||||
|
|
||||||
xy = (x0, y0, x1, y1)
|
|
||||||
color = (125, 123, 114)
|
|
||||||
# color = (0, 0, 0)
|
|
||||||
img = img.copy()
|
|
||||||
PIL.ImageDraw.Draw(img).rectangle(xy, color)
|
|
||||||
return img
|
|
||||||
|
|
||||||
|
|
||||||
def SamplePairing(imgs): # [0, 0.4]
|
|
||||||
def f(img1, v):
|
|
||||||
i = np.random.choice(len(imgs))
|
|
||||||
img2 = PIL.Image.fromarray(imgs[i])
|
|
||||||
return PIL.Image.blend(img1, img2, v)
|
|
||||||
|
|
||||||
return f
|
|
||||||
|
|
||||||
|
|
||||||
def Identity(img, v):
|
|
||||||
return img
|
|
||||||
|
|
||||||
|
|
||||||
def augment_list(): # 16 operations and their ranges
|
|
||||||
# https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
|
|
||||||
l = [
|
|
||||||
(Identity, 0., 1.0),
|
|
||||||
(FlipUD, 0., 1.0),
|
|
||||||
(FlipLR, 0., 1.0),
|
|
||||||
(Rotate, 0, 30), # 4
|
|
||||||
(TranslateX, 0., 0.33), # 2
|
|
||||||
(TranslateY, 0., 0.33), # 3
|
|
||||||
(ShearX, 0., 0.3), # 0
|
|
||||||
(ShearY, 0., 0.3), # 1
|
|
||||||
#(AutoContrast, 0, 1), # 5
|
|
||||||
#(Invert, 0, 1), # 6
|
|
||||||
#(Equalize, 0, 1), # 7
|
|
||||||
(Contrast, 0.1, 1.9), # 10
|
|
||||||
(Color, 0.1, 1.9), # 11
|
|
||||||
(Brightness, 0.1, 1.9), # 12
|
|
||||||
(Sharpness, 0.1, 1.9), # 13
|
|
||||||
(Posterize, 4, 8), # 9
|
|
||||||
(Solarize, 1, 256), # 8
|
|
||||||
|
|
||||||
# (Cutout, 0, 0.2), # 14
|
|
||||||
# (SamplePairing(imgs), 0, 0.4), # 15
|
|
||||||
]
|
|
||||||
|
|
||||||
# https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
|
|
||||||
#l = [
|
|
||||||
# (AutoContrast, 0, 1),
|
|
||||||
# (Equalize, 0, 1),
|
|
||||||
# (Invert, 0, 1),
|
|
||||||
# (Rotate, 0, 30),
|
|
||||||
# (Posterize, 0, 4),
|
|
||||||
# (Solarize, 0, 256),
|
|
||||||
# (SolarizeAdd, 0, 110),
|
|
||||||
# (Color, 0.1, 1.9),
|
|
||||||
# (Contrast, 0.1, 1.9),
|
|
||||||
# (Brightness, 0.1, 1.9),
|
|
||||||
# (Sharpness, 0.1, 1.9),
|
|
||||||
# (ShearX, 0., 0.3),
|
|
||||||
# (ShearY, 0., 0.3),
|
|
||||||
# (CutoutAbs, 0, 40),
|
|
||||||
# (TranslateXabs, 0., 100),
|
|
||||||
# (TranslateYabs, 0., 100),
|
|
||||||
#]
|
|
||||||
|
|
||||||
return l
|
|
||||||
|
|
||||||
|
|
||||||
class Lighting(object):
|
|
||||||
"""Lighting noise(AlexNet - style PCA - based noise)"""
|
|
||||||
|
|
||||||
def __init__(self, alphastd, eigval, eigvec):
|
|
||||||
self.alphastd = alphastd
|
|
||||||
self.eigval = torch.Tensor(eigval)
|
|
||||||
self.eigvec = torch.Tensor(eigvec)
|
|
||||||
|
|
||||||
def __call__(self, img):
|
|
||||||
if self.alphastd == 0:
|
|
||||||
return img
|
|
||||||
|
|
||||||
alpha = img.new().resize_(3).normal_(0, self.alphastd)
|
|
||||||
rgb = self.eigvec.type_as(img).clone() \
|
|
||||||
.mul(alpha.view(1, 3).expand(3, 3)) \
|
|
||||||
.mul(self.eigval.view(1, 3).expand(3, 3)) \
|
|
||||||
.sum(1).squeeze()
|
|
||||||
|
|
||||||
return img.add(rgb.view(3, 1, 1).expand_as(img))
|
|
||||||
|
|
||||||
|
|
||||||
class CutoutDefault(object):
|
|
||||||
"""
|
|
||||||
Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
|
|
||||||
"""
|
|
||||||
def __init__(self, length):
|
|
||||||
self.length = length
|
|
||||||
|
|
||||||
def __call__(self, img):
|
|
||||||
h, w = img.size(1), img.size(2)
|
|
||||||
mask = np.ones((h, w), np.float32)
|
|
||||||
y = np.random.randint(h)
|
|
||||||
x = np.random.randint(w)
|
|
||||||
|
|
||||||
y1 = np.clip(y - self.length // 2, 0, h)
|
|
||||||
y2 = np.clip(y + self.length // 2, 0, h)
|
|
||||||
x1 = np.clip(x - self.length // 2, 0, w)
|
|
||||||
x2 = np.clip(x + self.length // 2, 0, w)
|
|
||||||
|
|
||||||
mask[y1: y2, x1: x2] = 0.
|
|
||||||
mask = torch.from_numpy(mask)
|
|
||||||
mask = mask.expand_as(img)
|
|
||||||
img *= mask
|
|
||||||
return img
|
|
||||||
|
|
||||||
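# Hedged usage sketch (not in the original file): CutoutDefault expects a CHW
# tensor, e.g. appended to a torchvision pipeline after ToTensor().
def _cutout_demo():
    img = torch.rand(3, 32, 32)
    return CutoutDefault(length=8)(img)  # zeroes one random (clipped) 8x8 square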
PARAMETER_MAX = 1
|
|
||||||
class RandAugment:
|
|
||||||
def __init__(self, n, m):
|
|
||||||
self.n = n
|
|
||||||
self.m = m # [0, PARAMETER_MAX]
|
|
||||||
self.augment_list = augment_list()
|
|
||||||
|
|
||||||
def __call__(self, img):
|
|
||||||
ops = random.choices(self.augment_list, k=self.n)
|
|
||||||
for op, minval, maxval in ops:
|
|
||||||
val = (float(self.m) / PARAMETER_MAX) * float(maxval - minval) + minval
|
|
||||||
img = op(img, val)
|
|
||||||
|
|
||||||
return img
|
|
|
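# Hedged usage sketch (not in the original file): applies n random ops from
# augment_list() at magnitude m in [0, PARAMETER_MAX]; the input path below is
# hypothetical.
def _randaugment_demo():
    img = Image.open("example.png").convert("RGB")
    augmenter = RandAugment(n=2, m=1)  # with PARAMETER_MAX = 1, m=1 means full magnitude
    return augmenter(img)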
@ -1,98 +0,0 @@
|
||||||
import torch
|
|
||||||
import numpy as np
|
|
||||||
import torchvision
|
|
||||||
from PIL import Image
|
|
||||||
from torch import topk
|
|
||||||
import torch.nn.functional as F
|
|
||||||
from torch import topk
|
|
||||||
import cv2
|
|
||||||
from torchvision import transforms
|
|
||||||
import os
|
|
||||||
|
|
||||||
class SaveFeatures():
|
|
||||||
features=None
|
|
||||||
def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
|
|
||||||
def hook_fn(self, module, input, output): self.features = ((output.cpu()).data).numpy()
|
|
||||||
def remove(self): self.hook.remove()
|
|
||||||
|
|
||||||
def getCAM(feature_conv, weight_fc, class_idx):
|
|
||||||
_, nc, h, w = feature_conv.shape
|
|
||||||
cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h*w)))
|
|
||||||
cam = cam.reshape(h, w)
|
|
||||||
cam = cam - np.min(cam)
|
|
||||||
cam_img = cam / np.max(cam)
|
|
||||||
# cam_img = np.uint8(255 * cam_img)
|
|
||||||
return cam_img
|
|
||||||
|
|
||||||
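# Hedged sketch (not part of the original script): getCAM on dummy data;
# feature maps of shape (1, nc, h, w) and fc weights of shape (num_classes, nc)
# yield an h x w map normalized to [0, 1].
def _cam_demo():
    feats = np.random.rand(1, 2048, 7, 7).astype(np.float32)
    fc_weights = np.random.rand(2, 2048).astype(np.float32)
    return getCAM(feats, fc_weights, class_idx=1)  # shape (7, 7)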
def main(cam):
|
|
||||||
device = 'cuda:0'
|
|
||||||
model_name = 'resnet50'
|
|
||||||
root = 'NEW_SS'
|
|
||||||
|
|
||||||
os.makedirs(os.path.join(root + '_CAM', 'OK'), exist_ok=True)
|
|
||||||
os.makedirs(os.path.join(root + '_CAM', 'NOK'), exist_ok=True)
|
|
||||||
|
|
||||||
train_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
])
|
|
||||||
|
|
||||||
dataset = torchvision.datasets.ImageFolder(
|
|
||||||
root=root, transform=train_transform,
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
|
|
||||||
|
|
||||||
model = torchvision.models.__dict__[model_name](pretrained=False)
|
|
||||||
model.fc = torch.nn.Linear(model.fc.in_features, 2)
|
|
||||||
|
|
||||||
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
|
|
||||||
model = model.to(device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
weight_softmax_params = list(model._modules.get('fc').parameters())
|
|
||||||
weight_softmax = np.squeeze(weight_softmax_params[0].cpu().data.numpy())
|
|
||||||
|
|
||||||
final_layer = model._modules.get('layer4')
|
|
||||||
|
|
||||||
activated_features = SaveFeatures(final_layer)
|
|
||||||
|
|
||||||
for i, (img, target ) in enumerate(loader):
|
|
||||||
img = img.to(device)
|
|
||||||
prediction = model(img)
|
|
||||||
pred_probabilities = F.softmax(prediction, dim=1).data.squeeze()
|
|
||||||
class_idx = topk(pred_probabilities,1)[1].int()
|
|
||||||
# if target.item() != class_idx:
|
|
||||||
# print(dataset.imgs[i][0])
|
|
||||||
|
|
||||||
if cam:
|
|
||||||
overlay = getCAM(activated_features.features, weight_softmax, class_idx )
|
|
||||||
|
|
||||||
import ipdb; ipdb.set_trace()
|
|
||||||
import PIL
|
|
||||||
from torchvision.transforms import ToPILImage
|
|
||||||
|
|
||||||
img = ToPILImage()(overlay).resize(size=(1280, 1024), resample=PIL.Image.BILINEAR)
|
|
||||||
img.save('heat-pil.jpg')
|
|
||||||
|
|
||||||
|
|
||||||
img = cv2.imread(dataset.imgs[i][0])
|
|
||||||
height, width, _ = img.shape
|
|
||||||
overlay = cv2.resize(overlay, (width, height))
|
|
||||||
heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET)
|
|
||||||
cv2.imwrite('heat-cv2.jpg', heatmap)
|
|
||||||
|
|
||||||
img = cv2.imread(dataset.imgs[i][0])
|
|
||||||
height, width, _ = img.shape
|
|
||||||
overlay = cv2.resize(overlay, (width, height))
|
|
||||||
heatmap = cv2.applyColorMap(overlay, cv2.COLORMAP_JET)
|
|
||||||
result = heatmap * 0.3 + img * 0.5
|
|
||||||
|
|
||||||
clss = dataset.imgs[i][0].split(os.sep)[1]
|
|
||||||
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
|
|
||||||
cv2.imwrite(os.path.join(root+"_CAM", clss, name + '.jpg'), result)
|
|
||||||
print(f'{os.path.join(root+"_CAM", clss, name + ".jpg")} saved')
|
|
||||||
|
|
||||||
activated_features.remove()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main(cam=True)
|
|
Binary file not shown.
File diff suppressed because it is too large
|
@ -1,314 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import json, math, time, os
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import copy
|
|
||||||
import gc
|
|
||||||
|
|
||||||
from torchviz import make_dot
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
class timer():
|
|
||||||
def __init__(self):
|
|
||||||
self._start_time=time.time()
|
|
||||||
def exec_time(self):
|
|
||||||
end = time.time()
|
|
||||||
res = end-self._start_time
|
|
||||||
self._start_time=end
|
|
||||||
return res
|
|
||||||
|
|
||||||
def print_graph(PyTorch_obj, fig_name='graph'):
|
|
||||||
graph=make_dot(PyTorch_obj) #Passing the loss gives the whole graph
|
|
||||||
graph.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
|
|
||||||
graph.render(fig_name)
|
|
||||||
|
|
||||||
def plot_res(log, fig_name='res', param_names=None):
|
|
||||||
|
|
||||||
epochs = [x["epoch"] for x in log]
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
|
|
||||||
|
|
||||||
ax[0].set_title('Loss')
|
|
||||||
ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
|
|
||||||
ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
|
|
||||||
ax[0].legend()
|
|
||||||
|
|
||||||
ax[1].set_title('Acc')
|
|
||||||
ax[1].plot(epochs,[x["acc"] for x in log])
|
|
||||||
|
|
||||||
if log[0]["param"]!= None:
|
|
||||||
if isinstance(log[0]["param"],float):
|
|
||||||
ax[2].set_title('Mag')
|
|
||||||
ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
|
|
||||||
ax[2].legend()
|
|
||||||
else :
|
|
||||||
ax[2].set_title('Prob')
|
|
||||||
#for idx, _ in enumerate(log[0]["param"]):
|
|
||||||
#ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
|
|
||||||
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
ax[2].stackplot(epochs, proba, labels=param_names)
|
|
||||||
ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
|
|
||||||
|
|
||||||
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name)
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def plot_resV2(log, fig_name='res', param_names=None):
|
|
||||||
|
|
||||||
epochs = [x["epoch"] for x in log]
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))
|
|
||||||
|
|
||||||
ax[0, 0].set_title('Loss')
|
|
||||||
ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
|
|
||||||
ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
|
|
||||||
ax[0, 0].legend()
|
|
||||||
|
|
||||||
ax[1, 0].set_title('Acc')
|
|
||||||
ax[1, 0].plot(epochs,[x["acc"] for x in log])
|
|
||||||
|
|
||||||
if log[0]["param"]!= None:
|
|
||||||
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
#proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
|
|
||||||
ax[0, 1].set_title('Prob =f(epoch)')
|
|
||||||
ax[0, 1].stackplot(epochs, proba, labels=param_names)
|
|
||||||
#ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
|
|
||||||
|
|
||||||
ax[1, 1].set_title('Prob =f(TF)')
|
|
||||||
mean = np.mean(proba, axis=1)
|
|
||||||
std = np.std(proba, axis=1)
|
|
||||||
ax[1, 1].bar(param_names, mean, yerr=std)
|
|
||||||
plt.sca(ax[1, 1]), plt.xticks(rotation=90)
|
|
||||||
|
|
||||||
ax[0, 2].set_title('Mag =f(epoch)')
|
|
||||||
ax[0, 2].stackplot(epochs, mag, labels=param_names)
|
|
||||||
ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
|
|
||||||
|
|
||||||
ax[1, 2].set_title('Mag =f(TF)')
|
|
||||||
mean = np.mean(mag, axis=1)
|
|
||||||
std = np.std(mag, axis=1)
|
|
||||||
ax[1, 2].bar(param_names, mean, yerr=std)
|
|
||||||
plt.sca(ax[1, 2]), plt.xticks(rotation=90)
|
|
||||||
|
|
||||||
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def plot_compare(filenames, fig_name='res'):
|
|
||||||
|
|
||||||
all_data=[]
|
|
||||||
legend=""
|
|
||||||
for idx, file in enumerate(filenames):
|
|
||||||
legend+=str(idx)+'-'+file+'\n'
|
|
||||||
with open(file) as json_file:
|
|
||||||
data = json.load(json_file)
|
|
||||||
all_data.append(data)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
|
|
||||||
|
|
||||||
for data_idx, log in enumerate(all_data):
|
|
||||||
log=log['Log']
|
|
||||||
epochs = [x["epoch"] for x in log]
|
|
||||||
|
|
||||||
ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
|
|
||||||
ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
|
|
||||||
|
|
||||||
ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
|
|
||||||
#ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
|
|
||||||
|
|
||||||
if log[0]["param"]!= None:
|
|
||||||
if isinstance(log[0]["param"],float):
|
|
||||||
ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
|
|
||||||
|
|
||||||
else :
|
|
||||||
for idx, _ in enumerate(log[0]["param"]):
|
|
||||||
ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
|
|
||||||
|
|
||||||
fig.suptitle(legend)
|
|
||||||
ax[0].set_title('Loss')
|
|
||||||
ax[1].set_title('Acc')
|
|
||||||
ax[2].set_title('Param')
|
|
||||||
for a in ax: a.legend()
|
|
||||||
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def plot_res_compare(filenames, fig_name='res'):
|
|
||||||
|
|
||||||
all_data=[]
|
|
||||||
#legend=""
|
|
||||||
for idx, file in enumerate(filenames):
|
|
||||||
#legend+=str(idx)+'-'+file+'\n'
|
|
||||||
with open(file) as json_file:
|
|
||||||
data = json.load(json_file)
|
|
||||||
all_data.append(data)
|
|
||||||
|
|
||||||
n_tf = [len(x["Param_names"]) for x in all_data]
|
|
||||||
acc = [x["Accuracy"] for x in all_data]
|
|
||||||
time = [x["Time"][0] for x in all_data]
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
|
|
||||||
|
|
||||||
ax[0].plot(n_tf, acc)
|
|
||||||
ax[1].plot(n_tf, time)
|
|
||||||
|
|
||||||
ax[0].set_title('Acc')
|
|
||||||
ax[1].set_title('Time')
|
|
||||||
#for a in ax: a.legend()
|
|
||||||
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def plot_TF_res(log, tf_names, fig_name='res'):
|
|
||||||
|
|
||||||
mean = np.mean([x["param"] for x in log], axis=0)
|
|
||||||
std = np.std([x["param"] for x in log], axis=0)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
|
|
||||||
ax.bar(tf_names, mean, yerr=std)
|
|
||||||
#ax.bar(tf_names, log[-1]["param"])
|
|
||||||
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def viz_sample_data(imgs, labels, fig_name='data_sample'):
|
|
||||||
|
|
||||||
sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
|
|
||||||
|
|
||||||
plt.figure(figsize=(10,10))
|
|
||||||
for i in range(25):
|
|
||||||
plt.subplot(5,5,i+1)
|
|
||||||
plt.xticks([])
|
|
||||||
plt.yticks([])
|
|
||||||
plt.grid(False)
|
|
||||||
plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
|
|
||||||
plt.xlabel(labels[i].item())
|
|
||||||
|
|
||||||
plt.savefig(fig_name)
|
|
||||||
print("Sample saved :", fig_name)
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
def model_copy(src,dst, patch_copy=True, copy_grad=True):
|
|
||||||
#model=copy.deepcopy(fmodel) #Not appropriate: we only want the weights/grads (not the whole fmodel and its internal state)
|
|
||||||
|
|
||||||
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
|
|
||||||
|
|
||||||
if patch_copy:
|
|
||||||
dst['model'].load_state_dict(src['model'].state_dict()) #Copy missing data?
|
|
||||||
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
|
|
||||||
|
|
||||||
#Copy the gradients
|
|
||||||
if copy_grad:
|
|
||||||
for paramName, paramValue, in src.named_parameters():
|
|
||||||
for netCopyName, netCopyValue, in dst.named_parameters():
|
|
||||||
if paramName == netCopyName:
|
|
||||||
netCopyValue.grad = paramValue.grad
|
|
||||||
#netCopyValue=copy.deepcopy(paramValue)
|
|
||||||
|
|
||||||
try: #Data_augV4
|
|
||||||
dst['data_aug']._input_info = src['data_aug']._input_info
|
|
||||||
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def optim_copy(dopt, opt):
|
|
||||||
|
|
||||||
#inner_opt.load_state_dict(diffopt.state_dict()) #Need to save the optimizer state (momentum, etc.) => does not copy the state...
|
|
||||||
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
|
|
||||||
|
|
||||||
for group_idx, group in enumerate(opt.param_groups):
|
|
||||||
# print('gp idx',group_idx)
|
|
||||||
for p_idx, p in enumerate(group['params']):
|
|
||||||
opt.state[p]=dopt.state[group_idx][p_idx]
|
|
||||||
|
|
||||||
def print_torch_mem(add_info=''):
|
|
||||||
|
|
||||||
nb=0
|
|
||||||
max_size=0
|
|
||||||
for obj in gc.get_objects():
|
|
||||||
#print(type(obj))
|
|
||||||
try:
|
|
||||||
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
|
|
||||||
#print(i, type(obj), obj.size())
|
|
||||||
size = np.sum(obj.size())
|
|
||||||
if(size>max_size): max_size=size
|
|
||||||
nb+=1
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
print(add_info, "-PyTorch tensor nb:",nb," / Max dim:", max_size)
|
|
||||||
|
|
||||||
#print(add_info, "-Garbage size :",len(gc.garbage))
|
|
||||||
|
|
||||||
"""Simple GPU memory report."""
|
|
||||||
|
|
||||||
mega_bytes = 1024.0 * 1024.0
|
|
||||||
string = add_info + ' memory (MB)'
|
|
||||||
string += ' | allocated: {}'.format(
|
|
||||||
torch.cuda.memory_allocated() / mega_bytes)
|
|
||||||
string += ' | max allocated: {}'.format(
|
|
||||||
torch.cuda.max_memory_allocated() / mega_bytes)
|
|
||||||
string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
|
|
||||||
string += ' | max cached: {}'.format(
|
|
||||||
torch.cuda.max_memory_cached()/ mega_bytes)
|
|
||||||
print(string)
|
|
||||||
|
|
||||||
def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
|
|
||||||
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
|
||||||
|
|
||||||
plt.figure()
|
|
||||||
|
|
||||||
mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) #Could be worth multiplying before taking the mean
|
|
||||||
std = np.std(proba, axis=1)*np.std(mag, axis=1)
|
|
||||||
plt.bar(param_names, mean, yerr=std)
|
|
||||||
|
|
||||||
plt.xticks(rotation=90)
|
|
||||||
fig_name = fig_name.replace('.',',')
|
|
||||||
plt.savefig(fig_name, bbox_inches='tight')
|
|
||||||
plt.close()
|
|
||||||
|
|
||||||
class loss_monitor(): #See https://github.com/pytorch/ignite
|
|
||||||
def __init__(self, patience, end_train=1):
|
|
||||||
self.patience = patience
|
|
||||||
self.end_train = end_train
|
|
||||||
self.counter = 0
|
|
||||||
self.best_score = None
|
|
||||||
self.reached_limit = 0
|
|
||||||
|
|
||||||
def register(self, loss):
|
|
||||||
if self.best_score is None:
|
|
||||||
self.best_score = loss
|
|
||||||
elif loss > self.best_score:
|
|
||||||
self.counter += 1
|
|
||||||
#if not self.reached_limit:
|
|
||||||
print("loss no improve counter", self.counter, self.reached_limit)
|
|
||||||
else:
|
|
||||||
self.best_score = loss
|
|
||||||
self.counter = 0
|
|
||||||
def limit_reached(self):
|
|
||||||
if self.counter >= self.patience:
|
|
||||||
self.counter = 0
|
|
||||||
self.reached_limit +=1
|
|
||||||
self.best_score = None
|
|
||||||
return self.reached_limit
|
|
||||||
|
|
||||||
def end_training(self):
|
|
||||||
if self.limit_reached() >= self.end_train:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.__init__(self.patience, self.end_train)
|
|
|
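# Hedged usage sketch (not part of the original file): feed validation losses
# epoch by epoch and stop once the patience limit is reached end_train times.
def _loss_monitor_demo(val_losses, patience=2, end_train=1):
    monitor = loss_monitor(patience, end_train)
    for epoch, val_loss in enumerate(val_losses):
        monitor.register(val_loss)
        if monitor.end_training():
            return epoch  # epoch at which training would stop
    return None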
@ -1,102 +0,0 @@
|
||||||
import torch
|
|
||||||
import numpy as np
|
|
||||||
import torchvision
|
|
||||||
from PIL import Image
|
|
||||||
from torch import topk
|
|
||||||
from torch import nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
from torch import topk
|
|
||||||
import cv2
|
|
||||||
from torchvision import transforms
|
|
||||||
import os
|
|
||||||
|
|
||||||
class Lambda(nn.Module):
|
|
||||||
"Create a layer that simply calls `func` with `x`"
|
|
||||||
def __init__(self, func):
|
|
||||||
super().__init__()
|
|
||||||
self.func=func
|
|
||||||
def forward(self, x): return self.func(x)
|
|
||||||
|
|
||||||
class SaveFeatures():
|
|
||||||
activations, gradients = None, None
|
|
||||||
def __init__(self, m):
|
|
||||||
self.forward = m.register_forward_hook(self.forward_hook_fn)
|
|
||||||
self.backward = m.register_backward_hook(self.backward_hook_fn)
|
|
||||||
|
|
||||||
def forward_hook_fn(self, module, input, output):
|
|
||||||
self.activations = output.cpu().detach()
|
|
||||||
|
|
||||||
def backward_hook_fn(self, module, grad_input, grad_output):
|
|
||||||
self.gradients = grad_output[0].cpu().detach()
|
|
||||||
|
|
||||||
def remove(self):
|
|
||||||
self.forward.remove()
|
|
||||||
self.backward.remove()
|
|
||||||
|
|
||||||
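# Hedged sketch (not part of the original script): hooking a layer records its
# activations on the forward pass and its output gradients on backward, which
# is what the Grad-CAM computation below relies on.
def _hook_demo():
    layer = nn.Conv2d(3, 8, 3, padding=1)
    saved = SaveFeatures(layer)
    layer(torch.randn(1, 3, 32, 32)).sum().backward()
    acts, grads = saved.activations, saved.gradients  # both of shape (1, 8, 32, 32)
    saved.remove()
    return acts, grads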
def main(cam):
|
|
||||||
device = 'cuda:0'
|
|
||||||
model_name = 'resnet50'
|
|
||||||
root = '/mnt/md0/data/cifar10/tmp/cifar/train'
|
|
||||||
_root = 'cifar'
|
|
||||||
|
|
||||||
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
|
|
||||||
os.makedirs(os.path.join(_root + '_CAM'), exist_ok=True)
|
|
||||||
|
|
||||||
train_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
])
|
|
||||||
|
|
||||||
dataset = torchvision.datasets.ImageFolder(
|
|
||||||
root=root, transform=train_transform,
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = torch.utils.data.DataLoader(dataset, batch_size=1)
|
|
||||||
model = torchvision.models.__dict__[model_name](pretrained=True)
|
|
||||||
flat = list(model.children())
|
|
||||||
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(loader.dataset.classes)))
|
|
||||||
model = nn.Sequential(body, head)
|
|
||||||
|
|
||||||
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
|
|
||||||
model = model.to(device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
activated_features = SaveFeatures(model[0])
|
|
||||||
|
|
||||||
for i, (img, target ) in enumerate(loader):
|
|
||||||
img = img.to(device)
|
|
||||||
pred = model(img)
|
|
||||||
import ipdb; ipdb.set_trace()
|
|
||||||
# get the gradient of the output with respect to the parameters of the model
|
|
||||||
pred[:, target.item()].backward()
|
|
||||||
|
|
||||||
# import ipdb; ipdb.set_trace()
|
|
||||||
# pull the gradients out of the model
|
|
||||||
gradients = activated_features.gradients[0]
|
|
||||||
|
|
||||||
pooled_gradients = gradients.mean(1).mean(1)
|
|
||||||
|
|
||||||
# get the activations of the last convolutional layer
|
|
||||||
activations = activated_features.activations[0]
|
|
||||||
|
|
||||||
heatmap = F.relu(((activations*pooled_gradients[...,None,None])).sum(0))
|
|
||||||
heatmap /= torch.max(heatmap)
|
|
||||||
|
|
||||||
heatmap = heatmap.numpy()
|
|
||||||
|
|
||||||
|
|
||||||
image = cv2.imread(dataset.imgs[i][0])
|
|
||||||
heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
|
|
||||||
heatmap = np.uint8(255 * heatmap)
|
|
||||||
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
|
|
||||||
# superimposed_img = heatmap * 0.3 + image * 0.5
|
|
||||||
superimposed_img = heatmap
|
|
||||||
|
|
||||||
clss = dataset.imgs[i][0].split(os.sep)[1]
|
|
||||||
name = dataset.imgs[i][0].split(os.sep)[2].split('.')[0]
|
|
||||||
cv2.imwrite(os.path.join(_root+"_CAM", name + '.jpg'), superimposed_img)
|
|
||||||
print(f'{os.path.join(_root+"_CAM", name + ".jpg")} saved')
|
|
||||||
|
|
||||||
activated_features.remove()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main(cam=True)
|
|
|
@ -1,382 +0,0 @@
|
||||||
import datetime
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.utils.data
|
|
||||||
from torch import nn
|
|
||||||
import torchvision
|
|
||||||
from torchvision import transforms
|
|
||||||
from PIL import ImageEnhance
|
|
||||||
import random
|
|
||||||
|
|
||||||
import utils
|
|
||||||
from fastprogress import master_bar, progress_bar
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
## DATA AUG ##
|
|
||||||
import higher
|
|
||||||
from dataug import *
|
|
||||||
from dataug_utils import *
|
|
||||||
tf_names = [
|
|
||||||
## Geometric TF ##
|
|
||||||
'Identity',
|
|
||||||
'FlipUD',
|
|
||||||
'FlipLR',
|
|
||||||
'Rotate',
|
|
||||||
'TranslateX',
|
|
||||||
'TranslateY',
|
|
||||||
'ShearX',
|
|
||||||
'ShearY',
|
|
||||||
|
|
||||||
## Color TF (Expect image in the range of [0, 1]) ##
|
|
||||||
#'Contrast',
|
|
||||||
#'Color',
|
|
||||||
#'Brightness',
|
|
||||||
#'Sharpness',
|
|
||||||
#'Posterize',
|
|
||||||
#'Solarize', #=>Expects image in [0,1] #Not optimized for batches
|
|
||||||
]
|
|
||||||
|
|
||||||
class Lambda(nn.Module):
|
|
||||||
"Create a layer that simply calls `func` with `x`"
|
|
||||||
def __init__(self, func):
|
|
||||||
super().__init__()
|
|
||||||
self.func=func
|
|
||||||
def forward(self, x): return self.func(x)
|
|
||||||
|
|
||||||
class SubsetSampler(torch.utils.data.SubsetRandomSampler):
|
|
||||||
def __init__(self, indices):
|
|
||||||
super().__init__(indices)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (self.indices[i] for i in range(len(self.indices)))
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.indices)
|
|
||||||
|
|
||||||
def sharpness(img, factor):
|
|
||||||
sharpness_factor = random.uniform(1, factor)
|
|
||||||
sharp = ImageEnhance.Sharpness(img)
|
|
||||||
sharped = sharp.enhance(sharpness_factor)
|
|
||||||
return sharped
|
|
||||||
|
|
||||||
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar, Kldiv=False):
|
|
||||||
model.train()
|
|
||||||
metric_logger = utils.MetricLogger(delimiter=" ")
|
|
||||||
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
|
|
||||||
header = 'Epoch: {}'.format(epoch)
|
|
||||||
for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):
|
|
||||||
|
|
||||||
image, target = image.to(device), target.to(device)
|
|
||||||
|
|
||||||
if not Kldiv :
|
|
||||||
output = model(image)
|
|
||||||
#output = F.log_softmax(output, dim=1)
|
|
||||||
loss = criterion(output, target) #No softmax?
|
|
||||||
|
|
||||||
else : #Consumes 2x memory (two forward passes)
|
|
||||||
model.augment(mode=False)
|
|
||||||
output = model(image)
|
|
||||||
model.augment(mode=True)
|
|
||||||
log_sup=F.log_softmax(output, dim=1)
|
|
||||||
sup_loss = F.cross_entropy(output, target) #cross_entropy expects raw logits (it applies log_softmax internally)
|
|
||||||
|
|
||||||
aug_output = model(image)
|
|
||||||
log_aug=F.log_softmax(aug_output, dim=1)
|
|
||||||
aug_loss = F.cross_entropy(aug_output, target) #same: pass raw logits, not log-probabilities
|
|
||||||
|
|
||||||
#KL div w/ logits - similarity between predictions (distributions)
|
|
||||||
KL_loss = F.softmax(output, dim=1)*(log_sup-log_aug)
|
|
||||||
KL_loss = KL_loss.sum(dim=-1)
|
|
||||||
#KL_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
|
|
||||||
KL_loss = KL_loss.mean()
|
|
||||||
|
|
||||||
unsupp_coeff = 1
|
|
||||||
loss = sup_loss + (aug_loss + KL_loss) * unsupp_coeff
|
|
||||||
#print(sup_loss.item(), (aug_loss + KL_loss).item())
|
|
||||||
|
|
||||||
optimizer.zero_grad()
|
|
||||||
loss.backward()
|
|
||||||
optimizer.step()
|
|
||||||
|
|
||||||
acc1 = utils.accuracy(output, target)[0]
|
|
||||||
batch_size = image.shape[0]
|
|
||||||
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
|
|
||||||
metric_logger.update(loss=loss.item())
|
|
||||||
|
|
||||||
confmat.update(target.flatten(), output.argmax(1).flatten())
|
|
||||||
|
|
||||||
|
|
||||||
return metric_logger.loss.global_avg, confmat
|
|
||||||
|
|
||||||
|
|
||||||
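# Hedged standalone sketch (not part of the original file) of the KL
# consistency term computed above: KL(p_clean || p_aug) from raw logits.
def _kl_consistency(clean_logits, aug_logits):
    import torch.nn.functional as F
    p_clean = F.softmax(clean_logits, dim=1)
    log_clean = F.log_softmax(clean_logits, dim=1)
    log_aug = F.log_softmax(aug_logits, dim=1)
    return (p_clean * (log_clean - log_aug)).sum(dim=-1).mean()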
def evaluate(model, criterion, data_loader, device):
|
|
||||||
model.eval()
|
|
||||||
metric_logger = utils.MetricLogger(delimiter=" ")
|
|
||||||
confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
|
|
||||||
header = 'Test:'
|
|
||||||
missed = []
|
|
||||||
with torch.no_grad():
|
|
||||||
for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
|
|
||||||
image, target = image.to(device), target.to(device)
|
|
||||||
output = model(image)
|
|
||||||
loss = criterion(output, target)
|
|
||||||
if target.item() != output.topk(1)[1].item():
|
|
||||||
missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])
|
|
||||||
|
|
||||||
confmat.update(target.flatten(), output.argmax(1).flatten())
|
|
||||||
|
|
||||||
acc1 = utils.accuracy(output, target)[0]
|
|
||||||
batch_size = image.shape[0]
|
|
||||||
metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
|
|
||||||
metric_logger.update(loss=loss.item())
|
|
||||||
|
|
||||||
|
|
||||||
return metric_logger.loss.global_avg, missed, confmat
|
|
||||||
|
|
||||||
def get_train_valid_loader(args, augment, random_seed, valid_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
|
|
||||||
"""
|
|
||||||
Utility function for loading and returning train and valid
|
|
||||||
multi-process iterators over the CIFAR-10 dataset. A sample
|
|
||||||
9x9 grid of the images can be optionally displayed.
|
|
||||||
If using CUDA, num_workers should be set to 1 and pin_memory to True.
|
|
||||||
Params
|
|
||||||
------
|
|
||||||
- data_dir: path directory to the dataset.
|
|
||||||
- batch_size: how many samples per batch to load.
|
|
||||||
- augment: whether to apply the data augmentation scheme
|
|
||||||
mentioned in the paper. Only applied on the train split.
|
|
||||||
- random_seed: fix seed for reproducibility.
|
|
||||||
- valid_size: percentage split of the training set used for
|
|
||||||
the validation set. Should be a float in the range [0, 1].
|
|
||||||
- shuffle: whether to shuffle the train/validation indices.
|
|
||||||
- show_sample: plot 9x9 sample grid of the dataset.
|
|
||||||
- num_workers: number of subprocesses to use when loading the dataset.
|
|
||||||
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
|
|
||||||
True if using GPU.
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
- train_loader: training set iterator.
|
|
||||||
- valid_loader: validation set iterator.
|
|
||||||
"""
|
|
||||||
error_msg = "[!] valid_size should be in the range [0, 1]."
|
|
||||||
assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
|
|
||||||
|
|
||||||
# normalize = transforms.Normalize(
|
|
||||||
# mean=[0.4914, 0.4822, 0.4465],
|
|
||||||
# std=[0.2023, 0.1994, 0.2010],
|
|
||||||
# )
|
|
||||||
|
|
||||||
# define transforms
|
|
||||||
if augment:
|
|
||||||
train_transform = transforms.Compose([
|
|
||||||
# transforms.ColorJitter(brightness=0.3),
|
|
||||||
# transforms.Lambda(lambda img: sharpness(img, 5)),
|
|
||||||
transforms.RandomHorizontalFlip(),
|
|
||||||
transforms.ToTensor(),
|
|
||||||
# normalize,
|
|
||||||
])
|
|
||||||
|
|
||||||
valid_transform = transforms.Compose([
|
|
||||||
# transforms.ColorJitter(brightness=0.3),
|
|
||||||
# transforms.RandomHorizontalFlip(),
|
|
||||||
transforms.ToTensor(),
|
|
||||||
# normalize,
|
|
||||||
])
|
|
||||||
else:
|
|
||||||
train_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
# normalize,
|
|
||||||
])
|
|
||||||
|
|
||||||
valid_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
# normalize,
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
# load the dataset
|
|
||||||
train_dataset = torchvision.datasets.ImageFolder(
|
|
||||||
root=args.data_path, transform=train_transform
|
|
||||||
)
|
|
||||||
|
|
||||||
valid_dataset = torchvision.datasets.ImageFolder(
|
|
||||||
root=args.data_path, transform=valid_transform
|
|
||||||
)
|
|
||||||
|
|
||||||
num_train = len(train_dataset)
|
|
||||||
indices = list(range(num_train))
|
|
||||||
split = int(np.floor(valid_size * num_train))
|
|
||||||
|
|
||||||
if shuffle:
|
|
||||||
np.random.seed(random_seed)
|
|
||||||
np.random.shuffle(indices)
|
|
||||||
|
|
||||||
train_idx, valid_idx = indices[split:], indices[:split]
|
|
||||||
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
|
|
||||||
valid_sampler = SubsetSampler(valid_idx)
|
|
||||||
|
|
||||||
train_loader = torch.utils.data.DataLoader(
|
|
||||||
train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
|
|
||||||
num_workers=num_workers, pin_memory=pin_memory,
|
|
||||||
)
|
|
||||||
valid_loader = torch.utils.data.DataLoader(
|
|
||||||
valid_dataset, batch_size=1, sampler=valid_sampler,
|
|
||||||
num_workers=num_workers, pin_memory=pin_memory,
|
|
||||||
)
|
|
||||||
|
|
||||||
imgs = np.asarray(train_dataset.imgs)
|
|
||||||
|
|
||||||
# print('Train')
|
|
||||||
# print(imgs[train_idx])
|
|
||||||
#print('Valid')
|
|
||||||
#print(imgs[valid_idx])
|
|
||||||
|
|
||||||
tgt = [0,0]
|
|
||||||
for _, targets in train_loader:
|
|
||||||
for target in targets:
|
|
||||||
tgt[target]+=1
|
|
||||||
print("Train targets :", tgt)
|
|
||||||
|
|
||||||
tgt = [0,0]
|
|
||||||
for _, targets in valid_loader:
|
|
||||||
for target in targets:
|
|
||||||
tgt[target]+=1
|
|
||||||
print("Valid targets :", tgt)
|
|
||||||
|
|
||||||
return (train_loader, valid_loader)
|
|
||||||
|
|
||||||
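# Hedged usage sketch (not part of the original file): args only needs the
# fields read above (data_path, batch_size, test_only); the path is hypothetical.
def _loader_demo():
    from types import SimpleNamespace
    args = SimpleNamespace(data_path='data/example_folder', batch_size=8, test_only=False)
    train_loader, valid_loader = get_train_valid_loader(
        args, augment=True, random_seed=999, valid_size=0.3, num_workers=0)
    return train_loader, valid_loader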
def main(args):
|
|
||||||
print(args)
|
|
||||||
|
|
||||||
device = torch.device(args.device)
|
|
||||||
|
|
||||||
torch.backends.cudnn.benchmark = True
|
|
||||||
|
|
||||||
|
|
||||||
#augment = True if not args.test_only else False
|
|
||||||
|
|
||||||
if not args.test_only and args.augment=='flip' : augment = True
|
|
||||||
else : augment = False
|
|
||||||
|
|
||||||
print("Augment", augment)
|
|
||||||
data_loader, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
|
|
||||||
num_workers=args.workers, valid_size=0.3, random_seed=999)
|
|
||||||
|
|
||||||
print("Creating model")
|
|
||||||
model = torchvision.models.__dict__[args.model](pretrained=True)
|
|
||||||
flat = list(model.children())
|
|
||||||
|
|
||||||
body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
|
|
||||||
model = nn.Sequential(body, head)
|
|
||||||
|
|
||||||
Kldiv=False
|
|
||||||
if not args.test_only and (args.augment=='Rand' or args.augment=='RandKL'):
|
|
||||||
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
|
|
||||||
model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
|
|
||||||
|
|
||||||
if args.augment=='RandKL': Kldiv=True
|
|
||||||
|
|
||||||
model['data_aug']['mag'].data = model['data_aug']['mag'].data * args.magnitude
|
|
||||||
print("Augmodel")
|
|
||||||
|
|
||||||
# model.fc = nn.Linear(model.fc.in_features, 2)
|
|
||||||
# import ipdb; ipdb.set_trace()
|
|
||||||
|
|
||||||
criterion = nn.CrossEntropyLoss().to(device)
|
|
||||||
|
|
||||||
# optimizer = torch.optim.SGD(
|
|
||||||
# model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
|
|
||||||
|
|
||||||
optimizer = torch.optim.Adam(
|
|
||||||
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
|
|
||||||
|
|
||||||
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
|
|
||||||
optimizer,
|
|
||||||
lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
|
|
||||||
|
|
||||||
es = utils.EarlyStopping() if not (args.augment=='Rand' or args.augment=='RandKL') else utils.EarlyStopping(augmented_model=True)
|
|
||||||
|
|
||||||
if args.test_only:
|
|
||||||
model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
|
|
||||||
model = model.to(device)
|
|
||||||
print('TEST')
|
|
||||||
_, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
|
|
||||||
print(missed)
|
|
||||||
print('TRAIN')
|
|
||||||
_, missed, _ = evaluate(model, criterion, data_loader, device=device)
|
|
||||||
print(missed)
|
|
||||||
return
|
|
||||||
|
|
||||||
model = model.to(device)
|
|
||||||
|
|
||||||
print("Start training")
|
|
||||||
start_time = time.time()
|
|
||||||
mb = master_bar(range(args.epochs))
|
|
||||||
|
|
||||||
for epoch in mb:
|
|
||||||
_, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb, Kldiv)
|
|
||||||
lr_scheduler.step( (epoch+1)*len(data_loader) )
|
|
||||||
val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
|
|
||||||
es(val_loss, model)
|
|
||||||
|
|
||||||
# print('Valid Missed')
|
|
||||||
# print(valid_missed)
|
|
||||||
|
|
||||||
# print('Train')
|
|
||||||
# print(train_confmat)
|
|
||||||
#print('Valid')
|
|
||||||
#print(valid_confmat)
|
|
||||||
|
|
||||||
# if es.early_stop:
|
|
||||||
# break
|
|
||||||
|
|
||||||
total_time = time.time() - start_time
|
|
||||||
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
|
|
||||||
print('Training time {}'.format(total_time_str))
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
|
||||||
import argparse
|
|
||||||
parser = argparse.ArgumentParser(description='PyTorch Classification Training')
|
|
||||||
|
|
||||||
parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
|
|
||||||
parser.add_argument('--model', default='resnet18', help='model') #'resnet18'
|
|
||||||
parser.add_argument('--device', default='cuda:0', help='device')
|
|
||||||
parser.add_argument('-b', '--batch-size', default=8, type=int)
|
|
||||||
parser.add_argument('--epochs', default=3, type=int, metavar='N',
|
|
||||||
help='number of total epochs to run')
|
|
||||||
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
|
|
||||||
help='number of data loading workers (default: 16)')
|
|
||||||
parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
|
|
||||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
|
|
||||||
help='momentum')
|
|
||||||
parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
|
|
||||||
metavar='W', help='weight decay (default: 1e-4)',
|
|
||||||
dest='weight_decay')
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--test-only",
|
|
||||||
dest="test_only",
|
|
||||||
help="Only test the model",
|
|
||||||
action="store_true",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument('-a', '--augment', default='None', type=str,
|
|
||||||
metavar='N', help='Data augment',
|
|
||||||
dest='augment')
|
|
||||||
parser.add_argument('-m', '--magnitude', default=1.0, type=float,
|
|
||||||
metavar='N', help='Augmentation magnitude',
|
|
||||||
dest='magnitude')
|
|
||||||
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
return args
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
args = parse_args()
|
|
||||||
main(args)
|
|
|
@ -1,585 +0,0 @@
|
||||||
import datetime
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.utils.data
|
|
||||||
from torch import nn
|
|
||||||
import torchvision
|
|
||||||
from torchvision import transforms
|
|
||||||
from PIL import ImageEnhance
|
|
||||||
import random
|
|
||||||
|
|
||||||
import utils
|
|
||||||
from fastprogress import master_bar, progress_bar
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
## DATA AUG ##
|
|
||||||
import higher
|
|
||||||
from dataug import *
|
|
||||||
from dataug_utils import *
|
|
||||||
tf_names = [
|
|
||||||
## Geometric TF ##
|
|
||||||
'Identity',
|
|
||||||
'FlipUD',
|
|
||||||
'FlipLR',
|
|
||||||
'Rotate',
|
|
||||||
'TranslateX',
|
|
||||||
'TranslateY',
|
|
||||||
'ShearX',
|
|
||||||
'ShearY',
|
|
||||||
|
|
||||||
## Color TF (Expect image in the range of [0, 1]) ##
|
|
||||||
'Contrast',
|
|
||||||
'Color',
|
|
||||||
'Brightness',
|
|
||||||
'Sharpness',
|
|
||||||
'Posterize',
|
|
||||||
'Solarize', #=>Expects image in [0,1] #Not optimized for batches
|
|
||||||
]
|
|
||||||
|
|
||||||
def compute_vaLoss(model, dl_it, dl):
|
|
||||||
device = next(model.parameters()).device
|
|
||||||
try:
|
|
||||||
xs, ys = next(dl_it)
|
|
||||||
except StopIteration: #End of validation epoch
|
|
||||||
dl_it = iter(dl)
|
|
||||||
xs, ys = next(dl_it)
|
|
||||||
xs, ys = xs.to(device), ys.to(device)
|
|
||||||
|
|
||||||
model.eval() #Validation without transformations!
|
|
||||||
|
|
||||||
return F.cross_entropy(model(xs), ys)
|
|
||||||
|
|
||||||
def model_copy(src,dst, patch_copy=True, copy_grad=True):
|
|
||||||
#model=copy.deepcopy(fmodel) #Not appropriate: we only want the weights/grads (not the whole fmodel and its internal state)
|
|
||||||
|
|
||||||
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
|
|
||||||
|
|
||||||
if patch_copy:
|
|
||||||
dst['model'].load_state_dict(src['model'].state_dict()) #Copie donnee manquante ?
|
|
||||||
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
|
|
||||||
|
|
||||||
#Copie des gradients
|
|
||||||
if copy_grad:
|
|
||||||
for paramName, paramValue, in src.named_parameters():
|
|
||||||
for netCopyName, netCopyValue, in dst.named_parameters():
|
|
||||||
if paramName == netCopyName:
|
|
||||||
netCopyValue.grad = paramValue.grad
|
|
||||||
#netCopyValue=copy.deepcopy(paramValue)
|
|
||||||
|
|
||||||
try: #Data_augV4
|
|
||||||
dst['data_aug']._input_info = src['data_aug']._input_info
|
|
||||||
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def optim_copy(dopt, opt):
|
|
||||||
|
|
||||||
#inner_opt.load_state_dict(diffopt.state_dict()) #Besoin sauver etat otpim (momentum, etc.) => Ne copie pas le state...
|
|
||||||
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
|
|
||||||
|
|
||||||
for group_idx, group in enumerate(opt.param_groups):
|
|
||||||
# print('gp idx',group_idx)
|
|
||||||
for p_idx, p in enumerate(group['params']):
|
|
||||||
opt.state[p]=dopt.state[group_idx][p_idx]
|
|
||||||
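# A minimal sketch of how the two copy helpers above are meant to be chained
# with `higher` (names as used in main() below; this is an illustration, not
# code from the original file):
#
#   fmodel  = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
#   diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel,
#                                         track_higher_grads=True)
#   ...several diffopt.step(loss) inner updates on fmodel...
#   model_copy(src=fmodel, dst=model)        # write weights and grads back to `model`
#   optim_copy(dopt=diffopt, opt=inner_opt)  # write optimizer state back to `inner_opt`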

#############


class Lambda(nn.Module):
    "Create a layer that simply calls `func` with `x`"
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x): return self.func(x)


class SubsetSampler(torch.utils.data.SubsetRandomSampler):
    def __init__(self, indices):
        super().__init__(indices)

    def __iter__(self):
        return (self.indices[i] for i in range(len(self.indices)))

    def __len__(self):
        return len(self.indices)


def sharpness(img, factor):
    sharpness_factor = random.uniform(1, factor)
    sharp = ImageEnhance.Sharpness(img)
    sharped = sharp.enhance(sharpness_factor)
    return sharped


def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, master_bar):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
    header = 'Epoch: {}'.format(epoch)
    for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=master_bar):

        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc1 = utils.accuracy(output, target)[0]
        batch_size = image.shape[0]
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.update(loss=loss.item())

        confmat.update(target.flatten(), output.argmax(1).flatten())

    return metric_logger.loss.global_avg, confmat


def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
    header = 'Test:'
    missed = []
    with torch.no_grad():
        for i, (image, target) in metric_logger.log_every(data_loader, leave=False, header=header, parent=None):
            image, target = image.to(device), target.to(device)
            output = model(image)
            loss = criterion(output, target)
            if target.item() != output.topk(1)[1].item():
                missed.append(data_loader.dataset.imgs[data_loader.sampler.indices[i]])

            confmat.update(target.flatten(), output.argmax(1).flatten())

            acc1 = utils.accuracy(output, target)[0]
            batch_size = image.shape[0]
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.update(loss=loss.item())

    return metric_logger.loss.global_avg, missed, confmat


def get_train_valid_loader(args, augment, random_seed, train_size=0.5, test_size=0.1, shuffle=True, num_workers=4, pin_memory=True):
    """
    Utility function for loading and returning train, valid and test
    multi-process iterators over an ImageFolder dataset.
    If using CUDA, set pin_memory to True.

    Params
    ------
    - args: parsed arguments (uses args.data_path, args.batch_size and args.test_only).
    - augment: whether to apply the data augmentation scheme. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - train_size: fraction of the non-test indices used for training; the rest
      forms the validation set. Should be a float in the range [0, 1].
    - test_size: fraction of the dataset held out as the test split.
      Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    - test_loader: test set iterator.
    """
    error_msg = "[!] test_size should be in the range [0, 1]."
    assert ((test_size >= 0) and (test_size <= 1)), error_msg

    # normalize = transforms.Normalize(
    #     mean=[0.4914, 0.4822, 0.4465],
    #     std=[0.2023, 0.1994, 0.2010],
    # )

    # define transforms
    if augment:
        train_transform = transforms.Compose([
            # transforms.ColorJitter(brightness=0.3),
            # transforms.Lambda(lambda img: sharpness(img, 5)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # normalize,
        ])

        valid_transform = transforms.Compose([
            # transforms.ColorJitter(brightness=0.3),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            # normalize,
        ])

        valid_transform = transforms.Compose([
            transforms.ToTensor(),
            # normalize,
        ])

    # load the dataset
    train_dataset = torchvision.datasets.ImageFolder(
        root=args.data_path, transform=train_transform
    )

    test_dataset = torchvision.datasets.ImageFolder(
        root=args.data_path, transform=valid_transform
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(test_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, test_idx = indices[split:], indices[:split]
    train_idx, valid_idx = train_idx[:int(len(train_idx)*train_size)], train_idx[int(len(train_idx)*train_size):]
    print("\nTrain", len(train_idx), "\nValid", len(valid_idx), "\nTest", len(test_idx))
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx) if not args.test_only else SubsetSampler(train_idx)
    valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx) if not args.test_only else SubsetSampler(valid_idx)
    test_sampler = SubsetSampler(test_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size if not args.test_only else 1, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, sampler=test_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    imgs = np.asarray(train_dataset.imgs)

    # print('Train')
    # print(imgs[train_idx])
    #print('Valid')
    #print(imgs[valid_idx])

    return (train_loader, valid_loader, test_loader)
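# A minimal usage sketch for get_train_valid_loader (assumes an ImageFolder-style
# dataset at args.data_path and an `args` object built by parse_args() below):
#
#   train_dl, valid_dl, test_dl = get_train_valid_loader(
#       args=args, augment=False, random_seed=999,
#       train_size=0.99, test_size=0.2,
#       num_workers=args.workers, pin_memory=True)
#
# The test split is carved out of the shuffled indices first; train_size then
# divides the remaining indices between the train and validation loaders.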
def main(args):
    print(args)

    device = torch.device(args.device)

    torch.backends.cudnn.benchmark = True

    #augment = True if not args.test_only else False
    augment = False

    data_loader, dl_val, data_loader_test = get_train_valid_loader(args=args, pin_memory=True, augment=augment,
                                                                   num_workers=args.workers, train_size=0.99, test_size=0.2, random_seed=999)

    print("Creating model")
    model = torchvision.models.__dict__[args.model](pretrained=True)
    flat = list(model.children())

    body, head = nn.Sequential(*flat[:-2]), nn.Sequential(flat[-2], Lambda(func=lambda x: torch.flatten(x, 1)), nn.Linear(flat[-1].in_features, len(data_loader.dataset.classes)))
    model = nn.Sequential(body, head)

    # model.fc = nn.Linear(model.fc.in_features, 2)
    # import ipdb; ipdb.set_trace()

    criterion = nn.CrossEntropyLoss().to(device)

    # optimizer = torch.optim.SGD(
    #     model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    '''
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)
    '''
    es = utils.EarlyStopping()

    if args.test_only:
        model.load_state_dict(torch.load('checkpoint.pt', map_location=lambda storage, loc: storage))
        model = model.to(device)
        print('TEST')
        _, missed, _ = evaluate(model, criterion, data_loader_test, device=device)
        print(missed)
        print('TRAIN')
        _, missed, _ = evaluate(model, criterion, data_loader, device=device)
        print(missed)
        return

    model = model.to(device)

    print("Start training")
    start_time = time.time()
    mb = master_bar(range(args.epochs))
    """
    for epoch in mb:
        _, train_confmat = train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, mb)
        lr_scheduler.step( (epoch+1)*len(data_loader) )
        val_loss, _, valid_confmat = evaluate(model, criterion, data_loader_test, device=device)
        es(val_loss, model)

        # print('Valid Missed')
        # print(valid_missed)

        # print('Train')
        # print(train_confmat)
        print('Valid')
        print(valid_confmat)

        # if es.early_stop:
        #     break

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
    """

    #######

    inner_it = args.inner_it
    dataug_epoch_start = 0
    print_freq = 1
    KLdiv = False

    # NOTE: TF (the transformations module providing TF_dict) is expected to be
    # brought into scope by the star imports above.
    tf_dict = {k: TF.TF_dict[k] for k in tf_names}
    model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
    #model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)

    val_loss = torch.tensor(0)  # Needed if no meta-step happens during an epoch
    dl_val_it = iter(dl_val)
    countcopy = 0

    #if inner_it!=0:
    meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=args.lr)  #lr=1e-2
    #inner_opt = torch.optim.SGD(model['model'].parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)  #lr=1e-2 / momentum=0.9
    inner_opt = torch.optim.Adam(model['model'].parameters(), lr=args.lr, weight_decay=args.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        inner_opt,
        lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9)

    high_grad_track = True
    if inner_it == 0:
        high_grad_track = False

    model.train()
    model.augment(mode=False)

    fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
    diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

    i = 0

    for epoch in mb:

        metric_logger = utils.MetricLogger(delimiter=" ")
        confmat = utils.ConfusionMatrix(num_classes=len(data_loader.dataset.classes))
        header = 'Epoch: {}'.format(epoch)

        t0 = time.process_time()
        for _, (image, target) in metric_logger.log_every(data_loader, header=header, parent=mb):
            #for i, (xs, ys) in enumerate(dl_train):
            #print_torch_mem("it"+str(i))
            i += 1
            image, target = image.to(device), target.to(device)

            if(not KLdiv):
                # Uniform method
                logits = fmodel(image)  # modified `params` can also be passed as a kwarg
                output = F.log_softmax(logits, dim=1)
                loss = F.cross_entropy(output, target, reduction='none')  # no need to call loss.backwards()

                if fmodel._data_augmentation:  # Weight the loss
                    w_loss = fmodel['data_aug'].loss_weight()  #.to(device)
                    loss = loss * w_loss
                loss = loss.mean()

            else:
                # KL-divergence method
                fmodel.augment(mode=False)
                sup_logits = fmodel(image)
                log_sup = F.log_softmax(sup_logits, dim=1)
                fmodel.augment(mode=True)
                loss = F.cross_entropy(log_sup, target)

                if fmodel._data_augmentation:
                    aug_logits = fmodel(image)
                    log_aug = F.log_softmax(aug_logits, dim=1)
                    aug_loss = 0
                    if epoch > 50:  # delayed start?
                        # KL div w/ logits - similarity of the predictions (distributions)
                        aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
                        aug_loss = aug_loss.sum(dim=-1)
                        #aug_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
                        w_loss = fmodel['data_aug'].loss_weight()  # Weight the loss
                        aug_loss = (w_loss * aug_loss).mean()

                    aug_loss += (F.cross_entropy(log_aug, target, reduction='none') * w_loss).mean()
                    #print(aug_loss)
                    unsupp_coeff = 1
                    loss += aug_loss * unsupp_coeff

            diffopt.step(loss)  #(opt.zero_grad, loss.backward, opt.step)

            if(high_grad_track and i % inner_it == 0):  # Perform a meta step
                #print("meta")
                # Of little use if high_grad_track = False
                val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) + fmodel['data_aug'].reg_loss()
                #print_graph(val_loss)

                val_loss.backward()

                countcopy += 1
                model_copy(src=fmodel, dst=model)
                optim_copy(dopt=diffopt, opt=inner_opt)

                #if epoch>50:
                meta_opt.step()
                model['data_aug'].adjust_param(soft=False)  # Constraint: sum(proba)=1
                #model['data_aug'].next_TF_set()

                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
                diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

            acc1 = utils.accuracy(output, target)[0]
            batch_size = image.shape[0]
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.update(loss=loss.item())

            confmat.update(target.flatten(), output.argmax(1).flatten())

            if(not high_grad_track and (torch.cuda.memory_cached()/1024.0**2) > 20000):
                countcopy += 1
                print_torch_mem("copy")
                model_copy(src=fmodel, dst=model)
                optim_copy(dopt=diffopt, opt=inner_opt)
                val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)

                # Needed to reset higher (fast_params accumulate even with track_higher_grads = False)
                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
                diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)
                print_torch_mem("copy")

        if(not high_grad_track):
            countcopy += 1
            print_torch_mem("end copy")
            model_copy(src=fmodel, dst=model)
            optim_copy(dopt=diffopt, opt=inner_opt)
            val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)

            # Needed to reset higher (fast_params accumulate even with track_higher_grads = False)
            fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
            diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)
            print_torch_mem("end copy")

        tf = time.process_time()

        #### Print ####
        if(print_freq and epoch % print_freq == 0):
            print('-'*9)
            print('Epoch : %d' % (epoch))
            print('Time : %.00f' % (tf - t0))
            print('Train loss :', loss.item(), '/ val loss', val_loss.item())
            print('Data Augmentation : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
            print('TF Proba :', model['data_aug']['prob'].data)
            #print('proba grad',model['data_aug']['prob'].grad)
            print('TF Mag :', model['data_aug']['mag'].data)
            #print('Mag grad',model['data_aug']['mag'].grad)
            #print('Reg loss:', model['data_aug'].reg_loss().item())
            #print('Aug loss', aug_loss.item())
        #############
        #### Log ####
        #print(type(model['data_aug']) is dataug.Data_augV5)
        '''
        param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
        data={
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "param": param #if isinstance(model['data_aug'], Data_augV5)
            #else [p.item() for p in model['data_aug']['prob']],
        }
        log.append(data)
        '''
        #############

        train_confmat = confmat
        lr_scheduler.step( (epoch+1)*len(data_loader) )

        test_loss, _, test_confmat = evaluate(model, criterion, data_loader_test, device=device)
        es(test_loss, model)

        # print('Valid Missed')
        # print(valid_missed)

        # print('Train')
        # print(train_confmat)
        print('Test')
        print(test_confmat)

        # if es.early_stop:
        #     break

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

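# The training loop in main() above is a bi-level (augmentation meta-learning) scheme:
#   inner steps : diffopt.step(loss) updates the functional copy `fmodel`
#                 (the classifier weights) on the, possibly augmented, train batch;
#   meta step   : every `inner_it` inner steps, the validation loss of `fmodel`
#                 is back-propagated through the unrolled inner updates down to
#                 the Data_augV5 parameters (TF probabilities and magnitudes),
#                 which meta_opt then updates.
# A compressed sketch of one meta-iteration (train_step() is a stand-in for the
# loss computation shown above, not a function defined in this repository):
#
#   for _ in range(inner_it):
#       diffopt.step(train_step(fmodel, image, target))
#   val_loss = compute_vaLoss(fmodel, dl_val_it, dl_val) + fmodel['data_aug'].reg_loss()
#   val_loss.backward()
#   model_copy(src=fmodel, dst=model); optim_copy(dopt=diffopt, opt=inner_opt)
#   meta_opt.step(); model['data_aug'].adjust_param(soft=False)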
def parse_args():
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Classification Training')

    parser.add_argument('--data-path', default='/github/smart_augmentation/salvador/data', help='dataset')
    parser.add_argument('--model', default='resnet18', help='model')  #'resnet18'
    parser.add_argument('--device', default='cuda:0', help='device')
    parser.add_argument('-b', '--batch-size', default=8, type=int)
    parser.add_argument('--epochs', default=3, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 16)')
    parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=4e-5, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')

    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    parser.add_argument('--in_it', '--inner_it', default=0, type=int,
                        metavar='N', help='higher inner_it',
                        dest='inner_it')

    args = parser.parse_args()

    return args


if __name__ == "__main__":
    args = parse_args()
    main(args)
@ -1,346 +0,0 @@
import torch
import kornia
import random

### Available TF for Dataug ###
'''
TF_dict={ #Dataugv4
    ## Geometric TF ##
    'Identity' : (lambda x, mag: x),
    'FlipUD' : (lambda x, mag: flipUD(x)),
    'FlipLR' : (lambda x, mag: flipLR(x)),
    'Rotate': (lambda x, mag: rotate(x, angle=torch.tensor([rand_int(mag, maxval=30)for _ in x], device=x.device))),
    'TranslateX': (lambda x, mag: translate(x, translation=torch.tensor([[rand_int(mag, maxval=20), 0] for _ in x], device=x.device))),
    'TranslateY': (lambda x, mag: translate(x, translation=torch.tensor([[0, rand_int(mag, maxval=20)] for _ in x], device=x.device))),
    'ShearX': (lambda x, mag: shear(x, shear=torch.tensor([[rand_float(mag, maxval=0.3), 0] for _ in x], device=x.device))),
    'ShearY': (lambda x, mag: shear(x, shear=torch.tensor([[0, rand_float(mag, maxval=0.3)] for _ in x], device=x.device))),

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast': (lambda x, mag: contrast(x, contrast_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
    'Color':(lambda x, mag: color(x, color_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
    'Brightness':(lambda x, mag: brightness(x, brightness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
    'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=torch.tensor([rand_float(mag, minval=0.1, maxval=1.9) for _ in x], device=x.device))),
    'Posterize': (lambda x, mag: posterize(x, bits=torch.tensor([rand_int(mag, minval=4, maxval=8) for _ in x], device=x.device))),
    'Solarize': (lambda x, mag: solarize(x, thresholds=torch.tensor([rand_int(mag,minval=1, maxval=256)/256. for _ in x], device=x.device))) , #=> image in [0,1]  # Not optimized for batches

    # Not functional
    #'Auto_Contrast': (lambda mag: None),  # Not optimized for batches (very slow)
    #'Equalize': (lambda mag: None),
}
'''
'''
TF_dict={ #Dataugv5 #AutoAugment
    ## Geometric TF ##
    'Identity' : (lambda x, mag: x),
    'FlipUD' : (lambda x, mag: flipUD(x)),
    'FlipLR' : (lambda x, mag: flipLR(x)),
    'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
    'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
    'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
    'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
    'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),  # Gradient is lost
    'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))),  # Gradient is lost  #=> image in [0,1]

    # Not functional
    #'Auto_Contrast': (lambda mag: None),  # Not optimized for batches (very slow)
    #'Equalize': (lambda mag: None),
}
'''
TF_dict={ #Dataugv5
    ## Geometric TF ##
    'Identity' : (lambda x, mag: x),
    'FlipUD' : (lambda x, mag: flipUD(x)),
    'FlipLR' : (lambda x, mag: flipLR(x)),
    'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
    'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
    'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
    'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
    'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),  # Gradient is lost
    'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))),  # Gradient is lost  #=> image in [0,1]

    # Color TF (Common mag scale)
    '+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),  # Gradient is lost
    '=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))),  # Gradient is lost  #=> image in [0,1]

    'BRotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30*3))),
    'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=0))),
    'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20*3), zero_pos=1))),
    'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=0))),
    'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3*3), zero_pos=1))),

    'BadTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=0))),
    'BadTranslateX_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=0))),
    'BadTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=20*2, maxval=20*3), zero_pos=1))),
    'BadTranslateY_neg': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-20*3, maxval=-20*2), zero_pos=1))),

    'BadColor':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadSharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),
    'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*2))),

    # Not functional
    #'Auto_Contrast': (lambda mag: None),  # Not optimized for batches (very slow)
    #'Equalize': (lambda mag: None),
}

TF_no_mag={'Identity', 'FlipUD', 'FlipLR'}
TF_ignore_mag= TF_no_mag | {'Solarize', 'Posterize'}


def int_image(float_image):  # WARNING: slight loss of information (granularity: 1/256 = 0.0039)
    return (float_image*255.).type(torch.uint8)


def float_image(int_image):
    return int_image.type(torch.float)/255.


#def rand_inverse(value):
#    return value if random.random() < 0.5 else -value

#def rand_int(mag, maxval, minval=None):  #[(-maxval,minval), maxval]
#    real_max = int_parameter(mag, maxval=maxval)
#    if not minval : minval = -real_max
#    return random.randint(minval, real_max)

#def rand_float(mag, maxval, minval=None):  #[(-maxval,minval), maxval]
#    real_max = float_parameter(mag, maxval=maxval)
#    if not minval : minval = -real_max
#    return random.uniform(minval, real_max)


def rand_floats(size, mag, maxval, minval=None):  #[(-maxval,minval), maxval]
    real_mag = float_parameter(mag, maxval=maxval)
    if not minval: minval = -real_mag
    #return random.uniform(minval, real_max)
    return minval + (real_mag-minval) * torch.rand(size, device=mag.device)  #[min_val, real_mag]


def invScale_rand_floats(size, mag, maxval, minval):
    # Mag=[0,PARAMETER_MAX] => [PARAMETER_MAX, 0] = [maxval, minval]
    real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval-minval)+minval
    return real_mag + (maxval-real_mag) * torch.rand(size, device=mag.device)  #[real_mag, max_val]


def zero_stack(tensor, zero_pos):
    if zero_pos==0:
        return torch.stack((tensor, torch.zeros((tensor.shape[0],), device=tensor.device)), dim=1)
    if zero_pos==1:
        return torch.stack((torch.zeros((tensor.shape[0],), device=tensor.device), tensor), dim=1)
    else:
        raise Exception("Invalid zero_pos : ", zero_pos)


#https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py#L137
PARAMETER_MAX = 1  # The maximum 'level' a transform can be predicted at

def float_parameter(level, maxval):
    """Helper function to scale `val` between 0 and maxval .
    Args:
        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
        maxval: Maximum value that the operation can have. This will be scaled
                to level/PARAMETER_MAX.
    Returns:
        A float that results from scaling `maxval` according to `level`.
    """

    #return float(level) * maxval / PARAMETER_MAX
    return (level * maxval / PARAMETER_MAX)  #.to(torch.float)


#def int_parameter(level, maxval):  # Gradient is lost
#    """Helper function to scale `val` between 0 and maxval .
#    Args:
#        level: Level of the operation that will be between [0, `PARAMETER_MAX`].
#        maxval: Maximum value that the operation can have. This will be scaled
#                to level/PARAMETER_MAX.
#    Returns:
#        An int that results from scaling `maxval` according to `level`.
#    """
#    #return int(level * maxval / PARAMETER_MAX)
#    return (level * maxval / PARAMETER_MAX)

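# Worked example of the magnitude scaling above (with PARAMETER_MAX = 1):
# float_parameter(level=0.5, maxval=30) == 15.0, so a 'Rotate' magnitude of 0.5
# makes rand_floats sample angles uniformly in [-15, 15].
# invScale_rand_floats reverses the direction: mag=0 samples at maxval (the
# neutral factor for the '-Contrast'-style TFs), and a larger mag widens the
# sampling range down towards minval.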
def flipLR(x):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    M = torch.tensor([[[-1., 0., w-1],
                       [ 0., 1., 0.],
                       [ 0., 0., 1.]]], device=device).expand(batch_size, -1, -1)

    # warp the original image by the found transform
    return kornia.warp_perspective(x, M, dsize=(h, w))


def flipUD(x):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    M = torch.tensor([[[ 1., 0., 0.],
                       [ 0., -1., h-1],
                       [ 0., 0., 1.]]], device=device).expand(batch_size, -1, -1)

    # warp the original image by the found transform
    return kornia.warp_perspective(x, M, dsize=(h, w))


def rotate(x, angle):
    return kornia.rotate(x, angle=angle.type(torch.float))  # Kornia does not support ints


def translate(x, translation):
    #print(translation)
    return kornia.translate(x, translation=translation.type(torch.float))  # Kornia does not support ints


def shear(x, shear):
    return kornia.shear(x, shear=shear)


def contrast(x, contrast_factor):
    return kornia.adjust_contrast(x, contrast_factor=contrast_factor)  # Expects an image in the range of [0, 1]


#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageEnhance.py
def color(x, color_factor):
    (batch_size, channels, h, w) = x.shape

    gray_x = kornia.rgb_to_grayscale(x)
    gray_x = gray_x.repeat_interleave(channels, dim=1)
    return blend(gray_x, x, color_factor).clamp(min=0.0, max=1.0)  # Expects an image in the range of [0, 1]


def brightness(x, brightness_factor):
    device = x.device

    return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0, max=1.0)  # Expects an image in the range of [0, 1]


def sharpeness(x, sharpness_factor):
    device = x.device
    (batch_size, channels, h, w) = x.shape

    k = torch.tensor([[[ 1., 1., 1.],
                       [ 1., 5., 1.],
                       [ 1., 1., 1.]]], device=device)  # Smooth filter: https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
    smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True)  # May need to handle the alpha channel differently

    return blend(smooth_x, x, sharpness_factor).clamp(min=0.0, max=1.0)  # Expects an image in the range of [0, 1]


#https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
def posterize(x, bits):
    bits = bits.type(torch.uint8)  # Gradient is lost
    x = int_image(x)  # Expects an image in the range of [0, 1]

    mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)

    (batch_size, channels, h, w) = x.shape
    mask = mask.unsqueeze(dim=1).expand(-1, channels).unsqueeze(dim=2).expand(-1, channels, h).unsqueeze(dim=3).expand(-1, channels, h, w)  # There must be a simpler way...

    return float_image(x & mask)

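# Posterize example: for bits=4 the mask is ~(2**(8-4)-1) = ~0x0F = 0xF0, so each
# 8-bit channel value keeps only its 4 most significant bits
# (e.g. 0b10110110 & 0b11110000 = 0b10110000), i.e. 16 gray levels per channel.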
def auto_contrast(x):  # NOT OPTIMIZED FOR BATCHES  # VERY SLOW
    # Optimization idea: efficient LUT application / per-batch/channel histogram computation
    print("Warning : not checked yet !")
    (batch_size, channels, h, w) = x.shape
    x = int_image(x)  # Expects an image in the range of [0, 1]
    #print('Start',x[0])
    for im_idx, img in enumerate(x.chunk(batch_size, dim=0)):  # Per-image operation
        #print(img.shape)
        for chan_idx, chan in enumerate(img.chunk(channels, dim=1)):  # Per-channel operation
            #print(chan.shape)
            hist = torch.histc(chan, bins=256, min=0, max=255)  # NOT DIFFERENTIABLE

            # find lowest/highest samples after preprocessing
            for lo in range(256):
                if hist[lo]:
                    break
            for hi in range(255, -1, -1):
                if hist[hi]:
                    break
            if hi <= lo:
                # don't bother
                pass
            else:
                scale = 255.0 / (hi - lo)
                offset = -lo * scale
                for ix in range(256):
                    n_ix = int(ix * scale + offset)
                    if n_ix < 0: n_ix = 0
                    elif n_ix > 255: n_ix = 255

                    chan[chan==ix] = n_ix
            x[im_idx, chan_idx] = chan

    #print('End',x[0])
    return float_image(x)


def equalize(x):  # NOT OPTIMIZED FOR BATCHES
    raise NotImplementedError("equalize is not implemented")
    # Optimization idea: efficient LUT application / per-batch/channel histogram computation
    (batch_size, channels, h, w) = x.shape
    x = int_image(x)  # Expects an image in the range of [0, 1]
    #print('Start',x[0])
    for im_idx, img in enumerate(x.chunk(batch_size, dim=0)):  # Per-image operation
        #print(img.shape)
        for chan_idx, chan in enumerate(img.chunk(channels, dim=1)):  # Per-channel operation
            #print(chan.shape)
            hist = torch.histc(chan, bins=256, min=0, max=255)  # NOT DIFFERENTIABLE

    return float_image(x)


def solarize(x, thresholds):
    batch_size, channels, h, w = x.shape
    #imgs=[]
    #for idx, t in enumerate(thresholds):  # Per-image operation
    #    mask = x[idx] > t  # Gradient is lost
    #    # In place
    #    inv_x = 1-x[idx][mask]
    #    x[idx][mask] = inv_x

    #    # Out of place
    #    im = x[idx]
    #    inv_x = 1-im[mask]

    #    imgs.append(im.masked_scatter(mask, inv_x))

    #idxs=torch.tensor(range(x.shape[0]), device=x.device)
    #idxs=idxs.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w)  # There must be a simpler way...
    #x=x.scatter(dim=0, index=idxs, src=torch.stack(imgs))

    thresholds = thresholds.unsqueeze(dim=1).expand(-1, channels).unsqueeze(dim=2).expand(-1, channels, h).unsqueeze(dim=3).expand(-1, channels, h, w)  # There must be a simpler way...
    #print(thresholds.grad_fn)
    x = torch.where(x > thresholds, 1-x, x)
    #print(mask.grad_fn)

    #x=x.min(thresholds)
    #inv_x = 1-x[mask]
    #x=x.where(x<thresholds,1-x)
    #x[mask]=inv_x
    #x=x.masked_scatter(mask, inv_x)

    return x


#https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
def blend(x, y, alpha):  #out = image1 * (1.0 - alpha) + image2 * alpha
    #return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0)  #out=src1*alpha+src2*beta+gamma  # Does not work for batched alpha

    if not isinstance(x, torch.Tensor):
        raise TypeError("x should be a tensor. Got {}".format(type(x)))

    if not isinstance(y, torch.Tensor):
        raise TypeError("y should be a tensor. Got {}".format(type(y)))

    (batch_size, channels, h, w) = x.shape
    alpha = alpha.unsqueeze(dim=1).expand(-1, channels).unsqueeze(dim=2).expand(-1, channels, h).unsqueeze(dim=3).expand(-1, channels, h, w)  # There must be a simpler way...
    res = x*(1-alpha) + y*alpha

    return res
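# blend() follows PIL's convention: out = x*(1-alpha) + y*alpha, with one alpha
# per image broadcast over (C, H, W). A minimal check (assumed shapes):
#
#   x = torch.zeros(2, 3, 8, 8); y = torch.ones(2, 3, 8, 8)
#   alpha = torch.tensor([0.25, 0.75])
#   out = blend(x, y, alpha)   # out[0] is all 0.25, out[1] is all 0.75
#
# brightness/color/sharpeness above are all expressed as blend(degenerate, x, factor),
# so factor=1 returns x unchanged and factor=0 returns the degenerate image.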
@ -1,202 +0,0 @@
from __future__ import print_function
from collections import defaultdict, deque
import datetime
import math
import time
import torch
import numpy as np

import os
from fastprogress import progress_bar


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{global_avg:.4f}"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)


class ConfusionMatrix(object):
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None

    def update(self, a, b):
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            k = (a >= 0) & (a < n)
            inds = n * a[k].to(torch.int64) + b[k]
            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)

    def reset(self):
        self.mat.zero_()

    def compute(self):
        h = self.mat.float()
        acc_global = torch.diag(h).sum() / h.sum()
        acc = torch.diag(h) / h.sum(1)
        return acc_global, acc

    def __str__(self):
        acc_global, acc = self.compute()
        return (
            'global correct: {:.1f}\n'
            'average row correct: {}').format(
                acc_global.item() * 100,
                ['{:.1f}'.format(i) for i in (acc * 100).tolist()])

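# ConfusionMatrix.compute() reads, for an n x n matrix M where rows are true
# classes and columns are predictions:
#   acc_global = trace(M) / sum(M)        (overall accuracy)
#   acc[i]     = M[i, i] / sum(M[i, :])   (per-class recall, printed as
#                                          "average row correct")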
class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, parent, header=None, **kwargs):
        if not header:
            header = ''
        log_msg = self.delimiter.join([
            '{meters}'
        ])

        progrss = progress_bar(iterable, parent=parent, **kwargs)

        for idx, obj in enumerate(progrss):
            yield idx, obj
            progrss.comment = log_msg.format(
                meters=str(self))

        print('{header} {meters}'.format(header=header, meters=str(self)))

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target[None])

        res = []
        for k in topk:
            correct_k = correct[:k].flatten().sum(dtype=torch.float32)
            res.append(correct_k * (100.0 / batch_size))
        return res

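# accuracy() returns percentages, one tensor per requested k. A small usage
# sketch (assumed shapes and values, for illustration only):
#
#   output = torch.randn(8, 10)            # logits for a batch of 8, 10 classes
#   target = torch.randint(0, 10, (8,))
#   top1, top5 = accuracy(output, target, topk=(1, 5))   # e.g. tensor(12.5000)
#
# The training scripts above only use top-1: utils.accuracy(output, target)[0].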
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, augmented_model=False):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            augmented_model (bool): If True, only the inner 'model' part of an Augmented_model
                            wrapper is saved to the checkpoint. Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

        self.augmented_model = augmented_model

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score - self.delta:
            self.counter += 1
            # print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            # if self.counter >= self.patience:
            #     self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves the model when the validation loss decreases.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        if not self.augmented_model:
            torch.save(model.state_dict(), 'checkpoint.pt')
        else:
            torch.save(model['model'].state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss