Rangement

This commit is contained in:
Harle, Antoine (Contracteur) 2020-01-24 14:32:37 -05:00
parent f83c73ec17
commit f507ff4741
16 changed files with 85 additions and 46 deletions

95
higher/smart_aug/compare_res.py Executable file
View file

@ -0,0 +1,95 @@
from utils import *
if __name__ == "__main__":
#'''
files=[
"res/log/Aug_mod(Data_augV5(Mix0.8-23TFx4-Mag)-LeNet)-100 epochs (dataug:0)- 1 in_it.json",
#"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-0.json",
#"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-1.json",
#"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-2.json",
#"res/log/Aug_mod(RandAugUDA(18TFx2-Mag1)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
]
for idx, file in enumerate(files):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
plot_resV2(data['Log'], fig_name=file.replace('.json','').replace('log/',''), param_names=data['Param_names'])
#plot_TF_influence(data['Log'], param_names=data['Param_names'])
#'''
## Loss , Acc, Proba = f(epoch) ##
#plot_compare(filenames=files, fig_name="res/compare")
'''
## Acc, Time, Epochs = f(n_tf) ##
#fig_name="res/TF_nb_tests_compare"
fig_name="res/TF_seq_tests_compare"
inner_its = [0, 10]
dataug_epoch_starts= [0]
TF_nb = 14#[len(TF.TF_dict)] #range(10,len(TF.TF_dict)+1) #[len(TF.TF_dict)]
N_seq_TF= [1, 2, 3, 4, 6] #[1]
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
for in_it in inner_its:
for dataug in dataug_epoch_starts:
#n_tf = TF_nb
#filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF)-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(n_tf, dataug, in_it) for n_tf in TF_nb]
#filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(n_tf, 1, dataug, in_it) for n_tf in TF_nb]
n_tf = N_seq_TF
#filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(TF_nb, n_tf, dataug, in_it) for n_tf in N_seq_TF]
filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(TF_nb, n_tf, dataug, in_it) for n_tf in N_seq_TF]
all_data=[]
#legend=""
for idx, file in enumerate(filenames):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
acc = [x["Accuracy"] for x in all_data]
epochs = [len(x["Log"]) for x in all_data]
time = [x["Time"][0] for x in all_data]
#for i in range(len(time)): time[i] *= epochs[i] #Estimation temps total
ax[0].plot(n_tf, acc, label="{} in_it/{} dataug".format(in_it,dataug))
ax[1].plot(n_tf, time, label="{} in_it/{} dataug".format(in_it,dataug))
ax[2].plot(n_tf, epochs, label="{} in_it/{} dataug".format(in_it,dataug))
#for data in all_data:
#print(np.mean([x["param"] for x in data["Log"]], axis=0))
#print(len(data["Param_names"]), np.argsort(np.argsort(np.mean([x["param"] for x in data["Log"]], axis=0))))
ax[0].set_title('Acc')
ax[1].set_title('Time')
ax[2].set_title('Epochs')
for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
'''
#Res print
'''
nb_run=3
accs = []
times = []
files = ["res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-Mag)-LeNet)-150epochs(dataug:0)-1in_it-%s.json"%str(run) for run in range(nb_run)]
for idx, file in enumerate(files):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
accs.append(data['Accuracy'])
times.append(data['Time'][0])
print(idx, data['Accuracy'])
print(files[0], np.mean(accs), np.std(accs), np.mean(times))
'''

51
higher/smart_aug/datasets.py Executable file
View file

@ -0,0 +1,51 @@
""" Dataset definition.
MNIST / CIFAR10
"""
import torch
from torch.utils.data import SubsetRandomSampler
import torchvision
BATCH_SIZE = 300
TEST_SIZE = 300
#TEST_SIZE = 10000 #legerement +Rapide / + Consomation memoire !
download_data=False
num_workers=2 #4
pin_memory=False #True :+ GPU memory / + Lent
#ATTENTION : Dataug (Kornia) Expect image in the range of [0, 1]
#transform_train = torchvision.transforms.Compose([
# torchvision.transforms.RandomHorizontalFlip(),
# torchvision.transforms.ToTensor(),
# torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
#])
transform = torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
#torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
])
#data_train = torchvision.datasets.MNIST(
# "./data", train=True, download=True,
# transform=torchvision.transforms.Compose([
# #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
# torchvision.transforms.ToTensor()
# ])
#)
#data_test = torchvision.datasets.MNIST(
# "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
#)
### Classic Dataset ###
data_train = torchvision.datasets.CIFAR10("../data", train=True, download=download_data, transform=transform)
#data_val = torchvision.datasets.CIFAR10("../data", train=True, download=download_data, transform=transform)
data_test = torchvision.datasets.CIFAR10("../data", train=False, download=download_data, transform=transform)
train_subset_indices=range(int(len(data_train)/2))
val_subset_indices=range(int(len(data_train)/2),len(data_train))
#train_subset_indices=range(BATCH_SIZE*10)
#val_subset_indices=range(BATCH_SIZE*10, BATCH_SIZE*20)
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices), num_workers=num_workers, pin_memory=pin_memory)
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

985
higher/smart_aug/dataug.py Executable file
View file

@ -0,0 +1,985 @@
""" Data augmentation modules.
Features a custom implementaiton of RandAugment (RandAug), as well as a data augmentation modules allowing gradient propagation.
Typical usage:
aug_model = Augmented_model(Data_AugV5, model)
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import *
#import kornia
#import random
import numpy as np
import copy
import transformations as TF
class Data_augV5(nn.Module): #Optimisation jointe (mag, proba)
"""Data augmentation module with learnable parameters.
Applies transformations (TF) to batch of data.
Each TF is defined by a (name, probability of application, magnitude of distorsion) tuple which can be learned. For the full definiton of the TF, see transformations.py.
The TF probabilities defines a distribution from which we sample the TF applied.
Be warry, that the order of sequential application of TF is not taken into account. See Data_augV7.
Attributes:
_data_augmentation (bool): Wether TF will be applied during forward pass.
_TF_dict (dict) : A dictionnary containing the data transformations (TF) to be applied.
_TF (list) : List of TF names.
_nb_tf (int) : Number of TF used.
_N_seqTF (int) : Number of TF to be applied sequentially to each inputs
_shared_mag (bool) : Wether to share a single magnitude parameters for all TF.
_fixed_mag (bool): Wether to lock the TF magnitudes.
_fixed_prob (bool): Wether to lock the TF probabilies.
_samples (list): Sampled TF index during last forward pass.
_mix_dist (bool): Wether we use a mix of an uniform distribution and the real distribution (TF probabilites). If False, only a uniform distribution is used.
_fixed_mix (bool): Wether we lock the mix distribution factor.
_params (nn.ParameterDict): Learnable parameters.
_reg_tgt (Tensor): Target for the magnitude regularisation. Only used when _fixed_mag is set to false (ie. we learn the magnitudes).
_reg_mask (list): Mask selecting the TF considered for the regularisation.
"""
def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True):
"""Init Data_augv5.
Args:
TF_dict (dict): A dictionnary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
N_TF (int): Number of TF to be applied sequentially to each inputs. (default: 1)
mix_dist (float): Proportion [0.0, 1.0] of the real distribution used for sampling/selection of the TF. Distribution = (1-mix_dist)*Uniform_distribution + mix_dist*Real_distribution. If None is given, try to learn this parameter. (default: 0)
fixed_prob (bool): Wether to lock the TF probabilies. (default: False)
fixed_mag (bool): Wether to lock the TF magnitudes. (default: True)
shared_mag (bool): Wether to share a single magnitude parameters for all TF. (default: True)
"""
super(Data_augV5, self).__init__()
assert len(TF_dict)>0
assert N_TF>=0
self._data_augmentation = True
#TF
self._TF_dict = TF_dict
self._TF= list(self._TF_dict.keys())
self._nb_tf= len(self._TF)
self._N_seqTF = N_TF
#Mag
self._shared_mag = shared_mag
self._fixed_mag = fixed_mag
if not self._fixed_mag and len([tf for tf in self._TF if tf not in TF.TF_ignore_mag])==0:
print("WARNING: Mag would be fixed as current TF doesn't allow gradient propagation:",self._TF)
self._fixed_mag=True
#Distribution
self._fixed_prob=fixed_prob
self._samples = []
self._mix_dist = False
if mix_dist != 0.0: #Mix dist
self._mix_dist = True
self._fixed_mix=True
if mix_dist is None: #Learn Mix dist
self._fixed_mix = False
mix_dist=0.5
#Params
init_mag = float(TF.PARAMETER_MAX) if self._fixed_mag else float(TF.PARAMETER_MAX)/2
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Distribution prob uniforme
"mag" : nn.Parameter(torch.tensor(init_mag) if self._shared_mag
else torch.tensor(init_mag).repeat(self._nb_tf)), #[0, PARAMETER_MAX]
"mix_dist": nn.Parameter(torch.tensor(mix_dist).clamp(min=0.0,max=0.999))
})
#for tf in TF.TF_no_grad :
# if tf in self._TF: self._params['mag'].data[self._TF.index(tf)]=float(TF.PARAMETER_MAX) #TF fixe a max parameter
#for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag
#Mag regularisation
if not self._fixed_mag:
if self._shared_mag :
self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage amplitude max
else:
self._reg_mask=[self._TF.index(t) for t in self._TF if t not in TF.TF_ignore_mag]
self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max
def forward(self, x):
""" Main method of the Data augmentation module.
Args:
x (Tensor): Batch of data.
Returns:
Tensor : Batch of tranformed data.
"""
self._samples = []
if self._data_augmentation:# and TF.random.random() < 0.5:
device = x.device
batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
for _ in range(self._N_seqTF):
## Echantillonage ##
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
if not self._mix_dist:
self._distrib = uniforme_dist
else:
prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
mix_dist = self._params["mix_dist"].detach() if self._fixed_mix else self._params["mix_dist"]
self._distrib = (mix_dist*prob+(1-mix_dist)*uniforme_dist)#.softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*self._distrib)
sample = cat_distrib.sample()
self._samples.append(sample)
## Transformations ##
x = self.apply_TF(x, sample)
return x
def apply_TF(self, x, sampled_TF):
""" Applies the sampled transformations.
Args:
x (Tensor): Batch of data.
sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.
Returns:
Tensor: Batch of tranformed data.
"""
device = x.device
batch_size, channels, h, w = x.shape
smps_x=[]
for tf_idx in range(self._nb_tf):
mask = sampled_TF==tf_idx #Create selection mask
smp_x = x[mask] #torch.masked_select() ? (Necessite d'expand le mask au meme dim)
if smp_x.shape[0]!=0: #if there's data to TF
magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx]
if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param
tf=self._TF[tf_idx]
#In place
#x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
#Out of place
smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude)
idx= mask.nonzero()
idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
x=x.scatter(dim=0, index=idx, src=smp_x)
return x
def adjust_param(self, soft=False): #Detach from gradient ?
""" Enforce limitations to the learned parameters.
Ensure that the parameters value stays in the right intevals. This should be called after each update of those parameters.
Args:
soft (bool): Wether to use a softmax function for TF probabilites. Not Recommended as it tends to lock the probabilities, preventing them to be learned. (default: False)
"""
if not self._fixed_prob:
if soft :
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
else:
self._params['prob'].data = self._params['prob'].data.clamp(min=1/(self._nb_tf*100),max=1.0)
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
if not self._fixed_mag:
self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)
if not self._fixed_mix:
self._params['mix_dist'].data = self._params['mix_dist'].data.clamp(min=0.0, max=0.999)
def loss_weight(self):
""" Weights for the loss.
Compute the weights for the loss of each inputs depending on wich TF was applied to them.
Should be applied to the loss before reduction.
Do nottake into account the order of application of the TF. See Data_augV7.
Returns:
Tensor : Loss weights.
"""
if len(self._samples)==0 : return 1 #Pas d'echantillon = pas de ponderation
prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
#Plusieurs TF sequentielles (Attention ne prend pas en compte ordre !)
w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device)
for sample in self._samples:
tmp_w = torch.zeros(w_loss.size(),device=w_loss.device)
tmp_w.scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF)
w_loss += tmp_w
w_loss = w_loss * prob/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
w_loss = torch.sum(w_loss,dim=1)
return w_loss
def reg_loss(self, reg_factor=0.005):
""" Regularisation term used to learn the magnitudes.
Use an L2 loss to encourage high magnitudes TF.
Args:
reg_factor (float): Factor by wich the regularisation loss is multiplied. (default: 0.005)
Returns:
Tensor containing the regularisation loss value.
"""
if self._fixed_mag:
return torch.tensor(0)
else:
#return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean')
mags = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask]
max_mag_reg = reg_factor * F.mse_loss(mags, target=self._reg_tgt.to(mags.device), reduction='mean')
return max_mag_reg
def train(self, mode=True):
""" Set the module training mode.
Args:
mode (bool): Wether to learn the parameter of the module. None would not change mode. (default: None)
"""
#if mode is None :
# mode=self._data_augmentation
self.augment(mode=mode) #Inutile si mode=None
super(Data_augV5, self).train(mode)
return self
def eval(self):
""" Set the module to evaluation mode.
"""
return self.train(mode=False)
def augment(self, mode=True):
""" Set the augmentation mode.
Args:
mode (bool): Wether to perform data augmentation on the forward pass. (default: True)
"""
self._data_augmentation=mode
def __getitem__(self, key):
"""Access to the learnable parameters
Args:
key (string): Name of the learnable parameter to access.
Returns:
nn.Parameter.
"""
return self._params[key]
def __str__(self):
"""Name of the module
Returns:
String containing the name of the module as well as the higher levels parameters.
"""
dist_param=''
if self._fixed_prob: dist_param+='Fx'
mag_param='Mag'
if self._fixed_mag: mag_param+= 'Fx'
if self._shared_mag: mag_param+= 'Sh'
if not self._mix_dist:
return "Data_augV5(Uniform%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
elif self._fixed_mix:
return "Data_augV5(Mix%.1f%s-%dTFx%d-%s)" % (self._params['mix_dist'].item(),dist_param, self._nb_tf, self._N_seqTF, mag_param)
else:
return "Data_augV5(Mix%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
class Data_augV7(nn.Module): #Proba sequentielles
"""Data augmentation module with learnable parameters.
Applies transformations (TF) to batch of data.
Each TF is defined by a (name, probability of application, magnitude of distorsion) tuple which can be learned. For the full definiton of the TF, see transformations.py.
The TF probabilities defines a distribution from which we sample the TF applied.
Replace the use of TF by TF sets which are combinaisons of classic TF.
Attributes:
_data_augmentation (bool): Wether TF will be applied during forward pass.
_TF_dict (dict) : A dictionnary containing the data transformations (TF) to be applied.
_TF (list) : List of TF names.
_nb_tf (int) : Number of TF used.
_N_seqTF (int) : Number of TF to be applied sequentially to each inputs
_shared_mag (bool) : Wether to share a single magnitude parameters for all TF.
_fixed_mag (bool): Wether to lock the TF magnitudes.
_fixed_prob (bool): Wether to lock the TF probabilies.
_samples (list): Sampled TF index during last forward pass.
_mix_dist (bool): Wether we use a mix of an uniform distribution and the real distribution (TF probabilites). If False, only a uniform distribution is used.
_fixed_mix (bool): Wether we lock the mix distribution factor.
_params (nn.ParameterDict): Learnable parameters.
_reg_tgt (Tensor): Target for the magnitude regularisation. Only used when _fixed_mag is set to false (ie. we learn the magnitudes).
_reg_mask (list): Mask selecting the TF considered for the regularisation.
"""
def __init__(self, TF_dict=TF.TF_dict, N_TF=2, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True):
"""Init Data_augv7.
Args:
TF_dict (dict): A dictionnary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
N_TF (int): Number of TF to be applied sequentially to each inputs. Minimum 2, otherwise prefer using Data_augV5. (default: 2)
mix_dist (float): Proportion [0.0, 1.0] of the real distribution used for sampling/selection of the TF. Distribution = (1-mix_dist)*Uniform_distribution + mix_dist*Real_distribution. If None is given, try to learn this parameter. (default: 0)
fixed_prob (bool): Wether to lock the TF probabilies. (default: False)
fixed_mag (bool): Wether to lock the TF magnitudes. (default: True)
shared_mag (bool): Wether to share a single magnitude parameters for all TF. (default: True)
"""
super(Data_augV7, self).__init__()
assert len(TF_dict)>0
assert N_TF>=0
if N_TF<2:
print("WARNING: Data_augv7 isn't designed to use less than 2 sequentials TF. Please use Data_augv5 instead.")
self._data_augmentation = True
#TF
self._TF_dict = TF_dict
self._TF= list(self._TF_dict.keys())
self._nb_tf= len(self._TF)
self._N_seqTF = N_TF
#Mag
self._shared_mag = shared_mag
self._fixed_mag = fixed_mag
if not self._fixed_mag and len([tf for tf in self._TF if tf not in TF.TF_ignore_mag])==0:
print("WARNING: Mag would be fixed as current TF doesn't allow gradient propagation:",self._TF)
self._fixed_mag=True
#Distribution
self._fixed_prob=fixed_prob
self._samples = []
self._mix_dist = False
if mix_dist != 0.0: #Mix dist
self._mix_dist = True
self._fixed_mix=True
if mix_dist is None: #Learn Mix dist
self._fixed_mix = False
mix_dist=0.5
#TF sets
#import itertools
#itertools.product(range(self._nb_tf), repeat=self._N_seqTF)
#no_consecutive={idx for idx, t in enumerate(self._TF) if t in {'FlipUD', 'FlipLR'}} #Specific No consecutive ops
no_consecutive={idx for idx, t in enumerate(self._TF) if t not in {'Identity'}} #No consecutive same ops (except Identity)
cons_test = (lambda i, idxs: i in no_consecutive and len(idxs)!=0 and i==idxs[-1]) #Exclude selected consecutive
def generate_TF_sets(n_TF, set_size, idx_prefix=[]): #Generate every arrangement (with reuse) of TF (exclude cons_test arrangement)
TF_sets=[]
if set_size>1:
for i in range(n_TF):
if not cons_test(i, idx_prefix):
TF_sets += generate_TF_sets(n_TF, set_size=set_size-1, idx_prefix=idx_prefix+[i])
else:
TF_sets+=[[idx_prefix+[i]] for i in range(n_TF) if not cons_test(i, idx_prefix)]
return TF_sets
self._TF_sets=torch.ByteTensor(generate_TF_sets(self._nb_tf, self._N_seqTF)).squeeze()
self._nb_TF_sets=len(self._TF_sets)
print("Number of TF sets:",self._nb_TF_sets)
#print(self._TF_sets)
self._prob_mem=torch.zeros(self._nb_TF_sets)
#Params
init_mag = float(TF.PARAMETER_MAX) if self._fixed_mag else float(TF.PARAMETER_MAX)/2
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_TF_sets)/self._nb_TF_sets), #Distribution prob uniforme
"mag" : nn.Parameter(torch.tensor(init_mag) if self._shared_mag
else torch.tensor(init_mag).repeat(self._nb_tf)), #[0, PARAMETER_MAX]
"mix_dist": nn.Parameter(torch.tensor(mix_dist).clamp(min=0.0,max=0.999))
})
#for tf in TF.TF_no_grad :
# if tf in self._TF: self._params['mag'].data[self._TF.index(tf)]=float(TF.PARAMETER_MAX) #TF fixe a max parameter
#for t in TF.TF_no_mag: self._params['mag'][self._TF.index(t)].data-=self._params['mag'][self._TF.index(t)].data #Mag inutile pour les TF ignore_mag
#Mag regularisation
if not self._fixed_mag:
if self._shared_mag :
self._reg_tgt = torch.FloatTensor(TF.PARAMETER_MAX) #Encourage amplitude max
else:
self._reg_mask=[idx for idx,t in enumerate(self._TF) if t not in TF.TF_ignore_mag]
self._reg_tgt=torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX) #Encourage amplitude max
def forward(self, x):
""" Main method of the Data augmentation module.
Args:
x (Tensor): Batch of data.
Returns:
Tensor : Batch of tranformed data.
"""
self._samples = None
if self._data_augmentation:# and TF.random.random() < 0.5:
device = x.device
batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
## Echantillonage ##
uniforme_dist = torch.ones(1,self._nb_TF_sets,device=device).softmax(dim=1)
if not self._mix_dist:
self._distrib = uniforme_dist
else:
prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
mix_dist = self._params["mix_dist"].detach() if self._fixed_mix else self._params["mix_dist"]
self._distrib = (mix_dist*prob+(1-mix_dist)*uniforme_dist)#.softmax(dim=1) #Mix distrib reel / uniforme avec mix_factor
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_TF_sets), device=device)*self._distrib)
sample = cat_distrib.sample()
self._samples=sample
TF_samples=self._TF_sets[sample,:].to(device) #[Batch_size, TFseq]
for i in range(self._N_seqTF):
## Transformations ##
x = self.apply_TF(x, TF_samples[:,i])
return x
def apply_TF(self, x, sampled_TF):
""" Applies the sampled transformations.
Args:
x (Tensor): Batch of data.
sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.
Returns:
Tensor: Batch of tranformed data.
"""
device = x.device
batch_size, channels, h, w = x.shape
smps_x=[]
for tf_idx in range(self._nb_tf):
mask = sampled_TF==tf_idx #Create selection mask
smp_x = x[mask] #torch.masked_select() ? (Necessite d'expand le mask au meme dim)
if smp_x.shape[0]!=0: #if there's data to TF
magnitude=self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx]
if self._fixed_mag: magnitude=magnitude.detach() #Fmodel tente systematiquement de tracker les gradient de tout les param
tf=self._TF[tf_idx]
#In place
#x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
#Out of place
smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude)
idx= mask.nonzero()
idx= idx.expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
x=x.scatter(dim=0, index=idx, src=smp_x)
return x
def adjust_param(self, soft=False): #Detach from gradient ?
""" Enforce limitations to the learned parameters.
Ensure that the parameters value stays in the right intevals. This should be called after each update of those parameters.
Args:
soft (bool): Wether to use a softmax function for TF probabilites. Not Recommended as it tends to lock the probabilities, preventing them to be learned. (default: False)
"""
if not self._fixed_prob:
if soft :
self._params['prob'].data=F.softmax(self._params['prob'].data, dim=0) #Trop 'soft', bloque en dist uniforme si lr trop faible
else:
self._params['prob'].data = self._params['prob'].data.clamp(min=1/(self._nb_tf*100),max=1.0)
self._params['prob'].data = self._params['prob']/sum(self._params['prob']) #Contrainte sum(p)=1
if not self._fixed_mag:
self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)
if not self._fixed_mix:
self._params['mix_dist'].data = self._params['mix_dist'].data.clamp(min=0.0, max=0.999)
def loss_weight(self):
""" Weights for the loss.
Compute the weights for the loss of each inputs depending on wich TF was applied to them.
Should be applied to the loss before reduction.
Returns:
Tensor : Loss weights.
"""
if self._samples is None : return 1 #Pas d'echantillon = pas de ponderation
prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
w_loss = torch.zeros((self._samples.shape[0],self._nb_TF_sets), device=self._samples.device)
w_loss.scatter_(1, self._samples.view(-1,1), 1)
w_loss = w_loss * prob/self._distrib #Ponderation par les proba (divisee par la distrib pour pas diminuer la loss)
w_loss = torch.sum(w_loss,dim=1)
return w_loss
def reg_loss(self, reg_factor=0.005):
""" Regularisation term used to learn the magnitudes.
Use an L2 loss to encourage high magnitudes TF.
Args:
reg_factor (float): Factor by wich the regularisation loss is multiplied. (default: 0.005)
Returns:
Tensor containing the regularisation loss value.
"""
if self._fixed_mag:
return torch.tensor(0)
else:
#return reg_factor * F.l1_loss(self._params['mag'][self._reg_mask], target=self._reg_tgt, reduction='mean')
mags = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask]
max_mag_reg = reg_factor * F.mse_loss(mags, target=self._reg_tgt.to(mags.device), reduction='mean')
return max_mag_reg
def TF_prob(self):
""" Gives an estimation of the individual TF probabilities.
Be warry that the probability returned isn't exact. The TF distribution isn't fully represented by those.
Each probability should be taken individualy. They only represent the chance for a specific TF to be picked at least once.
Returms:
Tensor containing the single TF probabilities of applications.
"""
if torch.all(self._params['prob']!=self._prob_mem.to(self._params['prob'].device)): #Prevent recompute if originial prob didn't changed
self._prob_mem=self._params['prob'].data.detach_()
self._single_TF_prob=torch.zeros(self._nb_tf)
for idx_tf in range(self._nb_tf):
for i, t_set in enumerate(self._TF_sets):
#uni, count = np.unique(t_set, return_counts=True)
#if idx_tf in uni:
# res[idx_tf]+=self._params['prob'][i]*int(count[np.where(uni==idx_tf)])
if idx_tf in t_set:
self._single_TF_prob[idx_tf]+=self._params['prob'][i]
return self._single_TF_prob
def train(self, mode=True):
""" Set the module training mode.
Args:
mode (bool): Wether to learn the parameter of the module. None would not change mode. (default: None)
"""
#if mode is None :
# mode=self._data_augmentation
self.augment(mode=mode) #Inutile si mode=None
super(Data_augV7, self).train(mode)
return self
def eval(self):
""" Set the module to evaluation mode.
"""
return self.train(mode=False)
def augment(self, mode=True):
""" Set the augmentation mode.
Args:
mode (bool): Wether to perform data augmentation on the forward pass. (default: True)
"""
self._data_augmentation=mode
def __getitem__(self, key):
"""Access to the learnable parameters
Args:
key (string): Name of the learnable parameter to access.
Returns:
nn.Parameter.
"""
if key == 'prob': #Override prob access
return self.TF_prob()
return self._params[key]
def __str__(self):
"""Name of the module
Returns:
String containing the name of the module as well as the higher levels parameters.
"""
dist_param=''
if self._fixed_prob: dist_param+='Fx'
mag_param='Mag'
if self._fixed_mag: mag_param+= 'Fx'
if self._shared_mag: mag_param+= 'Sh'
if not self._mix_dist:
return "Data_augV7(Uniform%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
elif self._fixed_mix:
return "Data_augV7(Mix%.1f%s-%dTFx%d-%s)" % (self._params['mix_dist'].item(),dist_param, self._nb_tf, self._N_seqTF, mag_param)
else:
return "Data_augV7(Mix%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
class RandAug(nn.Module): #RandAugment = UniformFx-MagFxSh + rapide
"""RandAugment implementation.
Applies transformations (TF) to batch of data.
Each TF is defined by a (name, probability of application, magnitude of distorsion) tuple. For the full definiton of the TF, see transformations.py.
The TF probabilities are ignored and, instead selected randomly.
Attributes:
_data_augmentation (bool): Wether TF will be applied during forward pass.
_TF_dict (dict) : A dictionnary containing the data transformations (TF) to be applied.
_TF (list) : List of TF names.
_nb_tf (int) : Number of TF used.
_N_seqTF (int) : Number of TF to be applied sequentially to each inputs
_shared_mag (bool) : Wether to share a single magnitude parameters for all TF. Should be True.
_fixed_mag (bool): Wether to lock the TF magnitudes. Should be True.
_params (nn.ParameterDict): Data augmentation parameters.
"""
def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX):
"""Init RandAug.
Args:
TF_dict (dict): A dictionnary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
N_TF (int): Number of TF to be applied sequentially to each inputs. (default: 1)
mag (float): Magnitude of the TF. Should be between [PARAMETER_MIN, PARAMETER_MAX] defined in transformations.py. (default: PARAMETER_MAX)
"""
super(RandAug, self).__init__()
self._data_augmentation = True
self._TF_dict = TF_dict
self._TF= list(self._TF_dict.keys())
self._nb_tf= len(self._TF)
self._N_seqTF = N_TF
self.mag=nn.Parameter(torch.tensor(float(mag)))
self._params = nn.ParameterDict({
"prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Ignored
"mag" : nn.Parameter(torch.tensor(float(mag))),
})
self._shared_mag = True
self._fixed_mag = True
self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)
def forward(self, x):
""" Main method of the Data augmentation module.
Args:
x (Tensor): Batch of data.
Returns:
Tensor : Batch of tranformed data.
"""
if self._data_augmentation:# and TF.random.random() < 0.5:
device = x.device
batch_size, h, w = x.shape[0], x.shape[2], x.shape[3]
x = copy.deepcopy(x) #Evite de modifier les echantillons par reference (Problematique pour des utilisations paralleles)
for _ in range(self._N_seqTF):
## Echantillonage ## == sampled_ops = np.random.choice(transforms, N)
uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)
cat_distrib= Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*uniforme_dist)
sample = cat_distrib.sample()
## Transformations ##
x = self.apply_TF(x, sample)
return x
def apply_TF(self, x, sampled_TF):
""" Applies the sampled transformations.
Args:
x (Tensor): Batch of data.
sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.
Returns:
Tensor: Batch of tranformed data.
"""
smps_x=[]
for tf_idx in range(self._nb_tf):
mask = sampled_TF==tf_idx #Create selection mask
smp_x = x[mask] #torch.masked_select() ? (NEcessite d'expand le mask au meme dim)
if smp_x.shape[0]!=0: #if there's data to TF
magnitude=self._params["mag"].detach()
tf=self._TF[tf_idx]
#print(magnitude)
#In place
x[mask]=self._TF_dict[tf](x=smp_x, mag=magnitude)
return x
def adjust_param(self, soft=False):
"""Not used
"""
pass #Pas de parametre a opti
def loss_weight(self):
"""Not used
"""
return 1 #Pas d'echantillon = pas de ponderation
def reg_loss(self, reg_factor=0.005):
"""Not used
"""
return torch.tensor(0) #Pas de regularisation
def train(self, mode=None):
""" Set the module training mode.
Args:
mode (bool): Wether to learn the parameter of the module. None would not change mode. (default: None)
"""
if mode is None :
mode=self._data_augmentation
self.augment(mode=mode) #Inutile si mode=None
super(RandAug, self).train(mode)
def eval(self):
""" Set the module to evaluation mode.
"""
self.train(mode=False)
def augment(self, mode=True):
""" Set the augmentation mode.
Args:
mode (bool): Wether to perform data augmentation on the forward pass. (default: True)
"""
self._data_augmentation=mode
def __getitem__(self, key):
"""Access to the learnable parameters
Args:
key (string): Name of the learnable parameter to access.
Returns:
nn.Parameter.
"""
return self._params[key]
def __str__(self):
"""Name of the module
Returns:
String containing the name of the module as well as the higher levels parameters.
"""
return "RandAug(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag)
import higher
class Higher_model(nn.Module):
"""Model wrapper for higher gradient tracking.
Keep in memory the orginial model and it's functionnal, higher, version.
Might not be needed anymore if Higher implement detach for fmodel.
see : https://github.com/facebookresearch/higher
TODO: Get rid of the original model if not needed by user.
Attributes:
_name (string): Name of the model.
_mods (nn.ModuleDict): Models (Orginial and Higher version).
"""
def __init__(self, model):
"""Init Higher_model.
Args:
model (nn.Module): Network for which higher gradients can be tracked.
"""
super(Higher_model, self).__init__()
self._name = model.__str__()
self._mods = nn.ModuleDict({
'original': model,
'functional': higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
})
def get_diffopt(self, opt, grad_callback=None, track_higher_grads=True):
"""Get a differentiable version of an Optimizer.
Higher/Differentiable optimizer required to be used for higher gradient tracking.
Usage : diffopt.step(loss) == (opt.zero_grad, loss.backward, opt.step)
Be warry that if track_higher_grads is set to True, a new state of the model would be saved each time diffopt.step() is called.
Thus increasing memory consumption. The detach_() method should be called to reset the gradient tape and prevent memory saturation.
Args:
opt (torch.optim): Optimizer to make differentiable.
grad_callback (fct(grads)=grads): Function applied to the list of gradients parameters (ex: clipping). (default: None)
track_higher_grads (bool): Wether higher gradient are tracked. If True, the graph/states will be retained to allow backpropagation. (default: True)
Returns:
(Higher.DifferentiableOptimizer): Differentiable version of the optimizer.
"""
return higher.optim.get_diff_optim(opt,
self._mods['original'].parameters(),
fmodel=self._mods['functional'],
grad_callback=grad_callback,
track_higher_grads=track_higher_grads)
def forward(self, x):
""" Main method of the model.
Args:
x (Tensor): Batch of data.
Returns:
Tensor : Output of the network. Should be logits.
"""
return self._mods['functional'](x)
def detach_(self):
"""Detach from the graph.
Needed to limit the number of state kept in memory.
"""
tmp = self._mods['functional'].fast_params
self._mods['functional']._fast_params=[]
self._mods['functional'].update_params(tmp)
for p in self._mods['functional'].fast_params:
p.detach_().requires_grad_()
def state_dict(self):
"""Returns a dictionary containing a whole state of the module.
"""
return self._mods['functional'].state_dict()
def __getitem__(self, key):
"""Access to modules
Args:
key (string): Name of the module to access.
Returns:
nn.Module.
"""
return self._mods[key]
def __str__(self):
"""Name of the module
Returns:
String containing the name of the module.
"""
return self._name
class Augmented_model(nn.Module):
"""Wrapper for a Data Augmentation module and a model.
Attributes:
_mods (nn.ModuleDict): A dictionary containing the modules.
_data_augmentation (bool): Wether data augmentation should be used.
"""
def __init__(self, data_augmenter, model):
"""Init Augmented Model.
By default, data augmentation will be performed.
Args:
data_augmenter (nn.Module): Data augmentation module.
model (nn.Module): Network.
"""
super(Augmented_model, self).__init__()
self._mods = nn.ModuleDict({
'data_aug': data_augmenter,
'model': model
})
self.augment(mode=True)
def forward(self, x):
""" Main method of the Augmented model.
Perform the forward pass of both modules.
Args:
x (Tensor): Batch of data.
Returns:
Tensor : Output of the networks. Should be logits.
"""
return self._mods['model'](self._mods['data_aug'](x))
def augment(self, mode=True):
""" Set the augmentation mode.
Args:
mode (bool): Wether to perform data augmentation on the forward pass. (default: True)
"""
self._data_augmentation=mode
self._mods['data_aug'].augment(mode)
def train(self, mode=True):
""" Set the module training mode.
Args:
mode (bool): Wether to learn the parameter of the module. (default: None)
"""
#if mode is None :
# mode=self._data_augmentation
super(Augmented_model, self).train(mode)
self._mods['data_aug'].augment(mode=self._data_augmentation) #Restart if needed data augmentation
return self
def eval(self):
""" Set the module to evaluation mode.
"""
#return self.train(mode=False)
super(Augmented_model, self).train(mode=False)
self._mods['data_aug'].augment(mode=False)
return self
def items(self):
"""Return an iterable of the ModuleDict key/value pairs.
"""
return self._mods.items()
def update(self, modules):
"""Update the module dictionnary.
The new dictionnary should keep the same structure.
"""
assert(self._mods.keys()==modules.keys())
self._mods.update(modules)
def is_augmenting(self):
""" Return wether data augmentation is applied.
Returns:
bool : True if data augmentation is applied.
"""
return self._data_augmentation
def TF_names(self):
""" Get the transformations names used by the data augmentation module.
Returns:
list : names of the transformations of the data augmentation module.
"""
try:
return self._mods['data_aug']._TF
except:
return None
def __getitem__(self, key):
"""Access to the modules.
Args:
key (string): Name of the module to access.
Returns:
nn.Module.
"""
return self._mods[key]
def __str__(self):
"""Name of the module
Returns:
String containing the name of the module as well as the higher levels parameters.
"""
return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")"

26
higher/smart_aug/model.py Executable file
View file

@ -0,0 +1,26 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
## Basic CNN ##
class LeNet(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(num_inp, 20, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(20, 50, 5)
self.pool2 = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(5*5*50, 500)
self.fc2 = nn.Linear(500, num_out)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool2(F.relu(self.conv2(x)))
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
def __str__(self):
return "LeNet"

View file

@ -0,0 +1,490 @@
# coding=utf-8
# Copyright 2019 The Google UDA Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transforms used in the Augmentation Policies.
Copied from AutoAugment: https://github.com/tensorflow/models/blob/master/research/autoaugment/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import numpy as np
# pylint:disable=g-multiple-import
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
# pylint:enable=g-multiple-import
#import tensorflow as tf
#FLAGS = tf.flags.FLAGS
IMAGE_SIZE = 32
# What is the dataset mean and std of the images on the training set
PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted
def get_mean_and_std():
#if FLAGS.task_name == "cifar10":
means = [0.49139968, 0.48215841, 0.44653091]
stds = [0.24703223, 0.24348513, 0.26158784]
#elif FLAGS.task_name == "svhn":
# means = [0.4376821, 0.4437697, 0.47280442]
# stds = [0.19803012, 0.20101562, 0.19703614]
#else:
# assert False
return means, stds
def random_flip(x):
"""Flip the input x horizontally with 50% probability."""
if np.random.rand(1)[0] > 0.5:
return np.fliplr(x)
return x
def zero_pad_and_crop(img, amount=4):
"""Zero pad by `amount` zero pixels on each side then take a random crop.
Args:
img: numpy image that will be zero padded and cropped.
amount: amount of zeros to pad `img` with horizontally and verically.
Returns:
The cropped zero padded img. The returned numpy array will be of the same
shape as `img`.
"""
padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2,
img.shape[2]))
padded_img[amount:img.shape[0] + amount, amount:
img.shape[1] + amount, :] = img
top = np.random.randint(low=0, high=2 * amount)
left = np.random.randint(low=0, high=2 * amount)
new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :]
return new_img
def create_cutout_mask(img_height, img_width, num_channels, size):
"""Creates a zero mask used for cutout of shape `img_height` x `img_width`.
Args:
img_height: Height of image cutout mask will be applied to.
img_width: Width of image cutout mask will be applied to.
num_channels: Number of channels in the image.
size: Size of the zeros mask.
Returns:
A mask of shape `img_height` x `img_width` with all ones except for a
square of zeros of shape `size` x `size`. This mask is meant to be
elementwise multiplied with the original image. Additionally returns
the `upper_coord` and `lower_coord` which specify where the cutout mask
will be applied.
"""
assert img_height == img_width
# Sample center where cutout mask will be applied
height_loc = np.random.randint(low=0, high=img_height)
width_loc = np.random.randint(low=0, high=img_width)
# Determine upper right and lower left corners of patch
upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2))
lower_coord = (min(img_height, height_loc + size // 2),
min(img_width, width_loc + size // 2))
mask_height = lower_coord[0] - upper_coord[0]
mask_width = lower_coord[1] - upper_coord[1]
assert mask_height > 0
assert mask_width > 0
mask = np.ones((img_height, img_width, num_channels))
zeros = np.zeros((mask_height, mask_width, num_channels))
mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = (
zeros)
return mask, upper_coord, lower_coord
def cutout_numpy(img, size=16):
"""Apply cutout with mask of shape `size` x `size` to `img`.
The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
This operation applies a `size`x`size` mask of zeros to a random location
within `img`.
Args:
img: Numpy image that cutout will be applied to.
size: Height/width of the cutout mask that will be
Returns:
A numpy tensor that is the result of applying the cutout mask to `img`.
"""
img_height, img_width, num_channels = (img.shape[0], img.shape[1],
img.shape[2])
assert len(img.shape) == 3
mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size)
return img * mask
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / PARAMETER_MAX
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled
to level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / PARAMETER_MAX)
def pil_wrap(img, use_mean_std):
"""Convert the `img` numpy tensor to a PIL Image."""
if use_mean_std:
MEANS, STDS = get_mean_and_std()
else:
MEANS = [0, 0, 0]
STDS = [1, 1, 1]
img_ori = (img * STDS + MEANS) * 255
return Image.fromarray(
np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA')
def pil_unwrap(pil_img, use_mean_std, img_shape):
"""Converts the PIL img to a numpy array."""
if use_mean_std:
MEANS, STDS = get_mean_and_std()
else:
MEANS = [0, 0, 0]
STDS = [1, 1, 1]
pic_array = np.array(pil_img.getdata()).reshape((img_shape[0], img_shape[1], 4)) / 255.0
i1, i2 = np.where(pic_array[:, :, 3] == 0)
pic_array = (pic_array[:, :, :3] - MEANS) / STDS
pic_array[i1, i2] = [0, 0, 0]
return pic_array
def apply_policy(policy, img, use_mean_std=True):
"""Apply the `policy` to the numpy `img`.
Args:
policy: A list of tuples with the form (name, probability, level) where
`name` is the name of the augmentation operation to apply, `probability`
is the probability of applying the operation and `level` is what strength
the operation to apply.
img: Numpy image that will have `policy` applied to it.
Returns:
The result of applying `policy` to `img`.
"""
img_shape = img.shape
pil_img = pil_wrap(img, use_mean_std)
for xform in policy:
assert len(xform) == 3
name, probability, level = xform
xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(
probability, level, img_shape)
pil_img = xform_fn(pil_img)
return pil_unwrap(pil_img, use_mean_std, img_shape)
class TransformFunction(object):
"""Wraps the Transform function for pretty printing options."""
def __init__(self, func, name):
self.f = func
self.name = name
def __repr__(self):
return '<' + self.name + '>'
def __call__(self, pil_img):
return self.f(pil_img)
class TransformT(object):
"""Each instance of this class represents a specific transform."""
def __init__(self, name, xform_fn):
self.name = name
self.xform = xform_fn
def pil_transformer(self, probability, level, img_shape):
def return_function(im):
if random.random() < probability:
im = self.xform(im, level, img_shape)
return im
name = self.name + '({:.1f},{})'.format(probability, level)
return TransformFunction(return_function, name)
################## Transform Functions ##################
identity = TransformT('identity', lambda pil_img, level, _: pil_img)
flip_lr = TransformT(
'FlipLR',
lambda pil_img, level, _: pil_img.transpose(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT(
'FlipUD',
lambda pil_img, level, _: pil_img.transpose(Image.FLIP_TOP_BOTTOM))
# pylint:disable=g-long-lambda
auto_contrast = TransformT(
'AutoContrast',
lambda pil_img, level, _: ImageOps.autocontrast(
pil_img.convert('RGB')).convert('RGBA'))
equalize = TransformT(
'Equalize',
lambda pil_img, level, _: ImageOps.equalize(
pil_img.convert('RGB')).convert('RGBA'))
invert = TransformT(
'Invert',
lambda pil_img, level, _: ImageOps.invert(
pil_img.convert('RGB')).convert('RGBA'))
# pylint:enable=g-long-lambda
blur = TransformT(
'Blur', lambda pil_img, level, _: pil_img.filter(ImageFilter.BLUR))
smooth = TransformT(
'Smooth',
lambda pil_img, level, _: pil_img.filter(ImageFilter.SMOOTH))
def _rotate_impl(pil_img, level, _):
"""Rotates `pil_img` from -30 to 30 degrees depending on `level`."""
degrees = int_parameter(level, 30)
if random.random() > 0.5:
degrees = -degrees
return pil_img.rotate(degrees)
rotate = TransformT('Rotate', _rotate_impl)
def _posterize_impl(pil_img, level, _):
"""Applies PIL Posterize to `pil_img`."""
level = int_parameter(level, 4)
return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA')
posterize = TransformT('Posterize', _posterize_impl)
def _shear_x_impl(pil_img, level, img_shape):
"""Applies PIL ShearX to `pil_img`.
The ShearX operation shears the image along the horizontal axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearX applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform(
(img_shape[0], img_shape[1]),
Image.AFFINE,
(1, level, 0, 0, 1, 0))
shear_x = TransformT('ShearX', _shear_x_impl)
def _shear_y_impl(pil_img, level, img_shape):
"""Applies PIL ShearY to `pil_img`.
The ShearY operation shears the image along the vertical axis with `level`
magnitude.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had ShearX applied to it.
"""
level = float_parameter(level, 0.3)
if random.random() > 0.5:
level = -level
return pil_img.transform(
(img_shape[0], img_shape[1]),
Image.AFFINE,
(1, 0, 0, level, 1, 0))
shear_y = TransformT('ShearY', _shear_y_impl)
def _translate_x_impl(pil_img, level, img_shape):
"""Applies PIL TranslateX to `pil_img`.
Translate the image in the horizontal direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateX applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform(
(img_shape[0], img_shape[1]),
Image.AFFINE,
(1, 0, level, 0, 1, 0))
translate_x = TransformT('TranslateX', _translate_x_impl)
def _translate_y_impl(pil_img, level, img_shape):
"""Applies PIL TranslateY to `pil_img`.
Translate the image in the vertical direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had TranslateY applied to it.
"""
level = int_parameter(level, 10)
if random.random() > 0.5:
level = -level
return pil_img.transform(
(img_shape[0], img_shape[1]),
Image.AFFINE,
(1, 0, 0, 0, 1, level))
translate_y = TransformT('TranslateY', _translate_y_impl)
def _crop_impl(pil_img, level, img_shape, interpolation=Image.BILINEAR):
"""Applies a crop to `pil_img` with the size depending on the `level`."""
cropped = pil_img.crop((level, level, img_shape[0] - level, img_shape[1] - level))
resized = cropped.resize((img_shape[0], img_shape[1]), interpolation)
return resized
crop_bilinear = TransformT('CropBilinear', _crop_impl)
def _solarize_impl(pil_img, level, _):
"""Applies PIL Solarize to `pil_img`.
Translate the image in the vertical direction by `level`
number of pixels.
Args:
pil_img: Image in PIL object.
level: Strength of the operation specified as an Integer from
[0, `PARAMETER_MAX`].
Returns:
A PIL Image that has had Solarize applied to it.
"""
level = int_parameter(level, 256)
return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA')
solarize = TransformT('Solarize', _solarize_impl)
def _cutout_pil_impl(pil_img, level, img_shape):
"""Apply cutout to pil_img at the specified level."""
size = int_parameter(level, 20)
if size <= 0:
return pil_img
img_height, img_width, num_channels = (img_shape[0], img_shape[1], 3)
_, upper_coord, lower_coord = (
create_cutout_mask(img_height, img_width, num_channels, size))
pixels = pil_img.load() # create the pixel map
for i in range(upper_coord[0], lower_coord[0]): # for every col:
for j in range(upper_coord[1], lower_coord[1]): # For every row
pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly
return pil_img
cutout = TransformT('Cutout', _cutout_pil_impl)
def _enhancer_impl(enhancer):
"""Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL."""
def impl(pil_img, level, _):
v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it
return enhancer(pil_img).enhance(v)
return impl
color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(
ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))
ALL_TRANSFORMS = [
flip_lr,
flip_ud,
auto_contrast,
equalize,
invert,
rotate,
posterize,
crop_bilinear,
solarize,
color,
contrast,
brightness,
sharpness,
shear_x,
shear_y,
translate_x,
translate_y,
cutout,
blur,
smooth
]
NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,85 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import higher
import time
data_train = torchvision.datasets.CIFAR10("./data", train=True, download=True, transform=torchvision.transforms.ToTensor())
dl_train = torch.utils.data.DataLoader(data_train, batch_size=300, shuffle=True, num_workers=0, pin_memory=False)
class Aug_model(nn.Module):
def __init__(self, model, hyper_param=True):
super(Aug_model, self).__init__()
#### Origin of the issue ? ####
if hyper_param:
self._params = nn.ParameterDict({
"hyper_param": nn.Parameter(torch.Tensor([0.5])),
})
###############################
self._mods = nn.ModuleDict({
'model': model,
})
def forward(self, x):
return self._mods['model'](x) #* self._params['hyper_param']
def __getitem__(self, key):
return self._mods[key]
class Aug_model2(nn.Module): #Slow increase like no hyper_param
def __init__(self, model, hyper_param=True):
super(Aug_model2, self).__init__()
#### Origin of the issue ? ####
if hyper_param:
self._params = nn.ParameterDict({
"hyper_param": nn.Parameter(torch.Tensor([0.5])),
})
###############################
self._mods = nn.ModuleDict({
'model': model,
'fmodel': higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
})
def forward(self, x):
return self._mods['fmodel'](x) * self._params['hyper_param']
def get_diffopt(self, opt, track_higher_grads=True):
return higher.optim.get_diff_optim(opt,
self._mods['model'].parameters(),
fmodel=self._mods['fmodel'],
track_higher_grads=track_higher_grads)
def __getitem__(self, key):
return self._mods[key]
if __name__ == "__main__":
device = torch.device('cuda:1')
aug_model = Aug_model2(
model=torch.hub.load('pytorch/vision:v0.4.2', 'resnet18', pretrained=False),
hyper_param=True #False will not extend step time
).to(device)
inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)
#fmodel = higher.patch.monkeypatch(aug_model, device=None, copy_initial_weights=True)
#diffopt = higher.optim.get_diff_optim(inner_opt, aug_model.parameters(),fmodel=fmodel,track_higher_grads=True)
diffopt = aug_model.get_diffopt(inner_opt)
for i, (xs, ys) in enumerate(dl_train):
xs, ys = xs.to(device), ys.to(device)
#logits = fmodel(xs)
logits = aug_model(xs)
loss = F.cross_entropy(F.log_softmax(logits, dim=1), ys, reduction='mean')
t = time.process_time()
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
#print(len(fmodel._fast_params),"step", time.process_time()-t)
print(len(aug_model['fmodel']._fast_params),"step", time.process_time()-t)

View file

@ -0,0 +1,502 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
## Basic CNN ##
class LeNet_F(nn.Module):
def __init__(self, num_inp, num_out):
super(LeNet_F, self).__init__()
self._params = nn.ParameterDict({
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
'b1': nn.Parameter(torch.zeros(20)),
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
'b2': nn.Parameter(torch.zeros(50)),
#'w3': nn.Parameter(torch.zeros(500,4*4*50)), #num_imp=1
'w3': nn.Parameter(torch.zeros(500,5*5*50)), #num_imp=3
'b3': nn.Parameter(torch.zeros(500)),
'w4': nn.Parameter(torch.zeros(num_out, 500)),
'b4': nn.Parameter(torch.zeros(num_out))
})
self.initialize()
def initialize(self):
nn.init.kaiming_uniform_(self._params["w1"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w2"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w3"], a=math.sqrt(5))
nn.init.kaiming_uniform_(self._params["w4"], a=math.sqrt(5))
def forward(self, x):
#print("Start Shape ", x.shape)
out = F.relu(F.conv2d(input=x, weight=self._params["w1"], bias=self._params["b1"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = F.relu(F.conv2d(input=out, weight=self._params["w2"], bias=self._params["b2"]))
#print("Shape ", out.shape)
out = F.max_pool2d(out, 2)
#print("Shape ", out.shape)
out = out.view(out.size(0), -1)
#print("Shape ", out.shape)
out = F.relu(F.linear(out, self._params["w3"], self._params["b3"]))
#print("Shape ", out.shape)
out = F.linear(out, self._params["w4"], self._params["b4"])
#print("Shape ", out.shape)
#return F.log_softmax(out, dim=1)
return out
def __getitem__(self, key):
return self._params[key]
def __str__(self):
return "LeNet"
## MobileNetv2 ##
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=1000,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
x = x.mean([2, 3])
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def __str__(self):
return "MobileNetV2"
## ResNet ##
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
__constants__ = ['downsample']
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
__constants__ = ['downsample']
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
#ResNet18 : block=BasicBlock, layers=[2, 2, 2, 2]
class ResNet(nn.Module):
def __init__(self, block=BasicBlock, layers=[2, 2, 2, 2], num_classes=1000, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x):
return self._forward_impl(x)
def __str__(self):
return "ResNet18"
## Wide ResNet ##
#https://github.com/xternalz/WideResNet-pytorch/blob/master/wideresnet.py
#https://github.com/arcelien/pba/blob/master/pba/wrn.py
#https://github.com/szagoruyko/wide-residual-networks/blob/master/pytorch/resnet.py
'''
class BasicBlock(nn.Module):
def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
super(BasicBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.relu1 = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_planes)
self.relu2 = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.droprate = dropRate
self.equalInOut = (in_planes == out_planes)
self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
padding=0, bias=False) or None
def forward(self, x):
if not self.equalInOut:
x = self.relu1(self.bn1(x))
else:
out = self.relu1(self.bn1(x))
out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
if self.droprate > 0:
out = F.dropout(out, p=self.droprate, training=self.training)
out = self.conv2(out)
return torch.add(x if self.equalInOut else self.convShortcut(x), out)
class NetworkBlock(nn.Module):
def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
super(NetworkBlock, self).__init__()
self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)
def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
layers = []
for i in range(int(nb_layers)):
layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate))
return nn.Sequential(*layers)
def forward(self, x):
return self.layer(x)
#wrn_size: 32 = WRN-28-2 ? 160 = WRN-28-10
class WideResNet(nn.Module):
#def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
def __init__(self, num_classes, wrn_size, depth=28, dropRate=0.0):
super(WideResNet, self).__init__()
self.kernel_size = wrn_size
self.depth=depth
filter_size = 3
nChannels = [min(self.kernel_size, 16), self.kernel_size, self.kernel_size * 2, self.kernel_size * 4]
strides = [1, 2, 2] # stride for each resblock
#nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
assert((depth - 4) % 6 == 0)
n = (depth - 4) / 6
block = BasicBlock
# 1st conv before any network block
self.conv1 = nn.Conv2d(filter_size, nChannels[0], kernel_size=3, stride=1,
padding=1, bias=False)
# 1st block
self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, strides[0], dropRate)
# 2nd block
self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, strides[1], dropRate)
# 3rd block
self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, strides[2], dropRate)
# global average pooling and classifier
self.bn1 = nn.BatchNorm2d(nChannels[3])
self.relu = nn.ReLU(inplace=True)
self.fc = nn.Linear(nChannels[3], num_classes)
self.nChannels = nChannels[3]
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.bias.data.zero_()
def forward(self, x):
out = self.conv1(x)
out = self.block1(out)
out = self.block2(out)
out = self.block3(out)
out = self.relu(self.bn1(out))
out = F.avg_pool2d(out, 8)
out = out.view(-1, self.nChannels)
return self.fc(out)
def architecture(self):
return super(WideResNet, self).__str__()
def __str__(self):
return "WideResNet(s{}-d{})".format(self.kernel_size, self.depth)
'''

150
higher/smart_aug/old/test_lr.py Executable file
View file

@ -0,0 +1,150 @@
import numpy as np
import json, math, time, os
from torch.utils.data import SubsetRandomSampler
import torch.optim as optim
import higher
from model import *
import copy
BATCH_SIZE = 300
TEST_SIZE = 300
mnist_train = torchvision.datasets.MNIST(
"./data", train=True, download=True,
transform=torchvision.transforms.Compose([
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
torchvision.transforms.ToTensor()
])
)
mnist_test = torchvision.datasets.MNIST(
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
)
#train_subset_indices=range(int(len(mnist_train)/2))
train_subset_indices=range(BATCH_SIZE)
val_subset_indices=range(int(len(mnist_train)/2),len(mnist_train))
dl_train = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
dl_val = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=TEST_SIZE, shuffle=False)
def test(model):
model.eval()
for i, (features, labels) in enumerate(dl_test):
pred = model.forward(features)
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
def train_classic(model, optim, epochs=1):
model.train()
log = []
for epoch in range(epochs):
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
optim.zero_grad()
pred = model.forward(features)
loss = F.cross_entropy(pred,labels)
loss.backward()
optim.step()
#### Log ####
tf = time.process_time()
data={
"time": tf - t0,
}
log.append(data)
times = [x["time"] for x in log]
print("Vanilla : acc", test(model), "in (ms):", np.mean(times), "+/-", np.std(times))
##########################################
if __name__ == "__main__":
device = torch.device('cpu')
model = LeNet(1,10)
opt_param = {
"lr": torch.tensor(1e-2).requires_grad_(),
"momentum": torch.tensor(0.9).requires_grad_()
}
n_inner_iter = 1
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
epoch = 0
epochs = 10
####
train_classic(model=model, optim=torch.optim.Adam(model.parameters(), lr=0.001), epochs=epochs)
model = LeNet(1,10)
meta_opt = torch.optim.Adam(opt_param.values(), lr=1e-2)
inner_opt = torch.optim.SGD(model.parameters(), lr=opt_param['lr'], momentum=opt_param['momentum'])
#for xs_val, ys_val in dl_val:
while epoch < epochs:
#print(data_aug.params["mag"], data_aug.params["mag"].grad)
meta_opt.zero_grad()
model.train()
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effet copy_initial_weight pas clair...
for param_group in diffopt.param_groups:
param_group['lr'] = opt_param['lr']
param_group['momentum'] = opt_param['momentum']
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #Fin epoch train
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
print('Epoch', epoch)
print('train loss',loss.item(), '/ val loss', val_loss.item())
print('acc', test(model))
print('opt : lr', opt_param['lr'].item(), 'momentum', opt_param['momentum'].item())
print('-'*9)
model.train()
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
#print('loss',loss.item())
diffopt.step(loss) # note that `step` must take `loss` as an argument!
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
# these new parameters, as an alternative to getting them from
# `fmodel.fast_params` or `fmodel.parameters()` after calling
# `diffopt.step`.
# At this point, or at any point in the iteration, you can take the
# gradient of `fmodel.parameters()` (or equivalently
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
# `grad_fn` as an attribute, and be part of the gradient tape.
# At the end of your inner loop you can obtain these e.g. ...
#grad_of_grads = torch.autograd.grad(
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val_it)
xs_val, ys_val = next(dl_val_it)
val_logits = fmodel(xs_val)
val_loss = F.cross_entropy(val_logits, ys_val)
#print('val_loss',val_loss.item())
val_loss.backward()
#meta_grads = torch.autograd.grad(val_loss, opt_lr, allow_unused=True)
#print(meta_grads)
for param_group in diffopt.param_groups:
print(param_group['lr'], '/',param_group['lr'].grad)
print(param_group['momentum'], '/',param_group['momentum'].grad)
#model=copy.deepcopy(fmodel)
model.load_state_dict(fmodel.state_dict())
meta_opt.step()

View file

@ -0,0 +1,866 @@
import torch
#import torch.optim
import torchvision
import higher
from datasets import *
from utils import *
def train_classic_higher(model, epochs=1):
device = next(model.parameters()).device
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
model.train()
dl_val_it = iter(dl_val)
log = []
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, diffopt):
for epoch in range(epochs):
#print_torch_mem("Start epoch "+str(epoch))
#print("Fast param ",len(fmodel._fast_params))
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
#print_torch_mem("Start iter")
features,labels = features.to(device), labels.to(device)
#optim.zero_grad()
logits = model.forward(features)
pred = F.log_softmax(logits, dim=1)
loss = F.cross_entropy(pred,labels)
#.backward()
#optim.step()
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
model_copy(src=fmodel, dst=model, patch_copy=False)
optim_copy(dopt=diffopt, opt=optim)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(optim, model.parameters(),fmodel=fmodel,track_higher_grads=False)
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy, _ =test(model)
model.train()
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
return log
def train_classic_tests(model, epochs=1):
device = next(model.parameters()).device
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
countcopy=0
model.train()
dl_val_it = iter(dl_val)
log = []
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
doptim = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)
for epoch in range(epochs):
print_torch_mem("Start epoch")
print(len(fmodel._fast_params))
t0 = time.process_time()
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=True) as (fmodel, doptim):
#fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
#doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
for i, (features, labels) in enumerate(dl_train):
features,labels = features.to(device), labels.to(device)
#with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, doptim):
#optim.zero_grad()
pred = fmodel.forward(features)
loss = F.cross_entropy(pred,labels)
doptim.step(loss) #(opt.zero_grad, loss.backward, opt.step)
#loss.backward()
#new_params = doptim.step(loss, params=fmodel.parameters())
#fmodel.update_params(new_params)
#print('Fast param',len(fmodel._fast_params))
#print('opt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][2]['momentum_buffer'].shape)
if False or (len(fmodel._fast_params)>1):
print("fmodel fast param",len(fmodel._fast_params))
'''
#val_loss = F.cross_entropy(fmodel(features), labels)
#print_graph(val_loss)
#val_loss.backward()
#print('bip')
tmp = fmodel.parameters()
#print(list(tmp)[1])
tmp = [higher.utils._copy_tensor(t,safe_copy=True) if isinstance(t, torch.Tensor) else t for t in tmp]
#print(len(tmp))
#fmodel._fast_params.clear()
del fmodel._fast_params
fmodel._fast_params=None
fmodel.fast_params=tmp # Surcharge la memoire
#fmodel.update_params(tmp) #Meilleur perf / Surcharge la memoire avec trach higher grad
#optim._fmodel=fmodel
'''
countcopy+=1
model_copy(src=fmodel, dst=model, patch_copy=False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
#doptim.detach_dyn()
#tmp = doptim.state
#tmp = doptim.state_dict()
#for k, v in tmp['state'].items():
# print('dict',k, type(v))
a = optim.param_groups[0]['params'][0]
state = optim.state[a]
#state['momentum_buffer'] = None
#print('opt state', type(optim.state[a]), len(optim.state[a]))
#optim.load_state_dict(tmp)
for group_idx, group in enumerate(optim.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
optim.state[p]=doptim.state[group_idx][p_idx]
#print('opt state', type(optim.state[a]['momentum_buffer']), optim.state[a]['momentum_buffer'][0:10])
#print('dopt state', type(doptim.state[0][0]['momentum_buffer']), doptim.state[0][0]['momentum_buffer'][0:10])
'''
for a in tmp:
#print(type(a), len(a))
for nb, b in a.items():
#print(nb, type(b), len(b))
for n, state in b.items():
#print(n, type(states))
#print(state.grad_fn)
state = torch.tensor(state.data).requires_grad_()
#print(state.grad_fn)
'''
doptim = higher.optim.get_diff_optim(optim, model.parameters(), track_higher_grads=True)
#doptim.state = tmp
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy, _ =test(model)
model.train()
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
#countcopy+=1
#model_copy(src=fmodel, dst=model, patch_copy=False)
#optim.load_state_dict(doptim.state_dict()) #Besoin sauver etat otpim ?
print("Copy ", countcopy)
return log
from torchvision.datasets.vision import VisionDataset
from PIL import Image
import augmentation_transforms
import numpy as np
class AugmentedDatasetV2(VisionDataset):
def __init__(self, root, train=True, transform=None, target_transform=None, download=False, subset=None):
super(AugmentedDatasetV2, self).__init__(root, transform=transform, target_transform=target_transform)
supervised_dataset = torchvision.datasets.CIFAR10(root, train=train, download=download, transform=transform)
self.sup_data = supervised_dataset.data if not subset else supervised_dataset.data[subset[0]:subset[1]]
self.sup_targets = supervised_dataset.targets if not subset else supervised_dataset.targets[subset[0]:subset[1]]
assert len(self.sup_data)==len(self.sup_targets)
for idx, img in enumerate(self.sup_data):
self.sup_data[idx]= Image.fromarray(img) #to PIL Image
self.unsup_data=[]
self.unsup_targets=[]
self.origin_idx=[]
self.dataset_info= {
'name': 'CIFAR10',
'sup': len(self.sup_data),
'unsup': len(self.unsup_data),
'length': len(self.sup_data)+len(self.unsup_data),
}
self._TF = [
## Geometric TF ##
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
'Cutout',
## Color TF ##
'Contrast',
'Color',
'Brightness',
'Sharpness',
'Posterize',
'Solarize',
'Invert',
'AutoContrast',
'Equalize',
]
self._op_list =[]
self.prob=0.5
self.mag_range=(1, 10)
for tf in self._TF:
for mag in range(self.mag_range[0], self.mag_range[1]):
self._op_list+=[(tf, self.prob, mag)]
self._nb_op = len(self._op_list)
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (image, target) where target is index of the target class.
"""
aug_img, origin_img, target = self.unsup_data[index], self.sup_data[self.origin_idx[index]], self.unsup_targets[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
#img = Image.fromarray(img)
if self.transform is not None:
aug_img = self.transform(aug_img)
origin_img = self.transform(origin_img)
if self.target_transform is not None:
target = self.target_transform(target)
return aug_img, origin_img, target
def augement_data(self, aug_copy=1):
policies = []
for op_1 in self._op_list:
for op_2 in self._op_list:
policies += [[op_1, op_2]]
for idx, image in enumerate(self.sup_data):
if idx%(self.dataset_info['sup']/5)==0: print("Augmenting data... ", idx,"/", self.dataset_info['sup'])
#if idx==10000:break
for _ in range(aug_copy):
chosen_policy = policies[np.random.choice(len(policies))]
aug_image = augmentation_transforms.apply_policy(chosen_policy, image, use_mean_std=False) #Cast en float image
#aug_image = augmentation_transforms.cutout_numpy(aug_image)
self.unsup_data+=[(aug_image*255.).astype(self.sup_data.dtype)]#Cast float image to uint8
self.unsup_targets+=[self.sup_targets[idx]]
self.origin_idx+=[idx]
#self.unsup_data=(np.array(self.unsup_data)*255.).astype(self.sup_data.dtype) #Cast float image to uint8
self.unsup_data=np.array(self.unsup_data)
assert len(self.unsup_data)==len(self.unsup_targets)
self.dataset_info['unsup']=len(self.unsup_data)
self.dataset_info['length']=self.dataset_info['sup']+self.dataset_info['unsup']
def __len__(self):
return self.dataset_info['unsup']#self.dataset_info['length']
def __str__(self):
return "CIFAR10(Sup:{}-Unsup:{}-{}TF(Mag{}-{}))".format(self.dataset_info['sup'], self.dataset_info['unsup'], len(self._TF), self.mag_range[0], self.mag_range[1])
def train_UDA(model, dl_unsup, opt_param, epochs=1, print_freq=1):
"""Training of a model using UDA inspired approach.
Intended to be used alongside an already augmented dataset (see AugmentedDatasetV2).
Args:
model (nn.Module): Model to train.
dl_unsup (Dataloader): Data loader of unsupervised/augmented data.
opt_param (dict): Dictionnary containing optimizers parameters.
epochs (int): Number of epochs to perform. (default: 1)
print_freq (int): Number of epoch between display of the state of training. If set to None, no display will be done. (default:1)
Returns:
(list) Logs of training. Each items is a dict containing results of an epoch.
"""
device = next(model.parameters()).device
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
opt = torch.optim.SGD(model.parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9
model.train()
dl_val_it = iter(dl_val)
dl_unsup_it =iter(dl_unsup)
log = []
for epoch in range(epochs):
#print_torch_mem("Start epoch")
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
#print_torch_mem("Start iter")
features,labels = features.to(device), labels.to(device)
optim.zero_grad()
#Supervised
logits = model.forward(features)
pred = F.log_softmax(logits, dim=1)
sup_loss = F.cross_entropy(pred,labels)
#Unsupervised
try:
aug_xs, origin_xs, ys = next(dl_unsup_it)
except StopIteration: #Fin epoch val
dl_unsup_it =iter(dl_unsup)
aug_xs, origin_xs, ys = next(dl_unsup_it)
aug_xs, origin_xs, ys = aug_xs.to(device), origin_xs.to(device), ys.to(device)
#print(aug_xs.shape, origin_xs.shape, ys.shape)
sup_logits = model.forward(origin_xs)
unsup_logits = model.forward(aug_xs)
log_sup=F.log_softmax(sup_logits, dim=1)
log_unsup=F.log_softmax(unsup_logits, dim=1)
#KL div w/ logits
unsup_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_unsup)
unsup_loss=unsup_loss.sum(dim=-1).mean()
#print(unsup_loss)
unsupp_coeff = 1
loss = sup_loss + unsup_loss * unsupp_coeff
loss.backward()
optim.step()
#### Tests ####
tf = time.process_time()
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
val_loss = F.cross_entropy(model(xs_val), ys_val)
accuracy, _ =test(model)
model.train()
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Sup Loss :', sup_loss.item(), '/ unsup_loss :', unsup_loss.item())
print('Accuracy :', accuracy)
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
return log
def run_simple_dataug(inner_it, epochs=1):
device = next(model.parameters()).device
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
#aug_model = nn.Sequential(
# Data_aug(),
# LeNet(1,10),
# )
aug_model = Augmented_model(Data_aug(), LeNet(1,10)).to(device)
print(str(aug_model))
meta_opt = torch.optim.Adam(aug_model['data_aug'].parameters(), lr=1e-2)
inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)
log = []
t0 = time.process_time()
epoch = 0
while epoch < epochs:
meta_opt.zero_grad()
aug_model.train()
with higher.innerloop_ctx(aug_model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): #effet copy_initial_weight pas clair...
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #Fin epoch train
tf = time.process_time()
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
accuracy, _ =test(model)
aug_model.train()
#### Print ####
print('-'*9)
print('Epoch %d/%d'%(epoch,epochs))
print('train loss',loss.item(), '/ val loss', val_loss.item())
print('acc', accuracy)
print('mag', aug_model['data_aug']['mag'].item())
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": aug_model['data_aug']['mag'].item(),
}
log.append(data)
t0 = time.process_time()
xs, ys = xs.to(device), ys.to(device)
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys) # no need to call loss.backwards()
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('mag', fmodel['data_aug']['mag'].grad)
diffopt.step(loss) # note that `step` must take `loss` as an argument!
# The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
# these new parameters, as an alternative to getting them from
# `fmodel.fast_params` or `fmodel.parameters()` after calling
# `diffopt.step`.
# At this point, or at any point in the iteration, you can take the
# gradient of `fmodel.parameters()` (or equivalently
# `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
# `fmodel.init_fast_params`). i.e. `fast_params` will always have
# `grad_fn` as an attribute, and be part of the gradient tape.
# At the end of your inner loop you can obtain these e.g. ...
#grad_of_grads = torch.autograd.grad(
# meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
fmodel.augment(mode=False)
val_logits = fmodel(xs_val) #Validation sans transfornations !
val_loss = F.cross_entropy(val_logits, ys_val)
#print('val_loss',val_loss.item())
val_loss.backward()
#print('mag', fmodel['data_aug']['mag'], '/', fmodel['data_aug']['mag'].grad)
#model=copy.deepcopy(fmodel)
aug_model.load_state_dict(fmodel.state_dict()) #Do not copy gradient !
#Copie des gradients
for paramName, paramValue, in fmodel.named_parameters():
for netCopyName, netCopyValue, in aug_model.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#print('mag', aug_model['data_aug']['mag'], '/', aug_model['data_aug']['mag'].grad)
meta_opt.step()
plot_res(log, fig_name="res/{}-{} epochs- {} in_it".format(str(aug_model),epochs,inner_it))
print('-'*9)
times = [x["time"] for x in log]
print(str(aug_model),": acc", max([x["acc"] for x in log]), "in (ms):", np.mean(times), "+/-", np.std(times))
def run_dist_dataug(model, epochs=1, inner_it=1, dataug_epoch_start=0):
device = next(model.parameters()).device
dl_train_it = iter(dl_train)
dl_val_it = iter(dl_val)
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-3)
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)
high_grad_track = True
if dataug_epoch_start>0:
model.augment(mode=False)
high_grad_track = False
model.train()
log = []
t0 = time.process_time()
countcopy=0
val_loss=torch.tensor(0)
opt_param=None
epoch = 0
while epoch < epochs:
meta_opt.zero_grad()
with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt): #effet copy_initial_weight pas clair...
for i in range(n_inner_iter):
try:
xs, ys = next(dl_train_it)
except StopIteration: #Fin epoch train
tf = time.process_time()
epoch +=1
dl_train_it = iter(dl_train)
xs, ys = next(dl_train_it)
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
#viz_sample_data(imgs=aug_model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch))
accuracy, _ =test(model)
model.train()
#### Print ####
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', accuracy)
print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',aug_model['data_aug']['prob'].grad)
#############
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": [p for p in model['data_aug']['prob']],
}
log.append(data)
#############
if epoch == dataug_epoch_start:
print('Starting Data Augmention...')
model.augment(mode=True)
high_grad_track = True
t0 = time.process_time()
xs, ys = xs.to(device), ys.to(device)
'''
#Methode exacte
final_loss = 0
for tf_idx in range(fmodel['data_aug']._nb_tf):
fmodel['data_aug'].transf_idx=tf_idx
logits = fmodel(xs)
loss = F.cross_entropy(logits, ys)
#loss.backward(retain_graph=True)
#print('idx', tf_idx)
#print(fmodel['data_aug']['prob'][tf_idx], fmodel['data_aug']['prob'][tf_idx].grad)
final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
loss = final_loss
'''
#Methode uniforme
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(logits, ys, reduction='none') # no need to call loss.backwards()
if fmodel._data_augmentation: #Weight loss
w_loss = fmodel['data_aug'].loss_weight().to(device)
loss = loss * w_loss
loss = loss.mean()
#'''
#to visualize computational graph
#print_graph(loss)
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('prob grad', fmodel['data_aug']['prob'].grad)
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
try:
xs_val, ys_val = next(dl_val_it)
except StopIteration: #Fin epoch val
dl_val_it = iter(dl_val)
xs_val, ys_val = next(dl_val_it)
xs_val, ys_val = xs_val.to(device), ys_val.to(device)
fmodel.augment(mode=False) #Validation sans transfornations !
val_loss = F.cross_entropy(fmodel(xs_val), ys_val)
#print_graph(val_loss)
val_loss.backward()
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
meta_opt.step()
model['data_aug'].adjust_param() #Contrainte sum(proba)=1
print("Copy ", countcopy)
return log
def run_dist_dataugV2(model, opt_param, epochs=1, inner_it=0, dataug_epoch_start=0, print_freq=1, KLdiv=False, loss_patience=None, save_sample=False):
device = next(model.parameters()).device
log = []
countcopy=0
val_loss=torch.tensor(0) #Necessaire si pas de metastep sur une epoch
dl_val_it = iter(dl_val)
#if inner_it!=0:
meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=opt_param['Meta']['lr']) #lr=1e-2
inner_opt = torch.optim.SGD(model['model'].parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9
high_grad_track = True
if inner_it == 0:
high_grad_track=False
if dataug_epoch_start!=0:
model.augment(mode=False)
high_grad_track = False
val_loss_monitor= None
if loss_patience != None :
if dataug_epoch_start==-1: val_loss_monitor = loss_monitor(patience=loss_patience, end_train=2) #1st limit = dataug start
else: val_loss_monitor = loss_monitor(patience=loss_patience) #Val loss monitor (Not on val data : used by Dataug... => Test data)
model.train()
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
meta_opt.zero_grad()
for epoch in range(1, epochs+1):
#print_torch_mem("Start epoch "+str(epoch))
#print(high_grad_track, fmodel._data_augmentation, len(fmodel._fast_params))
t0 = time.process_time()
#with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt):
for i, (xs, ys) in enumerate(dl_train):
xs, ys = xs.to(device), ys.to(device)
#Methode exacte
#final_loss = 0
#for tf_idx in range(fmodel['data_aug']._nb_tf):
# fmodel['data_aug'].transf_idx=tf_idx
# logits = fmodel(xs)
# loss = F.cross_entropy(logits, ys)
# #loss.backward(retain_graph=True)
# final_loss += loss*fmodel['data_aug']['prob'][tf_idx] #Take it in the forward function ?
#loss = final_loss
if(not KLdiv):
#Methode uniforme
logits = fmodel(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(F.log_softmax(logits, dim=1), ys, reduction='none') # no need to call loss.backwards()
if fmodel._data_augmentation: #Weight loss
w_loss = fmodel['data_aug'].loss_weight()#.to(device)
loss = loss * w_loss
loss = loss.mean()
else:
#Methode KL div
if fmodel._data_augmentation :
fmodel.augment(mode=False)
sup_logits = fmodel(xs)
fmodel.augment(mode=True)
else:
sup_logits = fmodel(xs)
log_sup=F.log_softmax(sup_logits, dim=1)
loss = F.cross_entropy(log_sup, ys)
if fmodel._data_augmentation:
aug_logits = fmodel(xs)
log_aug=F.log_softmax(aug_logits, dim=1)
w_loss = fmodel['data_aug'].loss_weight() #Weight loss
#if epoch>50: #debut differe ?
#KL div w/ logits - Similarite predictions (distributions)
aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
aug_loss = aug_loss.sum(dim=-1)
#aug_loss = F.kl_div(aug_logits, sup_logits, reduction='none')
aug_loss = (w_loss * aug_loss).mean()
aug_loss += (F.cross_entropy(log_aug, ys , reduction='none') * w_loss).mean()
unsupp_coeff = 1
loss += aug_loss * unsupp_coeff
#to visualize computational graph
#print_graph(loss)
#loss.backward(retain_graph=True)
#print(fmodel['model']._params['b4'].grad)
#print('prob grad', fmodel['data_aug']['prob'].grad)
#t = time.process_time()
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
#print(len(fmodel._fast_params),"step", time.process_time()-t)
if(high_grad_track and i>0 and i%inner_it==0): #Perform Meta step
#print("meta")
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val) #+ fmodel['data_aug'].reg_loss()
#print_graph(val_loss)
#t = time.process_time()
val_loss.backward()
#print("meta", time.process_time()-t)
#print('proba grad',model['data_aug']['prob'].grad)
if model['data_aug']['prob'].grad is None or model['data_aug']['mag'] is None:
print("Warning no grad (iter",i,") :\n Prob-",model['data_aug']['prob'].grad,"\n Mag-", model['data_aug']['mag'].grad)
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
torch.nn.utils.clip_grad_norm_(model['data_aug'].parameters(), max_norm=10, norm_type=2) #Prevent exploding grad with RNN
#if epoch>50:
meta_opt.step()
model['data_aug'].adjust_param(soft=False) #Contrainte sum(proba)=1
try: #Dataugv6
model['data_aug'].next_TF_set()
except:
pass
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
meta_opt.zero_grad()
tf = time.process_time()
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
#viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch), weight_labels=model['data_aug'].loss_weight())
if(not high_grad_track):
countcopy+=1
model_copy(src=fmodel, dst=model)
optim_copy(dopt=diffopt, opt=inner_opt)
val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
#Necessaire pour reset higher (Accumule les fast_param meme avec track_higher_grads = False)
fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(),fmodel=fmodel, track_higher_grads=high_grad_track)
accuracy, test_loss =test(model)
model.train()
#### Log ####
#print(type(model['data_aug']) is dataug.Data_augV5)
param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": param #if isinstance(model['data_aug'], Data_augV5)
#else [p.item() for p in model['data_aug']['prob']],
}
log.append(data)
#############
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', max([x["acc"] for x in log]))
print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',model['data_aug']['prob'].grad)
print('TF Mag :', model['data_aug']['mag'].data)
#print('Mag grad',model['data_aug']['mag'].grad)
#print('Reg loss:', model['data_aug'].reg_loss().item())
#print('Aug loss', aug_loss.item())
#############
if val_loss_monitor :
model.eval()
val_loss_monitor.register(test_loss)#val_loss.item())
if val_loss_monitor.end_training(): break #Stop training
model.train()
if not model.is_augmenting() and (epoch == dataug_epoch_start or (val_loss_monitor and val_loss_monitor.limit_reached()==1)):
print('Starting Data Augmention...')
dataug_epoch_start = epoch
model.augment(mode=True)
if inner_it != 0: high_grad_track = True
try:
viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch), weight_labels=model['data_aug'].loss_weight())
except:
print("Couldn't save finals samples")
pass
#print("Copy ", countcopy)
return log

View file

@ -0,0 +1,161 @@
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc
from torchviz import make_dot
import torch
import torch.nn.functional as F
import time
class timer():
def __init__(self):
self._start_time=time.time()
def exec_time(self):
end = time.time()
res = end-self._start_time
self._start_time=end
return res
def plot_res(log, fig_name='res', param_names=None):
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
ax[0].set_title('Loss')
ax[0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0].legend()
ax[1].set_title('Acc')
ax[1].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].set_title('Mag')
ax[2].plot(epochs,[x["param"] for x in log], label='Mag')
ax[2].legend()
else :
ax[2].set_title('Prob')
#for idx, _ in enumerate(log[0]["param"]):
#ax[2].plot(epochs,[x["param"][idx] for x in log], label='P'+str(idx))
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[2].stackplot(epochs, proba, labels=param_names)
ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name)
plt.close()
def plot_res_compare(filenames, fig_name='res'):
all_data=[]
#legend=""
for idx, file in enumerate(filenames):
#legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
n_tf = [len(x["Param_names"]) for x in all_data]
acc = [x["Accuracy"] for x in all_data]
time = [x["Time"][0] for x in all_data]
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
ax[0].plot(n_tf, acc)
ax[1].plot(n_tf, time)
ax[0].set_title('Acc')
ax[1].set_title('Time')
#for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_TF_res(log, tf_names, fig_name='res'):
mean = np.mean([x["param"] for x in log], axis=0)
std = np.std([x["param"] for x in log], axis=0)
fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
ax.bar(tf_names, mean, yerr=std)
#ax.bar(tf_names, log[-1]["param"])
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def model_copy(src,dst, patch_copy=True, copy_grad=True):
#model=copy.deepcopy(fmodel) #Pas approprie, on ne souhaite que les poids/grad (pas tout fmodel et ses etats)
dst.load_state_dict(src.state_dict()) #Do not copy gradient !
if patch_copy:
dst['model'].load_state_dict(src['model'].state_dict()) #Copie donnee manquante ?
dst['data_aug'].load_state_dict(src['data_aug'].state_dict())
#Copie des gradients
if copy_grad:
for paramName, paramValue, in src.named_parameters():
for netCopyName, netCopyValue, in dst.named_parameters():
if paramName == netCopyName:
netCopyValue.grad = paramValue.grad
#netCopyValue=copy.deepcopy(paramValue)
try: #Data_augV4
dst['data_aug']._input_info = src['data_aug']._input_info
dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
except:
pass
def optim_copy(dopt, opt):
#inner_opt.load_state_dict(diffopt.state_dict()) #Besoin sauver etat otpim (momentum, etc.) => Ne copie pas le state...
#opt_param=higher.optim.get_trainable_opt_params(diffopt)
for group_idx, group in enumerate(opt.param_groups):
# print('gp idx',group_idx)
for p_idx, p in enumerate(group['params']):
opt.state[p]=dopt.state[group_idx][p_idx]
class loss_monitor(): #Voir https://github.com/pytorch/ignite
def __init__(self, patience, end_train=1):
self.patience = patience
self.end_train = end_train
self.counter = 0
self.best_score = None
self.reached_limit = 0
def register(self, loss):
if self.best_score is None:
self.best_score = loss
elif loss > self.best_score:
self.counter += 1
#if not self.reached_limit:
print("loss no improve counter", self.counter, self.reached_limit)
else:
self.best_score = loss
self.counter = 0
def limit_reached(self):
if self.counter >= self.patience:
self.counter = 0
self.reached_limit +=1
self.best_score = None
return self.reached_limit
def end_training(self):
if self.limit_reached() >= self.end_train:
return True
else:
return False
def reset(self):
self.__init__(self.patience, self.end_train)

163
higher/smart_aug/test_brutus.py Executable file
View file

@ -0,0 +1,163 @@
from model import *
from dataug import *
#from utils import *
from train_utils import *
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast',
'Color',
'Brightness',
'Sharpness',
'Posterize',
'Solarize', #=>Image entre [0,1] #Pas opti pour des batch
]
device = torch.device('cuda')
if device == torch.device('cpu'):
device_name = 'CPU'
else:
device_name = torch.cuda.get_device_name(device)
##########################################
if __name__ == "__main__":
n_inner_iter = 1
epochs = 150
dataug_epoch_start=0
optim_param={
'Meta':{
'optim':'Adam',
'lr':1e-2, #1e-2
},
'Inner':{
'optim': 'SGD',
'lr':1e-1, #1e-2
'momentum':0.9, #0.9
}
}
#model = LeNet(3,10)
#model = ResNet(num_classes=10)
#model = MobileNetV2(num_classes=10)
#model = WideResNet(num_classes=10, wrn_size=32)
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
####
'''
t0 = time.process_time()
aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
log= run_dist_dataugV2(model=aug_model, epochs=epochs, inner_it=n_inner_iter, dataug_epoch_start=dataug_epoch_start, print_freq=10, KLdiv=True, loss_patience=None)
exec_time=time.process_time() - t0
####
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
filename = "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter)
with open("res/log/%s.json" % filename, "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
'''
####
'''
t0 = time.process_time()
aug_model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=3, mix_dist=0.0, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
log= run_dist_dataugV2(model=aug_model, epochs=epochs, inner_it=n_inner_iter, dataug_epoch_start=dataug_epoch_start, print_freq=10, KLdiv=True, loss_patience=None)
exec_time=time.process_time() - t0
####
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
filename = "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter)
with open("res/log/%s.json" % filename, "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
'''
res_folder="../res/brutus-tests2/"
epochs= 150
inner_its = [1]
dist_mix = [0.0, 0.5, 0.8, 1.0]
dataug_epoch_starts= [0]
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
TF_nb = [len(tf_dict)] #range(10,len(TF.TF_dict)+1) #[len(TF.TF_dict)]
N_seq_TF= [4, 3, 2]
mag_setup = [(True,True), (False, False)] #(Fixed, Shared)
#prob_setup = [True, False]
nb_run= 3
try:
os.mkdir(res_folder)
os.mkdir(res_folder+"log/")
except FileExistsError:
pass
for n_inner_iter in inner_its:
for dataug_epoch_start in dataug_epoch_starts:
for n_tf in N_seq_TF:
for dist in dist_mix:
#for i in TF_nb:
for m_setup in mag_setup:
#for p_setup in prob_setup:
p_setup=False
for run in range(nb_run):
if (n_inner_iter == 0 and (m_setup!=(True,True) and p_setup!=True)) or (p_setup and dist!=0.0): continue #Autres setup inutiles sans meta-opti
#keys = list(TF.TF_dict.keys())[0:i]
#ntf_dict = {k: TF.TF_dict[k] for k in keys}
t0 = time.process_time()
model = ResNet(num_classes=10)
model = Higher_model(model) #run_dist_dataugV3
aug_model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=n_tf, mix_dist=dist, fixed_prob=p_setup, fixed_mag=m_setup[0], shared_mag=m_setup[1]), model).to(device)
#aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
log= run_dist_dataugV3(model=aug_model,
epochs=epochs,
inner_it=n_inner_iter,
dataug_epoch_start=dataug_epoch_start,
opt_param=optim_param,
print_freq=50,
KLdiv=True)
exec_time=time.process_time() - t0
####
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param, "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
filename = "{}-{} epochs (dataug:{})- {} in_it-{}".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter, run)
with open("../res/log/%s.json" % filename, "w+") as f:
try:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
except:
print("Failed to save logs :",f.name)
try:
plot_resV2(log, fig_name="../res/"+filename, param_names=aug_model.TF_names())
except:
print("Failed to plot res")
print('Execution Time : %.00f '%(exec_time))
print('-'*9)
#'''

218
higher/smart_aug/test_dataug.py Executable file
View file

@ -0,0 +1,218 @@
""" Script to run experiment on smart augmentation.
"""
from model import *
from dataug import *
#from utils import *
from train_utils import *
# Use available TF (see transformations.py)
tf_names = [
## Geometric TF ##
'Identity',
'FlipUD',
'FlipLR',
'Rotate',
'TranslateX',
'TranslateY',
'ShearX',
'ShearY',
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast',
'Color',
'Brightness',
'Sharpness',
'Posterize',
'Solarize', #=>Image entre [0,1] #Pas opti pour des batch
#Color TF (Common mag scale)
#'+Contrast',
#'+Color',
#'+Brightness',
#'+Sharpness',
#'-Contrast',
#'-Color',
#'-Brightness',
#'-Sharpness',
#'=Posterize',
#'=Solarize',
## Bad Tranformations ##
# Bad Geometric TF #
#'BShearX',
#'BShearY',
#'BTranslateX-',
#'BTranslateX-',
#'BTranslateY',
#'BTranslateY-',
#'BadContrast',
#'BadBrightness',
#'Random',
#'RandBlend'
#Non fonctionnel
#'Auto_Contrast', #Pas opti pour des batch (Super lent)
#'Equalize',
]
device = torch.device('cuda') #Select device to use
if device == torch.device('cpu'):
device_name = 'CPU'
else:
device_name = torch.cuda.get_device_name(device)
##########################################
if __name__ == "__main__":
#Task to perform
tasks={
#'classic',
#'aug_dataset', #Moved to old code
'aug_model'
}
#Parameters
n_inner_iter = 1
epochs = 200
dataug_epoch_start=0
optim_param={
'Meta':{
'optim':'Adam',
'lr':1e-2, #1e-2
},
'Inner':{
'optim': 'SGD',
'lr':1e-2, #1e-2
'momentum':0.9, #0.9
}
}
#Models
model = LeNet(3,10)
#model = ResNet(num_classes=10)
#Lents
#model = MobileNetV2(num_classes=10)
#model = WideResNet(num_classes=10, wrn_size=32)
#### Classic ####
if 'classic' in tasks:
t0 = time.process_time()
model = model.to(device)
print("{} on {} for {} epochs".format(str(model), device_name, epochs))
log= train_classic(model=model, opt_param=optim_param, epochs=epochs, print_freq=1)
#log= train_classic_higher(model=model, epochs=epochs)
exec_time=time.process_time() - t0
####
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param['Inner'], "Device": device_name, "Log": log}
print(str(model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
filename = "{}-{} epochs".format(str(model),epochs)
with open("../res/log/%s.json" % filename, "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
plot_res(log, fig_name="../res/"+filename)
print('Execution Time : %.00f '%(exec_time))
print('-'*9)
#### Augmented Dataset ####
'''
if 'aug_dataset' in tasks:
t0 = time.process_time()
#data_train_aug = AugmentedDataset("./data", train=True, download=download_data, transform=transform, subset=(0,int(len(data_train)/2)))
#data_train_aug.augement_data(aug_copy=30)
#print(data_train_aug)
#dl_train = torch.utils.data.DataLoader(data_train_aug, batch_size=BATCH_SIZE, shuffle=True)
#xs, ys = next(iter(dl_train))
#viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_{}'.format(str(data_train_aug)))
#model = model.to(device)
#print("{} on {} for {} epochs".format(str(model), device_name, epochs))
#log= train_classic(model=model, epochs=epochs, print_freq=10)
##log= train_classic_higher(model=model, epochs=epochs)
data_train_aug = AugmentedDatasetV2("./data", train=True, download=download_data, transform=transform, subset=(0,int(len(data_train)/2)))
data_train_aug.augement_data(aug_copy=1)
print(data_train_aug)
unsup_ratio = 5
dl_unsup = torch.utils.data.DataLoader(data_train_aug, batch_size=BATCH_SIZE*unsup_ratio, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
unsup_xs, sup_xs, ys = next(iter(dl_unsup))
viz_sample_data(imgs=sup_xs, labels=ys, fig_name='samples/data_sample_{}'.format(str(data_train_aug)))
viz_sample_data(imgs=unsup_xs, labels=ys, fig_name='samples/data_sample_{}_unsup'.format(str(data_train_aug)))
model = model.to(device)
print("{} on {} for {} epochs".format(str(model), device_name, epochs))
log= train_UDA(model=model, dl_unsup=dl_unsup, epochs=epochs, opt_param=optim_param, print_freq=10)
exec_time=time.process_time() - t0
####
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param['Inner'], "Device": device_name, "Param_names": data_train_aug._TF, "Log": log}
print(str(model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
filename = "{}-{}-{} epochs".format(str(data_train_aug),str(model),epochs)
with open("res/log/%s.json" % filename, "w+") as f:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
plot_res(log, fig_name="res/"+filename)
print('Execution Time : %.00f '%(exec_time))
print('-'*9)
'''
#### Augmented Model ####
if 'aug_model' in tasks:
t0 = time.process_time()
tf_dict = {k: TF.TF_dict[k] for k in tf_names}
model = Higher_model(model) #run_dist_dataugV3
aug_model = Augmented_model(Data_augV7(TF_dict=tf_dict, N_TF=3, mix_dist=0.8, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
#aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)
print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
log= run_dist_dataugV3(model=aug_model,
epochs=epochs,
inner_it=n_inner_iter,
dataug_epoch_start=dataug_epoch_start,
opt_param=optim_param,
print_freq=20,
KLdiv=True,
hp_opt=False)
exec_time=time.process_time() - t0
####
print('-'*9)
times = [x["time"] for x in log]
out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param, "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
filename = "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter)
with open("res/log/%s.json" % filename, "w+") as f:
try:
json.dump(out, f, indent=True)
print('Log :\"',f.name, '\" saved !')
except:
print("Failed to save logs :",f.name)
try:
plot_resV2(log, fig_name="res/"+filename, param_names=aug_model.TF_names())
except:
print("Failed to plot res")
print('Execution Time : %.00f '%(exec_time))
print('-'*9)

336
higher/smart_aug/train_utils.py Executable file
View file

@ -0,0 +1,336 @@
""" Utilities function for training.
"""
import torch
#import torch.optim
import torchvision
import higher
from datasets import *
from utils import *
def test(model):
"""Evaluate a model on test data.
Args:
model (nn.Module): Model to test.
Returns:
(float, Tensor) Returns the accuracy and test loss of the model.
"""
device = next(model.parameters()).device
model.eval()
#for i, (features, labels) in enumerate(dl_test):
# features,labels = features.to(device), labels.to(device)
# pred = model.forward(features)
# return pred.argmax(dim=1).eq(labels).sum().item() / dl_test.batch_size * 100
correct = 0
total = 0
loss = []
with torch.no_grad():
for features, labels in dl_test:
features,labels = features.to(device), labels.to(device)
outputs = model(features)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
loss.append(F.cross_entropy(outputs, labels).item())
accuracy = 100 * correct / total
return accuracy, np.mean(loss)
def compute_vaLoss(model, dl_it, dl):
"""Evaluate a model on a batch of data.
Args:
model (nn.Module): Model to evaluate.
dl_it (Iterator): Data loader iterator.
dl (DataLoader): Data loader.
Returns:
(Tensor) Loss on a single batch of data.
"""
device = next(model.parameters()).device
try:
xs, ys = next(dl_it)
except StopIteration: #Fin epoch val
dl_it = iter(dl)
xs, ys = next(dl_it)
xs, ys = xs.to(device), ys.to(device)
model.eval() #Validation sans transfornations !
return F.cross_entropy(F.log_softmax(model(xs), dim=1), ys)
def train_classic(model, opt_param, epochs=1, print_freq=1):
"""Classic training of a model.
Args:
model (nn.Module): Model to train.
opt_param (dict): Dictionnary containing optimizers parameters.
epochs (int): Number of epochs to perform. (default: 1)
print_freq (int): Number of epoch between display of the state of training. If set to None, no display will be done. (default:1)
Returns:
(list) Logs of training. Each items is a dict containing results of an epoch.
"""
device = next(model.parameters()).device
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
optim = torch.optim.SGD(model.parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9
model.train()
dl_val_it = iter(dl_val)
log = []
for epoch in range(epochs):
#print_torch_mem("Start epoch")
t0 = time.process_time()
for i, (features, labels) in enumerate(dl_train):
#print_torch_mem("Start iter")
features,labels = features.to(device), labels.to(device)
optim.zero_grad()
logits = model.forward(features)
pred = F.log_softmax(logits, dim=1)
loss = F.cross_entropy(pred,labels)
loss.backward()
optim.step()
#### Tests ####
tf = time.process_time()
val_loss = compute_vaLoss(model=model, dl_it=dl_val_it, dl=dl_val)
accuracy, _ =test(model)
model.train()
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', accuracy)
#### Log ####
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"param": None,
}
log.append(data)
return log
def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start=0, print_freq=1, KLdiv=1, hp_opt=False, save_sample_freq=None):
"""Training of an augmented model with higher.
This function is intended to be used with Augmented_model containing an Higher_model (see dataug.py).
Ex : Augmented_model(Data_augV5(...), Higher_model(model))
Training loss can either be computed directly from augmented inputs (KLdiv=0).
However, it is recommended to use the KLdiv loss computation, inspired from UDA, which combine original and augmented inputs to compute the loss (KLdiv>0).
See : https://github.com/google-research/uda
Args:
model (nn.Module): Augmented model to train.
opt_param (dict): Dictionnary containing optimizers parameters.
epochs (int): Number of epochs to perform. (default: 1)
inner_it (int): Number of inner iteration before a meta-step. 0 inner iteration means there's no meta-step. (default: 1)
dataug_epoch_start (int): Epoch when to start data augmentation. (default: 0)
print_freq (int): Number of epoch between display of the state of training. If set to None, no display will be done. (default:1)
KLdiv (float): Proportion of the KLdiv loss added to the supervised loss. If set to 0, the loss is classicly computed on augmented inputs. (default: 1)
hp_opt (bool): Wether to learn inner optimizer parameters. (default: False)
save_sample_freq (int): Number of epochs between saves of samples of data. If set to None, only one save would be done at the end of the training. (default: None)
Returns:
(list) Logs of training. Each items is a dict containing results of an epoch.
"""
device = next(model.parameters()).device
log = []
dl_val_it = iter(dl_val)
val_loss=None
high_grad_track = True
if inner_it == 0: #No HP optimization
high_grad_track=False
if dataug_epoch_start!=0: #Augmentation de donnee differee
model.augment(mode=False)
high_grad_track = False
## Optimizers ##
#Inner Opt
inner_opt = torch.optim.SGD(model['model']['original'].parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9
diffopt = model['model'].get_diffopt(
inner_opt,
grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
track_higher_grads=high_grad_track)
#Meta Opt
hyper_param = list(model['data_aug'].parameters())
if hp_opt :
for param_group in diffopt.param_groups:
for param in list(opt_param['Inner'].keys())[1:]:
param_group[param]=torch.tensor(param_group[param]).to(device).requires_grad_()
hyper_param += [param_group[param]]
meta_opt = torch.optim.Adam(hyper_param, lr=opt_param['Meta']['lr']) #lr=1e-2
model.train()
meta_opt.zero_grad()
for epoch in range(1, epochs+1):
t0 = time.process_time()
for i, (xs, ys) in enumerate(dl_train):
xs, ys = xs.to(device), ys.to(device)
if(KLdiv<=0):
#Methode uniforme
logits = model(xs) # modified `params` can also be passed as a kwarg
loss = F.cross_entropy(F.log_softmax(logits, dim=1), ys, reduction='none') # no need to call loss.backwards()
if model._data_augmentation: #Weight loss
w_loss = model['data_aug'].loss_weight()#.to(device)
loss = loss * w_loss
loss = loss.mean()
else:
#Methode KL div
# Supervised loss (classic)
if model.is_augmenting() :
model.augment(mode=False)
sup_logits = model(xs)
model.augment(mode=True)
else:
sup_logits = model(xs)
log_sup=F.log_softmax(sup_logits, dim=1)
loss = F.cross_entropy(log_sup, ys)
# Unsupervised loss (KLdiv)
if model.is_augmenting() :
aug_logits = model(xs)
log_aug=F.log_softmax(aug_logits, dim=1)
aug_loss=0
w_loss = model['data_aug'].loss_weight() #Weight loss
#KL div w/ logits - Similarite predictions (distributions)
aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
aug_loss = aug_loss.sum(dim=-1)
aug_loss = (w_loss * aug_loss).mean()
aug_loss += (F.cross_entropy(log_aug, ys , reduction='none') * w_loss).mean()
loss += aug_loss * KLdiv
#print_graph(loss) #to visualize computational graph
#t = time.process_time()
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
#print(len(model['model']['functional']._fast_params),"step", time.process_time()-t)
if(high_grad_track and i>0 and i%inner_it==0): #Perform Meta step
#print("meta")
val_loss = compute_vaLoss(model=model, dl_it=dl_val_it, dl=dl_val) + model['data_aug'].reg_loss()
#print_graph(val_loss) #to visualize computational graph
val_loss.backward()
torch.nn.utils.clip_grad_norm_(model['data_aug'].parameters(), max_norm=10, norm_type=2) #Prevent exploding grad with RNN
meta_opt.step()
#Adjust Hyper-parameters
model['data_aug'].adjust_param(soft=False) #Contrainte sum(proba)=1
if hp_opt:
for param_group in diffopt.param_groups:
for param in list(opt_param['Inner'].keys())[1:]:
param_group[param].data = param_group[param].data.clamp(min=1e-4)
#Reset gradients
diffopt.detach_()
model['model'].detach_()
meta_opt.zero_grad()
tf = time.process_time()
if (save_sample_freq and epoch%save_sample_freq==0): #Data sample saving
try:
viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='../samples/data_sample_epoch{}'.format(epoch))
except:
print("Couldn't save samples epoch"+epoch)
pass
if(not val_loss): #Compute val loss for logs
val_loss = compute_vaLoss(model=model, dl_it=dl_val_it, dl=dl_val)
# Test model
accuracy, test_loss =test(model)
model.train()
#### Log ####
param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
data={
"epoch": epoch,
"train_loss": loss.item(),
"val_loss": val_loss.item(),
"acc": accuracy,
"time": tf - t0,
"mix_dist": model['data_aug']['mix_dist'].item(),
"param": param,
}
if hp_opt : data["opt_param"]=[{'lr': p_grp['lr'].item(), 'momentum': p_grp['momentum'].item()} for p_grp in diffopt.param_groups]
log.append(data)
#############
#### Print ####
if(print_freq and epoch%print_freq==0):
print('-'*9)
print('Epoch : %d/%d'%(epoch,epochs))
print('Time : %.00f'%(tf - t0))
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
print('Accuracy :', max([x["acc"] for x in log]))
print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
if not model['data_aug']._fixed_prob: print('TF Proba :', model['data_aug']['prob'].data)
#print('proba grad',model['data_aug']['prob'].grad)
if not model['data_aug']._fixed_mag: print('TF Mag :', model['data_aug']['mag'].data)
#print('Mag grad',model['data_aug']['mag'].grad)
if not model['data_aug']._fixed_mix: print('Mix:', model['data_aug']['mix_dist'].item())
#print('Reg loss:', model['data_aug'].reg_loss().item())
if hp_opt :
for param_group in diffopt.param_groups:
print('Opt param - lr:', param_group['lr'].item(),'- momentum:', param_group['momentum'].item())
#############
#Augmentation de donnee differee
if not model.is_augmenting() and (epoch == dataug_epoch_start):
print('Starting Data Augmention...')
dataug_epoch_start = epoch
model.augment(mode=True)
if inner_it != 0: #Rebuild diffopt if needed
high_grad_track = True
diffopt = model['model'].get_diffopt(
inner_opt,
grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
track_higher_grads=high_grad_track)
#Data sample saving
try:
viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='../samples/data_sample_epoch{}'.format(epoch))
except:
print("Couldn't save finals samples")
pass
return log

View file

@ -0,0 +1,467 @@
""" PyTorch implementation of some PIL image transformations.
Those implementation are thinked to take advantages of batched computation of PyTorch on GPU.
Based on Kornia library.
See: https://github.com/kornia/kornia
And PIL.
See:
https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
Inspired from AutoAugment.
See: https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py
"""
import torch
import kornia
import random
TF_no_mag={'Identity', 'FlipUD', 'FlipLR', 'Random', 'RandBlend'} #TF that don't have use for magnitude parameter.
TF_no_grad={'Solarize', 'Posterize', '=Solarize', '=Posterize'} #TF which implemetation doesn't allow gradient propagaition.
TF_ignore_mag= TF_no_mag | TF_no_grad #TF for which magnitude should be ignored (Magnitude fixed).
PARAMETER_MAX = 1 # What is the max 'level' a transform could be predicted
PARAMETER_MIN = 0.1 # What is the min 'level' a transform could be predicted
### Available TF for Dataug ###
# Dictionnary mapping tranformations identifiers to their function.
# Each value of the dict should be a lambda function taking a (batch of data, magnitude of transformations) tuple as input and returns a batch of data.
TF_dict={ #Dataugv5+
## Geometric TF ##
'Identity' : (lambda x, mag: x),
'FlipUD' : (lambda x, mag: flipUD(x)),
'FlipLR' : (lambda x, mag: flipLR(x)),
'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),
## Color TF (Expect image in the range of [0, 1]) ##
'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient
'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1]
#Color TF (Common mag scale)
'+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
'-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
'=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Perte du gradient
'=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Perte du gradient #=>Image entre [0,1]
## Bad Tranformations ##
# Bad Geometric TF #
'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=0.3*3, maxval=0.3*4), zero_pos=0))),
'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=0.3*3, maxval=0.3*4), zero_pos=1))),
'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=25, maxval=30), zero_pos=0))),
'BTranslateX-': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-25, maxval=-30), zero_pos=0))),
'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=25, maxval=30), zero_pos=1))),
'BTranslateY-': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-25, maxval=-30), zero_pos=1))),
# Bad Color TF #
'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9*2, maxval=2*4))),
'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*3))),
# Random TF #
'Random':(lambda x, mag: torch.rand_like(x)),
'RandBlend': (lambda x, mag: blend(x,torch.rand_like(x), alpha=torch.tensor(0.7,device=mag.device).expand(x.shape[0]))),
#Not ready for use
#'Auto_Contrast': (lambda mag: None), #Pas opti pour des batch (Super lent)
#'Equalize': (lambda mag: None),
}
## Image type cast ##
def int_image(float_image):
"""Convert a float Tensor/Image to an int Tensor/Image.
Be warry that this transformation isn't bijective, each conversion will result in small loss of information.
Granularity: 1/256 = 0.0039.
This will also result in the loss of the gradient associated to input as gradient cannot be tracked on int Tensor.
Args:
float_image (FloatTensor): Image tensor.
Returns:
(ByteTensor) Converted tensor.
"""
return (float_image*255.).type(torch.uint8)
def float_image(int_image):
"""Convert a int Tensor/Image to an float Tensor/Image.
Args:
int_image (ByteTensor): Image tensor.
Returns:
(FloatTensor) Converted tensor.
"""
return int_image.type(torch.float)/255.
## Parameters utils ##
def rand_floats(size, mag, maxval, minval=None):
"""Generate a batch of random values.
Args:
size (int): Number of value to generate.
mag (float): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
maxval (float): Maximum value that can be generated. This will be scaled to mag/PARAMETER_MAX.
minval (float): Minimum value that can be generated. (default: -maxval)
Returns:
(Tensor) Generated batch of float values between [minval, maxval].
"""
real_mag = float_parameter(mag, maxval=maxval)
if not minval : minval = -real_mag
#return random.uniform(minval, real_max)
return minval + (real_mag-minval) * torch.rand(size, device=mag.device) #[min_val, real_mag]
def invScale_rand_floats(size, mag, maxval, minval):
"""Generate a batch of random values.
Similar to rand_floats() except that the mag is used in an inversed scale.
Mag:[0,PARAMETER_MAX] => [PARAMETER_MAX, 0]
Args:
size (int): Number of value to generate.
mag (float): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
maxval (float): Maximum value that can be generated. This will be scaled to mag/PARAMETER_MAX.
minval (float): Minimum value that can be generated. (default: -maxval)
Returns:
(Tensor) Generated batch of float values between [minval, maxval].
"""
real_mag = float_parameter(float(PARAMETER_MAX) - mag, maxval=maxval-minval)+minval
return real_mag + (maxval-real_mag) * torch.rand(size, device=mag.device) #[real_mag, max_val]
def zero_stack(tensor, zero_pos):
"""Add a row of zeros to a Tensor.
This function is intended to be used with single row Tensor, thus returning a 2 dimension Tensor.
Args:
tensor (Tensor): Tensor to be stacked with zeros.
zero_pos (int): Wheter the zeros should be added before or after the Tensor. Either 0 or 1.
Returns:
Stacked Tensor.
"""
if zero_pos==0:
return torch.stack((tensor, torch.zeros((tensor.shape[0],), device=tensor.device)), dim=1)
if zero_pos==1:
return torch.stack((torch.zeros((tensor.shape[0],), device=tensor.device), tensor), dim=1)
else:
raise Exception("Invalid zero_pos : ", zero_pos)
def float_parameter(level, maxval):
"""Scale level between 0 and maxval.
Args:
level (float): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
maxval: Maximum value that the operation can have. This will be scaled to level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
#return float(level) * maxval / PARAMETER_MAX
return (level * maxval / PARAMETER_MAX)#.to(torch.float)
## Tranformations ##
def flipLR(x):
"""Flip horizontaly/Left-Right images.
Args:
x (Tensor): Batch of images.
Returns:
(Tensor): Batch of fliped images.
"""
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[-1., 0., w-1],
[ 0., 1., 0.],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def flipUD(x):
"""Flip vertically/Up-Down images.
Args:
x (Tensor): Batch of images.
Returns:
(Tensor): Batch of fliped images.
"""
device = x.device
(batch_size, channels, h, w) = x.shape
M =torch.tensor( [[[ 1., 0., 0.],
[ 0., -1., h-1],
[ 0., 0., 1.]]], device=device).expand(batch_size,-1,-1)
# warp the original image by the found transform
return kornia.warp_perspective(x, M, dsize=(h, w))
def rotate(x, angle):
"""Rotate images.
Args:
x (Tensor): Batch of images.
angle (Tensor): Angles (degrees) of rotation for each images.
Returns:
(Tensor): Batch of rotated images.
"""
return kornia.rotate(x, angle=angle.type(torch.float)) #Kornia ne supporte pas les int
def translate(x, translation):
"""Translate images.
Args:
x (Tensor): Batch of images.
translation (Tensor): Distance (pixels) of translation for each images.
Returns:
(Tensor): Batch of translated images.
"""
return kornia.translate(x, translation=translation.type(torch.float)) #Kornia ne supporte pas les int
def shear(x, shear):
"""Shear images.
Args:
x (Tensor): Batch of images.
shear (Tensor): Angle of shear for each images.
Returns:
(Tensor): Batch of skewed images.
"""
return kornia.shear(x, shear=shear)
def contrast(x, contrast_factor):
"""Adjust contast of images.
Args:
x (FloatTensor): Batch of images.
contrast_factor (FloatTensor): Contrast adjust factor per element in the batch.
0 generates a compleatly black image, 1 does not modify the input image while any other non-negative number modify the brightness by this factor.
Returns:
(Tensor): Batch of adjusted images.
"""
return kornia.adjust_contrast(x, contrast_factor=contrast_factor) #Expect image in the range of [0, 1]
def color(x, color_factor):
"""Adjust color of images.
Args:
x (Tensor): Batch of images.
color_factor (Tensor): Color factor for each images.
0.0 gives a black and white image. A factor of 1.0 gives the original image.
Returns:
(Tensor): Batch of adjusted images.
"""
(batch_size, channels, h, w) = x.shape
gray_x = kornia.rgb_to_grayscale(x)
gray_x = gray_x.repeat_interleave(channels, dim=1)
return blend(gray_x, x, color_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def brightness(x, brightness_factor):
"""Adjust brightness of images.
Args:
x (Tensor): Batch of images.
brightness_factor (Tensor): Brightness factor for each images.
0.0 gives a black image. A factor of 1.0 gives the original image.
Returns:
(Tensor): Batch of adjusted images.
"""
device = x.device
return blend(torch.zeros(x.size(), device=device), x, brightness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def sharpeness(x, sharpness_factor):
"""Adjust sharpness of images.
Args:
x (Tensor): Batch of images.
sharpness_factor (Tensor): Sharpness factor for each images.
0.0 gives a black image. A factor of 1.0 gives the original image.
Returns:
(Tensor): Batch of adjusted images.
"""
device = x.device
(batch_size, channels, h, w) = x.shape
k = torch.tensor([[[ 1., 1., 1.],
[ 1., 5., 1.],
[ 1., 1., 1.]]], device=device) #Smooth Filter : https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
smooth_x = kornia.filter2D(x, kernel=k, border_type='reflect', normalized=True) #Peut etre necessaire de s'occuper du channel Alhpa differement
return blend(smooth_x, x, sharpness_factor).clamp(min=0.0,max=1.0) #Expect image in the range of [0, 1]
def posterize(x, bits):
"""Reduce the number of bits for each color channel.
Be warry that the cast to integers block the gradient propagation.
Args:
x (Tensor): Batch of images.
bits (Tensor): The number of bits to keep for each channel (1-8).
Returns:
(Tensor): Batch of posterized images.
"""
bits = bits.type(torch.uint8) #Perte du gradient
x = int_image(x) #Expect image in the range of [0, 1]
mask = ~(2 ** (8 - bits) - 1).type(torch.uint8)
(batch_size, channels, h, w) = x.shape
mask = mask.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
return float_image(x & mask)
def solarize(x, thresholds):
"""Invert all pixel values above a threshold.
Be warry that the use of the inequality (x>tresholds) block the gradient propagation.
Args:
x (Tensor): Batch of images.
thresholds (Tensor): All pixels above this level are inverted
Returns:
(Tensor): Batch of solarized images.
"""
batch_size, channels, h, w = x.shape
#imgs=[]
#for idx, t in enumerate(thresholds): #Operation par image
# mask = x[idx] > t #Perte du gradient
#In place
# inv_x = 1-x[idx][mask]
# x[idx][mask]=inv_x
#
#Out of place
# im = x[idx]
# inv_x = 1-im[mask]
# imgs.append(im.masked_scatter(mask,inv_x))
#idxs=torch.tensor(range(x.shape[0]), device=x.device)
#idxs=idxs.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
#x=x.scatter(dim=0, index=idxs, src=torch.stack(imgs))
#
thresholds = thresholds.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
x=torch.where(x>thresholds,1-x, x)
#x=x.min(thresholds)
#inv_x = 1-x[mask]
#x=x.where(x<thresholds,1-x)
#x[mask]=inv_x
#x=x.masked_scatter(mask, inv_x)
return x
def blend(x,y,alpha):
"""Creates a new images by interpolating between two input images, using a constant alpha.
x and y should have the same size.
alpha should have the same batch size as the images.
Apply batch wise :
out = image1 * (1.0 - alpha) + image2 * alpha
Args:
x (Tensor): Batch of images.
y (Tensor): Batch of images.
alpha (Tensor): The interpolation alpha factor for each images.
Returns:
(Tensor): Batch of solarized images.
"""
#return kornia.add_weighted(src1=x, alpha=(1-alpha), src2=y, beta=alpha, gamma=0) #out=src1alpha+src2beta+gamma #Ne fonctionne pas pour des batch de alpha
if not isinstance(x, torch.Tensor):
raise TypeError("x should be a tensor. Got {}".format(type(x)))
if not isinstance(y, torch.Tensor):
raise TypeError("y should be a tensor. Got {}".format(type(y)))
assert(x.shape==y.shape and x.shape[0]==alpha.shape[0])
(batch_size, channels, h, w) = x.shape
alpha = alpha.unsqueeze(dim=1).expand(-1,channels).unsqueeze(dim=2).expand(-1,channels, h).unsqueeze(dim=3).expand(-1,channels, h, w) #Il y a forcement plus simple ...
res = x*(1-alpha) + y*alpha
return res
#Not working
def auto_contrast(x): #PAS OPTIMISE POUR DES BATCH #EXTRA LENT
# Optimisation : Application de LUT efficace / Calcul d'histogramme par batch/channel
print("Warning : Pas encore check !")
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Operation par image
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Operation par channel
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #PAS DIFFERENTIABLE
# find lowest/highest samples after preprocessing
for lo in range(256):
if hist[lo]:
break
for hi in range(255, -1, -1):
if hist[hi]:
break
if hi <= lo:
# don't bother
pass
else:
scale = 255.0 / (hi - lo)
offset = -lo * scale
for ix in range(256):
n_ix = int(ix * scale + offset)
if n_ix < 0: n_ix = 0
elif n_ix > 255: n_ix = 255
chan[chan==ix]=n_ix
x[im_idx, chan_idx]=chan
#print('End',x[0])
return float_image(x)
def equalize(x): #PAS OPTIMISE POUR DES BATCH
raise Exception(self, "not implemented")
# Optimisation : Application de LUT efficace / Calcul d'histogramme par batch/channel
(batch_size, channels, h, w) = x.shape
x = int_image(x) #Expect image in the range of [0, 1]
#print('Start',x[0])
for im_idx, img in enumerate(x.chunk(batch_size, dim=0)): #Operation par image
#print(img.shape)
for chan_idx, chan in enumerate(img.chunk(channels, dim=1)): # Operation par channel
#print(chan.shape)
hist = torch.histc(chan, bins=256, min=0, max=255) #PAS DIFFERENTIABLE
return float_image(x)

236
higher/smart_aug/utils.py Executable file
View file

@ -0,0 +1,236 @@
""" Utilties function.
"""
import numpy as np
import json, math, time, os
import matplotlib.pyplot as plt
import copy
import gc
from torchviz import make_dot
import torch
import torch.nn.functional as F
import time
def print_graph(PyTorch_obj, fig_name='graph'):
"""Save the computational graph.
Args:
PyTorch_obj (Tensor): End of the graph. Commonly, the loss tensor to get the whole graph.
fig_name (string): Relative path where to save the graph. (default: graph)
"""
graph=make_dot(PyTorch_obj)
graph.format = 'pdf' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
graph.render(fig_name)
def plot_resV2(log, fig_name='res', param_names=None):
"""Save a visual graph of the logs.
Args:
log (dict): Logs of the training generated by most of train_utils.
fig_name (string): Relative path where to save the graph. (default: res)
param_names (list): Labels for the parameters. (default: None)
"""
epochs = [x["epoch"] for x in log]
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))
ax[0, 0].set_title('Loss')
ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
ax[0, 0].legend()
ax[1, 0].set_title('Acc')
ax[1, 0].plot(epochs,[x["acc"] for x in log])
if log[0]["param"]!= None:
if not param_names : param_names = ['P'+str(idx) for idx, _ in enumerate(log[0]["param"])]
#proba=[[x["param"][idx] for x in log] for idx, _ in enumerate(log[0]["param"])]
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
ax[0, 1].set_title('Prob =f(epoch)')
ax[0, 1].stackplot(epochs, proba, labels=param_names)
#ax[0, 1].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 1].set_title('Prob =f(TF)')
mean = np.mean(proba, axis=1)
std = np.std(proba, axis=1)
ax[1, 1].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 1]), plt.xticks(rotation=90)
ax[0, 2].set_title('Mag =f(epoch)')
ax[0, 2].stackplot(epochs, mag, labels=param_names)
#ax[0, 2].plot(epochs, np.array(mag).T, label=param_names)
ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))
ax[1, 2].set_title('Mag =f(TF)')
mean = np.mean(mag, axis=1)
std = np.std(mag, axis=1)
ax[1, 2].bar(param_names, mean, yerr=std)
plt.sca(ax[1, 2]), plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def plot_compare(filenames, fig_name='res'):
"""Save a visual graph comparing trainings stats.
Args:
filenames (list[Strings]): Relative paths to the logs (JSON files).
fig_name (string): Relative path where to save the graph. (default: res)
"""
all_data=[]
legend=""
for idx, file in enumerate(filenames):
legend+=str(idx)+'-'+file+'\n'
with open(file) as json_file:
data = json.load(json_file)
all_data.append(data)
fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
for data_idx, log in enumerate(all_data):
log=log['Log']
epochs = [x["epoch"] for x in log]
ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')
ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))
#ax[1].text(x=0.5,y=0,s=str(data_idx)+'-'+filenames[data_idx], transform=ax[1].transAxes)
if log[0]["param"]!= None:
if isinstance(log[0]["param"],float):
ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
else :
for idx, _ in enumerate(log[0]["param"]):
ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))
fig.suptitle(legend)
ax[0].set_title('Loss')
ax[1].set_title('Acc')
ax[2].set_title('Param')
for a in ax: a.legend()
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
def viz_sample_data(imgs, labels, fig_name='data_sample', weight_labels=None):
"""Save data samples.
Args:
imgs (Tensor): Batch of image to sample from. Intended to contain at least 25 images.
labels (Tensor): Labels of the images.
fig_name (string): Relative path where to save the graph. (default: data_sample)
weight_labels (Tensor): Weights associated to each labels. (default: None)
"""
sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze().cpu()
plt.figure(figsize=(10,10))
for i in range(25):
plt.subplot(5,5,i+1) #Trop de figure cree ?
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
label = str(labels[i].item())
if weight_labels is not None : label+= (" - p %.2f" % weight_labels[i].item())
plt.xlabel(label)
plt.savefig(fig_name)
print("Sample saved :", fig_name)
plt.close('all')
def print_torch_mem(add_info=''):
"""Print informations on PyTorch memory usage.
Args:
add_info (string): Prefix added before the print. (default: None)
"""
nb=0
max_size=0
for obj in gc.get_objects():
#print(type(obj))
try:
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
#print(i, type(obj), obj.size())
size = np.sum(obj.size())
if(size>max_size): max_size=size
nb+=1
except:
pass
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
#print(add_info, "-Garbage size :",len(gc.garbage))
"""Simple GPU memory report."""
mega_bytes = 1024.0 * 1024.0
string = add_info + ' memory (MB)'
string += ' | allocated: {}'.format(
torch.cuda.memory_allocated() / mega_bytes)
string += ' | max allocated: {}'.format(
torch.cuda.max_memory_allocated() / mega_bytes)
string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
string += ' | max cached: {}'.format(
torch.cuda.max_memory_cached()/ mega_bytes)
print(string)
'''
def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
plt.figure()
mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) #Pourrait etre interessant de multiplier avant le mean
std = np.std(proba, axis=1)*np.std(mag, axis=1)
plt.bar(param_names, mean, yerr=std)
plt.xticks(rotation=90)
fig_name = fig_name.replace('.',',')
plt.savefig(fig_name, bbox_inches='tight')
plt.close()
'''
from torch._six import inf
def clip_norm(tensors, max_norm, norm_type=2):
"""Clips norm of passed tensors.
The norm is computed over all tensors together, as if they were
concatenated into a single vector. Clipped tensors are returned.
See: https://github.com/facebookresearch/higher/issues/18
Args:
tensors (Iterable[Tensor]): an iterable of Tensors or a
single Tensor to be normalized.
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
infinity norm.
Returns:
Clipped (List[Tensor]) tensors.
"""
if isinstance(tensors, torch.Tensor):
tensors = [tensors]
tensors = list(tensors)
max_norm = float(max_norm)
norm_type = float(norm_type)
if norm_type == inf:
total_norm = max(t.abs().max() for t in tensors)
else:
total_norm = 0
for t in tensors:
param_norm = t.norm(norm_type)
total_norm += param_norm.item() ** norm_type
total_norm = total_norm ** (1. / norm_type)
clip_coef = max_norm / (total_norm + 1e-6)
if clip_coef >= 1:
return tensors
return [t.mul(clip_coef) for t in tensors]