mirror of
https://github.com/AntoineHX/smart_augmentation.git
synced 2025-05-04 12:10:45 +02:00
Rangement
This commit is contained in:
parent
f83c73ec17
commit
f507ff4741
16 changed files with 85 additions and 46 deletions
95
higher/smart_aug/compare_res.py
Executable file
95
higher/smart_aug/compare_res.py
Executable file
|
@ -0,0 +1,95 @@
|
|||
from utils import *
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: plots the results stored in JSON experiment logs.
    # `json` and the plot_* helpers are not imported explicitly in the visible part of
    # this file; presumably they come from `from utils import *` (TODO confirm).

    # The '#' in front of the quote markers below keeps this first section ACTIVE.
    # Removing the '#' turns the section into an unused string literal (i.e. disables it).
    #'''
    # Log files to plot; commented entries are alternative runs kept for convenience.
    files=[
        "res/log/Aug_mod(Data_augV5(Mix0.8-23TFx4-Mag)-LeNet)-100 epochs (dataug:0)- 1 in_it.json",
        #"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-0.json",
        #"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-1.json",
        #"res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-MagFxSh)-LeNet)-150epochs(dataug:0)-10in_it-2.json",
        #"res/log/Aug_mod(RandAugUDA(18TFx2-Mag1)-LeNet)-100 epochs (dataug:0)- 0 in_it.json",
    ]

    for idx, file in enumerate(files):
        #legend+=str(idx)+'-'+file+'\n'
        with open(file) as json_file:
            data = json.load(json_file)
        # The figure name is derived from the log path: extension and 'log/' component removed.
        plot_resV2(data['Log'], fig_name=file.replace('.json','').replace('log/',''), param_names=data['Param_names'])
        #plot_TF_influence(data['Log'], param_names=data['Param_names'])
    #'''
    ## Loss , Acc, Proba = f(epoch) ##
    #plot_compare(filenames=files, fig_name="res/compare")

    # DISABLED experiment (kept as a string literal): accuracy, time and number of epochs
    # plotted against the number of sequential TF.
    '''
    ## Acc, Time, Epochs = f(n_tf) ##
    #fig_name="res/TF_nb_tests_compare"
    fig_name="res/TF_seq_tests_compare"
    inner_its = [0, 10]
    dataug_epoch_starts= [0]
    TF_nb = 14#[len(TF.TF_dict)] #range(10,len(TF.TF_dict)+1) #[len(TF.TF_dict)]
    N_seq_TF= [1, 2, 3, 4, 6] #[1]

    fig, ax = plt.subplots(ncols=3, figsize=(30, 8))
    for in_it in inner_its:
        for dataug in dataug_epoch_starts:

            #n_tf = TF_nb
            #filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF)-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(n_tf, dataug, in_it) for n_tf in TF_nb]
            #filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(n_tf, 1, dataug, in_it) for n_tf in TF_nb]

            n_tf = N_seq_TF
            #filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(TF_nb, n_tf, dataug, in_it) for n_tf in N_seq_TF]
            filenames =["res/TF_nb_tests/log/Aug_mod(Data_augV4(Uniform-{} TF x {})-LeNet)-200 epochs (dataug:{})- {} in_it.json".format(TF_nb, n_tf, dataug, in_it) for n_tf in N_seq_TF]


            all_data=[]
            #legend=""
            for idx, file in enumerate(filenames):
                #legend+=str(idx)+'-'+file+'\n'
                with open(file) as json_file:
                    data = json.load(json_file)
                all_data.append(data)

            acc = [x["Accuracy"] for x in all_data]
            epochs = [len(x["Log"]) for x in all_data]
            time = [x["Time"][0] for x in all_data]
            #for i in range(len(time)): time[i] *= epochs[i] #Estimation temps total

            ax[0].plot(n_tf, acc, label="{} in_it/{} dataug".format(in_it,dataug))
            ax[1].plot(n_tf, time, label="{} in_it/{} dataug".format(in_it,dataug))
            ax[2].plot(n_tf, epochs, label="{} in_it/{} dataug".format(in_it,dataug))


    #for data in all_data:
        #print(np.mean([x["param"] for x in data["Log"]], axis=0))
        #print(len(data["Param_names"]), np.argsort(np.argsort(np.mean([x["param"] for x in data["Log"]], axis=0))))


    ax[0].set_title('Acc')
    ax[1].set_title('Time')
    ax[2].set_title('Epochs')
    for a in ax: a.legend()

    fig_name = fig_name.replace('.',',')
    plt.savefig(fig_name, bbox_inches='tight')
    plt.close()
    '''

    # DISABLED experiment (kept as a string literal): prints the accuracy of each run,
    # then the mean/std accuracy and the mean time over `nb_run` runs.
    #Res print
    '''
    nb_run=3
    accs = []
    times = []
    files = ["res/brutus-tests/log/Aug_mod(Data_augV5(Uniform-14TFx3-Mag)-LeNet)-150epochs(dataug:0)-1in_it-%s.json"%str(run) for run in range(nb_run)]

    for idx, file in enumerate(files):
        #legend+=str(idx)+'-'+file+'\n'
        with open(file) as json_file:
            data = json.load(json_file)
        accs.append(data['Accuracy'])
        times.append(data['Time'][0])
        print(idx, data['Accuracy'])

    print(files[0], np.mean(accs), np.std(accs), np.mean(times))
    '''
|
51
higher/smart_aug/datasets.py
Executable file
51
higher/smart_aug/datasets.py
Executable file
|
@ -0,0 +1,51 @@
|
|||
""" Dataset definition.
|
||||
|
||||
MNIST / CIFAR10
|
||||
"""
|
||||
import torch
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
import torchvision
|
||||
|
||||
# --- Loader configuration --------------------------------------------------
BATCH_SIZE = 300
TEST_SIZE = 300
#TEST_SIZE = 10000 #Slightly faster / higher memory consumption !

download_data = False
num_workers = 2 #4
pin_memory = False #True : more GPU memory / slower

# --- Pre-processing ----------------------------------------------------------
#WARNING : Dataug (Kornia) expects images in the range [0, 1]
#transform_train = torchvision.transforms.Compose([
#    torchvision.transforms.RandomHorizontalFlip(),
#    torchvision.transforms.ToTensor(),
#    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
#])
# Only a tensor conversion is applied; the normalisation step is left disabled.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    #torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #CIFAR10
])

#data_train = torchvision.datasets.MNIST(
#    "./data", train=True, download=True,
#    transform=torchvision.transforms.Compose([
#        #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
#        torchvision.transforms.ToTensor()
#    ])
#)
#data_test = torchvision.datasets.MNIST(
#    "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
#)

### Classic Dataset ###
data_train = torchvision.datasets.CIFAR10("../data", train=True, download=download_data, transform=transform)
#data_val = torchvision.datasets.CIFAR10("../data", train=True, download=download_data, transform=transform)
data_test = torchvision.datasets.CIFAR10("../data", train=False, download=download_data, transform=transform)

# The training set is cut in two halves: first half for training, second half for validation.
_half = len(data_train)//2
train_subset_indices = range(_half)
val_subset_indices = range(_half, len(data_train))
#train_subset_indices=range(BATCH_SIZE*10)
#val_subset_indices=range(BATCH_SIZE*10, BATCH_SIZE*20)

# Options shared by the three loaders.
_loader_options = dict(shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

# Train/validation loaders draw (in random order) from their own subset of data_train.
dl_train = torch.utils.data.DataLoader(
    data_train,
    batch_size=BATCH_SIZE,
    sampler=SubsetRandomSampler(train_subset_indices),
    **_loader_options,
)
dl_val = torch.utils.data.DataLoader(
    data_train,
    batch_size=BATCH_SIZE,
    sampler=SubsetRandomSampler(val_subset_indices),
    **_loader_options,
)
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, **_loader_options)
|
985
higher/smart_aug/dataug.py
Executable file
985
higher/smart_aug/dataug.py
Executable file
|
@ -0,0 +1,985 @@
|
|||
""" Data augmentation modules.
|
||||
|
||||
Features a custom implementation of RandAugment (RandAug), as well as data augmentation modules allowing gradient propagation.
|
||||
|
||||
Typical usage:
|
||||
|
||||
aug_model = Augmented_model(Data_augV5, model)
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.distributions import *
|
||||
|
||||
#import kornia
|
||||
#import random
|
||||
import numpy as np
|
||||
import copy
|
||||
|
||||
import transformations as TF
|
||||
|
||||
|
||||
class Data_augV5(nn.Module): #Joint optimisation (mag, proba)
    """Data augmentation module with learnable parameters.

    Applies transformations (TF) to a batch of data.
    Each TF is defined by a (name, probability of application, magnitude of distortion) tuple which can be learned. For the full definition of the TF, see transformations.py.
    The TF probabilities define a distribution from which the applied TF are sampled.

    Be wary that the order of sequential application of the TF is not taken into account. See Data_augV7.

    Attributes:
        _data_augmentation (bool): Whether TF will be applied during the forward pass.
        _TF_dict (dict) : A dictionary containing the data transformations (TF) to be applied.
        _TF (list) : List of TF names.
        _nb_tf (int) : Number of TF used.
        _N_seqTF (int) : Number of TF to be applied sequentially to each input.
        _shared_mag (bool) : Whether to share a single magnitude parameter between all TF.
        _fixed_mag (bool): Whether to lock the TF magnitudes.
        _fixed_prob (bool): Whether to lock the TF probabilities.
        _samples (list): Sampled TF indexes during the last forward pass.
        _mix_dist (bool): Whether we use a mix of a uniform distribution and the real distribution (TF probabilities). If False, only a uniform distribution is used.
        _fixed_mix (bool): Whether we lock the mix distribution factor.
        _params (nn.ParameterDict): Learnable parameters ("prob", "mag", "mix_dist").
        _reg_tgt (Tensor): Target for the magnitude regularisation. Only defined when _fixed_mag is False (ie. we learn the magnitudes).
        _reg_mask (list): Indexes of the TF considered for the regularisation. Only defined when the magnitudes are learned and not shared.
    """
    def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True):
        """Init Data_augV5.

        Args:
            TF_dict (dict): A dictionary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
            N_TF (int): Number of TF to be applied sequentially to each input. (default: 1)
            mix_dist (float): Proportion [0.0, 1.0] of the real distribution used for sampling/selection of the TF. Distribution = (1-mix_dist)*Uniform_distribution + mix_dist*Real_distribution. If None is given, try to learn this parameter. (default: 0)
            fixed_prob (bool): Whether to lock the TF probabilities. (default: False)
            fixed_mag (bool): Whether to lock the TF magnitudes. (default: True)
            shared_mag (bool): Whether to share a single magnitude parameter between all TF. (default: True)
        """
        super(Data_augV5, self).__init__()
        assert len(TF_dict)>0
        assert N_TF>=0

        self._data_augmentation = True

        #TF
        self._TF_dict = TF_dict
        self._TF = list(self._TF_dict.keys())
        self._nb_tf = len(self._TF)
        self._N_seqTF = N_TF

        #Mag
        self._shared_mag = shared_mag
        self._fixed_mag = fixed_mag
        if not self._fixed_mag and all(tf in TF.TF_ignore_mag for tf in self._TF):
            #No TF makes use of the magnitude: nothing to learn.
            print("WARNING: Mag would be fixed as current TF doesn't allow gradient propagation:",self._TF)
            self._fixed_mag = True

        #Distribution
        self._fixed_prob = fixed_prob
        self._samples = []

        learn_mix = mix_dist is None #None = learn the mix factor, starting from 0.5
        self._mix_dist = learn_mix or mix_dist != 0.0
        self._fixed_mix = not learn_mix
        if learn_mix:
            mix_dist = 0.5

        #Params
        init_mag = float(TF.PARAMETER_MAX) if self._fixed_mag else float(TF.PARAMETER_MAX)/2
        self._params = nn.ParameterDict({
            "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Uniform probability distribution
            "mag" : nn.Parameter(torch.tensor(init_mag) if self._shared_mag
                            else torch.tensor(init_mag).repeat(self._nb_tf)), #Kept in [PARAMETER_MIN, PARAMETER_MAX] by adjust_param
            #float() : an integer mix_dist would create an integer tensor, which can't be a learnable parameter
            "mix_dist": nn.Parameter(torch.tensor(float(mix_dist)).clamp(min=0.0,max=0.999))
        })

        #Mag regularisation
        if not self._fixed_mag:
            if self._shared_mag:
                self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage max magnitude
            else:
                self._reg_mask = [idx for idx, t in enumerate(self._TF) if t not in TF.TF_ignore_mag]
                #Explicit float dtype: the target must match the dtype of the magnitudes for the MSE loss.
                self._reg_tgt = torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX, dtype=torch.float) #Encourage max magnitude

    def forward(self, x):
        """ Main method of the Data augmentation module.

        Samples, for each element of the batch, _N_seqTF transformations and applies them sequentially.
        The sampled TF indexes are stored in _samples and the sampling distribution in _distrib (both used by loss_weight).

        Args:
            x (Tensor): Batch of data. Expected to be 4-dimensional (see apply_TF).

        Returns:
            Tensor : Batch of transformed data (x itself if data augmentation is disabled).
        """
        self._samples = []
        if self._data_augmentation:
            device = x.device
            batch_size = x.shape[0]

            x = copy.deepcopy(x) #Avoid modifying the samples by reference (problematic for parallel uses)

            ## Sampling distribution (identical for every sequential TF) ##
            uniforme_dist = torch.ones(1,self._nb_tf,device=device).softmax(dim=1)

            if not self._mix_dist:
                self._distrib = uniforme_dist
            else:
                prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
                mix_dist = self._params["mix_dist"].detach() if self._fixed_mix else self._params["mix_dist"]
                self._distrib = (mix_dist*prob+(1-mix_dist)*uniforme_dist) #Mix of the real / uniform distributions weighted by the mix factor

            cat_distrib = Categorical(probs=self._distrib.expand(batch_size, -1))

            for _ in range(self._N_seqTF):
                ## Sampling ##
                sample = cat_distrib.sample()
                self._samples.append(sample)

                ## Transformations ##
                x = self.apply_TF(x, sample)
        return x

    def apply_TF(self, x, sampled_TF):
        """ Applies the sampled transformations.

        Args:
            x (Tensor): Batch of data, unpacked as (batch_size, channels, h, w).
            sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.

        Returns:
            Tensor: Batch of transformed data.
        """
        _, channels, h, w = x.shape

        for tf_idx, tf in enumerate(self._TF):
            mask = sampled_TF==tf_idx #Selection mask of the elements receiving this TF
            smp_x = x[mask]

            if smp_x.shape[0]!=0: #if there's data to TF
                magnitude = self._params["mag"] if self._shared_mag else self._params["mag"][tf_idx]
                if self._fixed_mag: magnitude = magnitude.detach() #Fmodel systematically tries to track the gradients of every parameter

                #Out of place update (keeps the operation differentiable)
                smp_x = self._TF_dict[tf](x=smp_x, mag=magnitude)
                #Batch index of every selected element, broadcast to the shape of the transformed data
                idx = mask.nonzero().reshape(-1,1,1,1).expand(-1,channels,h,w)
                x = x.scatter(dim=0, index=idx, src=smp_x)

        return x

    def adjust_param(self, soft=False):
        """ Enforce limitations to the learned parameters.

        Ensure that the parameter values stay in the right intervals. This should be called after each update of those parameters.
        The parameters are modified through their '.data', ie. outside of the autograd graph.

        Args:
            soft (bool): Whether to use a softmax function for the TF probabilities. Not recommended as it tends to lock the probabilities, preventing them from being learned. (default: False)
        """
        if not self._fixed_prob:
            prob = self._params['prob']
            if soft:
                prob.data = F.softmax(prob.data, dim=0) #Too 'soft': stuck on a uniform distribution if the learning rate is too low
            else:
                prob.data = prob.data.clamp(min=1/(self._nb_tf*100),max=1.0)
                prob.data = prob.data/prob.data.sum() #Constraint sum(p)=1

        if not self._fixed_mag:
            self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)

        if not self._fixed_mix:
            self._params['mix_dist'].data = self._params['mix_dist'].data.clamp(min=0.0, max=0.999)

    def loss_weight(self):
        """ Weights for the loss.

        Compute the weights for the loss of each input depending on which TF were applied to it during the last forward pass.
        Should be applied to the loss before reduction.

        Does not take into account the order of application of the TF. See Data_augV7.

        Returns:
            Tensor : Loss weights (one per element of the last batch), or 1 if no TF was sampled.
        """
        if len(self._samples)==0 : return 1 #No sample = no weighting

        prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]

        #Several sequential TF (Warning: the order isn't taken into account !)
        w_loss = torch.zeros((self._samples[0].shape[0],self._nb_tf), device=self._samples[0].device)
        for sample in self._samples:
            #Each sequential TF contributes 1/_N_seqTF to the weight of its TF
            w_loss += torch.zeros_like(w_loss).scatter_(dim=1, index=sample.view(-1,1), value=1/self._N_seqTF)

        w_loss = w_loss * prob/self._distrib #Weighting by the probabilities (divided by the sampling distribution to avoid lowering the loss)
        w_loss = torch.sum(w_loss,dim=1)
        return w_loss

    def reg_loss(self, reg_factor=0.005):
        """ Regularisation term used to learn the magnitudes.

        Use an L2 loss to encourage high magnitude TF.

        Args:
            reg_factor (float): Factor by which the regularisation loss is multiplied. (default: 0.005)
        Returns:
            Tensor containing the regularisation loss value (a constant 0 if the magnitudes are fixed).
        """
        if self._fixed_mag:
            return torch.tensor(0)

        #Shared magnitude = scalar parameter; otherwise only the TF selected by the mask are regularised
        mags = self._params['mag'] if self._params['mag'].shape==torch.Size([]) else self._params['mag'][self._reg_mask]
        max_mag_reg = reg_factor * F.mse_loss(mags, target=self._reg_tgt.to(mags.device), reduction='mean')
        return max_mag_reg

    def train(self, mode=True):
        """ Set the module training mode.

        The augmentation mode follows the training mode (see augment).

        Args:
            mode (bool): Whether to set the module in training mode. (default: True)

        Returns:
            The module itself.
        """
        self.augment(mode=mode)
        super(Data_augV5, self).train(mode)
        return self

    def eval(self):
        """ Set the module to evaluation mode.
        """
        return self.train(mode=False)

    def augment(self, mode=True):
        """ Set the augmentation mode.

        Args:
            mode (bool): Whether to perform data augmentation on the forward pass. (default: True)
        """
        self._data_augmentation = mode

    def __getitem__(self, key):
        """Access to the learnable parameters.

        Args:
            key (string): Name of the learnable parameter to access.

        Returns:
            nn.Parameter.
        """
        return self._params[key]

    def __str__(self):
        """Name of the module.

        Returns:
            String containing the name of the module as well as the higher level parameters.
        """
        dist_param=''
        if self._fixed_prob: dist_param+='Fx'
        mag_param='Mag'
        if self._fixed_mag: mag_param+= 'Fx'
        if self._shared_mag: mag_param+= 'Sh'
        if not self._mix_dist:
            return "Data_augV5(Uniform%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
        elif self._fixed_mix:
            return "Data_augV5(Mix%.1f%s-%dTFx%d-%s)" % (self._params['mix_dist'].item(),dist_param, self._nb_tf, self._N_seqTF, mag_param)
        else:
            return "Data_augV5(Mix%s-%dTFx%d-%s)" % (dist_param, self._nb_tf, self._N_seqTF, mag_param)
|
||||
|
||||
class Data_augV7(nn.Module): #Proba sequentielles
|
||||
"""Data augmentation module with learnable parameters.
|
||||
|
||||
Applies transformations (TF) to batch of data.
|
||||
Each TF is defined by a (name, probability of application, magnitude of distorsion) tuple which can be learned. For the full definiton of the TF, see transformations.py.
|
||||
The TF probabilities defines a distribution from which we sample the TF applied.
|
||||
|
||||
Replace the use of TF by TF sets which are combinaisons of classic TF.
|
||||
|
||||
Attributes:
|
||||
_data_augmentation (bool): Wether TF will be applied during forward pass.
|
||||
_TF_dict (dict) : A dictionnary containing the data transformations (TF) to be applied.
|
||||
_TF (list) : List of TF names.
|
||||
_nb_tf (int) : Number of TF used.
|
||||
_N_seqTF (int) : Number of TF to be applied sequentially to each inputs
|
||||
_shared_mag (bool) : Wether to share a single magnitude parameters for all TF.
|
||||
_fixed_mag (bool): Wether to lock the TF magnitudes.
|
||||
_fixed_prob (bool): Wether to lock the TF probabilies.
|
||||
_samples (list): Sampled TF index during last forward pass.
|
||||
_mix_dist (bool): Wether we use a mix of an uniform distribution and the real distribution (TF probabilites). If False, only a uniform distribution is used.
|
||||
_fixed_mix (bool): Wether we lock the mix distribution factor.
|
||||
_params (nn.ParameterDict): Learnable parameters.
|
||||
_reg_tgt (Tensor): Target for the magnitude regularisation. Only used when _fixed_mag is set to false (ie. we learn the magnitudes).
|
||||
_reg_mask (list): Mask selecting the TF considered for the regularisation.
|
||||
"""
|
||||
def __init__(self, TF_dict=TF.TF_dict, N_TF=2, mix_dist=0.0, fixed_prob=False, fixed_mag=True, shared_mag=True):
    """Init Data_augV7.

    Args:
        TF_dict (dict): A dictionary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
        N_TF (int): Number of TF to be applied sequentially to each input. Minimum 2, otherwise prefer using Data_augV5. (default: 2)
        mix_dist (float): Proportion [0.0, 1.0] of the real distribution used for sampling/selection of the TF. Distribution = (1-mix_dist)*Uniform_distribution + mix_dist*Real_distribution. If None is given, try to learn this parameter. (default: 0)
        fixed_prob (bool): Whether to lock the TF probabilities. (default: False)
        fixed_mag (bool): Whether to lock the TF magnitudes. (default: True)
        shared_mag (bool): Whether to share a single magnitude parameter between all TF. (default: True)
    """
    import itertools #Local import: only needed to enumerate the TF sets

    super(Data_augV7, self).__init__()
    assert len(TF_dict)>0
    assert N_TF>=0

    if N_TF<2:
        print("WARNING: Data_augv7 isn't designed to use less than 2 sequentials TF. Please use Data_augv5 instead.")

    self._data_augmentation = True

    #TF
    self._TF_dict = TF_dict
    self._TF = list(self._TF_dict.keys())
    self._nb_tf = len(self._TF)
    self._N_seqTF = N_TF

    #Mag
    self._shared_mag = shared_mag
    self._fixed_mag = fixed_mag
    if not self._fixed_mag and all(tf in TF.TF_ignore_mag for tf in self._TF):
        #No TF makes use of the magnitude: nothing to learn.
        print("WARNING: Mag would be fixed as current TF doesn't allow gradient propagation:",self._TF)
        self._fixed_mag = True

    #Distribution
    self._fixed_prob = fixed_prob
    self._samples = []

    learn_mix = mix_dist is None #None = learn the mix factor, starting from 0.5
    self._mix_dist = learn_mix or mix_dist != 0.0
    self._fixed_mix = not learn_mix
    if learn_mix:
        mix_dist = 0.5

    #TF sets: every arrangement (with reuse) of _N_seqTF TF indexes, in lexicographic order,
    #excluding those applying the same TF twice in a row (except 'Identity').
    #no_consecutive={idx for idx, t in enumerate(self._TF) if t in {'FlipUD', 'FlipLR'}} #Specific no consecutive ops
    no_consecutive = {idx for idx, t in enumerate(self._TF) if t not in {'Identity'}}

    def is_valid(tf_seq):
        return not any(a==b and a in no_consecutive for a, b in zip(tf_seq, tf_seq[1:]))

    TF_sets = [tf_seq for tf_seq in itertools.product(range(self._nb_tf), repeat=self._N_seqTF) if is_valid(tf_seq)]
    assert len(TF_sets)>0, "No valid TF set for the given TF_dict / N_TF"

    #Always [nb_TF_sets, N_seqTF] (no squeeze, which would drop the TF dimension when N_TF=1).
    #Long indexes: a byte tensor would overflow above 255 TF.
    self._TF_sets = torch.tensor(TF_sets, dtype=torch.long).reshape(len(TF_sets), self._N_seqTF)
    self._nb_TF_sets = len(self._TF_sets)
    print("Number of TF sets:",self._nb_TF_sets)
    self._prob_mem = torch.zeros(self._nb_TF_sets) #Memory of the probabilities used by TF_prob

    #Params
    init_mag = float(TF.PARAMETER_MAX) if self._fixed_mag else float(TF.PARAMETER_MAX)/2
    self._params = nn.ParameterDict({
        "prob": nn.Parameter(torch.ones(self._nb_TF_sets)/self._nb_TF_sets), #Uniform probability distribution (over the TF sets)
        "mag" : nn.Parameter(torch.tensor(init_mag) if self._shared_mag
                        else torch.tensor(init_mag).repeat(self._nb_tf)), #Kept in [PARAMETER_MIN, PARAMETER_MAX] by adjust_param
        #float() : an integer mix_dist would create an integer tensor, which can't be a learnable parameter
        "mix_dist": nn.Parameter(torch.tensor(float(mix_dist)).clamp(min=0.0,max=0.999))
    })

    #Mag regularisation
    if not self._fixed_mag:
        if self._shared_mag:
            #torch.tensor (not torch.FloatTensor, which interprets an integer argument as a size)
            self._reg_tgt = torch.tensor(TF.PARAMETER_MAX, dtype=torch.float) #Encourage max magnitude
        else:
            self._reg_mask = [idx for idx,t in enumerate(self._TF) if t not in TF.TF_ignore_mag]
            #Explicit float dtype: the target must match the dtype of the magnitudes for the MSE loss.
            self._reg_tgt = torch.full(size=(len(self._reg_mask),), fill_value=TF.PARAMETER_MAX, dtype=torch.float) #Encourage max magnitude
|
||||
|
||||
def forward(self, x):
    """ Main method of the Data augmentation module.

    Samples one TF set for each element of the batch and applies its TF sequentially.
    The sampled set indexes are stored in _samples and the sampling distribution in _distrib (both used by loss_weight).

    Args:
        x (Tensor): Batch of data. Expected to be 4-dimensional (see apply_TF).

    Returns:
        Tensor : Batch of transformed data (x itself if data augmentation is disabled).
    """
    self._samples = None
    if self._data_augmentation:
        device = x.device
        batch_size = x.shape[0]

        x = copy.deepcopy(x) #Avoid modifying the samples by reference (problematic for parallel uses)

        ## Sampling ##
        uniforme_dist = torch.ones(1,self._nb_TF_sets,device=device).softmax(dim=1)

        if not self._mix_dist:
            self._distrib = uniforme_dist
        else:
            prob = self._params["prob"].detach() if self._fixed_prob else self._params["prob"]
            mix_dist = self._params["mix_dist"].detach() if self._fixed_mix else self._params["mix_dist"]
            self._distrib = (mix_dist*prob+(1-mix_dist)*uniforme_dist) #Mix of the real / uniform distributions weighted by the mix factor

        cat_distrib = Categorical(probs=self._distrib.expand(batch_size, -1))
        sample = cat_distrib.sample()

        self._samples = sample
        #Move the TF sets to the device of the samples BEFORE indexing: a tensor can't be indexed
        #with indexes living on another (non-CPU) device.
        TF_samples = self._TF_sets.to(device)[sample].reshape(batch_size, -1) #[Batch_size, TFseq]

        for i in range(self._N_seqTF):
            ## Transformations ##
            x = self.apply_TF(x, TF_samples[:,i])
    return x
|
||||
|
||||
def apply_TF(self, x, sampled_TF):
    """ Applies the sampled transformations.

    For every TF, the elements of the batch which sampled it are extracted, transformed,
    then written back (out of place) at their original position in the batch.

    Args:
        x (Tensor): Batch of data, unpacked as (batch_size, channels, h, w).
        sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.

    Returns:
        Tensor: Batch of transformed data.
    """
    _, channels, h, w = x.shape

    for tf_idx, tf_name in enumerate(self._TF):
        selected = sampled_TF==tf_idx #Selection mask of the elements receiving this TF
        subset = x[selected]

        if subset.shape[0]==0: #No data for this TF
            continue

        magnitude = self._params["mag"]
        if not self._shared_mag:
            magnitude = magnitude[tf_idx]
        if self._fixed_mag:
            magnitude = magnitude.detach() #Fmodel systematically tries to track the gradients of every parameter

        transformed = self._TF_dict[tf_name](x=subset, mag=magnitude)

        #Batch index of every selected element, broadcast to the shape of the transformed data
        rows = selected.nonzero().reshape(-1,1,1,1).expand(-1,channels,h,w)
        x = x.scatter(dim=0, index=rows, src=transformed)

    return x
|
||||
|
||||
def adjust_param(self, soft=False):
    """ Enforce limitations to the learned parameters.

    Ensure that the parameter values stay in the right intervals. This should be called after each update of those parameters.
    The parameters are modified through their '.data', ie. outside of the autograd graph.

    Args:
        soft (bool): Whether to use a softmax function for the TF probabilities. Not recommended as it tends to lock the probabilities, preventing them from being learned. (default: False)
    """
    if not self._fixed_prob:
        prob = self._params['prob']
        if soft:
            prob.data = F.softmax(prob.data, dim=0) #Too 'soft': stuck on a uniform distribution if the learning rate is too low
        else:
            #The floor is relative to the number of probabilities (one per TF set, not one per TF):
            #it must stay below the uniform probability, otherwise every entry would be clamped.
            nb_prob = prob.numel()
            prob.data = prob.data.clamp(min=1/(nb_prob*100),max=1.0)
            prob.data = prob.data/prob.data.sum() #Constraint sum(p)=1

    if not self._fixed_mag:
        self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)

    if not self._fixed_mix:
        self._params['mix_dist'].data = self._params['mix_dist'].data.clamp(min=0.0, max=0.999)
|
||||
|
||||
def loss_weight(self):
    """ Weights for the loss.

    Compute the weight of the loss of each input depending on which TF set was sampled for it during the last forward pass.
    Should be applied to the loss before reduction.

    Returns:
        Tensor : Loss weights (one per element of the last batch), or 1 if nothing was sampled.
    """
    sampled_sets = self._samples
    if sampled_sets is None: #No sample = no weighting
        return 1

    prob = self._params["prob"]
    if self._fixed_prob:
        prob = prob.detach()

    #One-hot encoding of the sampled TF set of each element
    one_hot = torch.zeros((sampled_sets.shape[0], self._nb_TF_sets), device=sampled_sets.device)
    one_hot.scatter_(1, sampled_sets.view(-1,1), 1)

    #Weighting by the probabilities (divided by the sampling distribution to avoid lowering the loss)
    weighted = one_hot * prob/self._distrib
    return torch.sum(weighted, dim=1)
|
||||
|
||||
def reg_loss(self, reg_factor=0.005):
    """ Regularisation term used to learn the magnitudes.

    Use an L2 loss between the magnitudes and their target (_reg_tgt) to encourage high magnitude TF.

    Args:
        reg_factor (float): Factor by which the regularisation loss is multiplied. (default: 0.005)
    Returns:
        Tensor containing the regularisation loss value (a constant 0 if the magnitudes are fixed).
    """
    if self._fixed_mag: #Nothing to regularise
        return torch.tensor(0)

    mags = self._params['mag']
    if mags.shape!=torch.Size([]): #Per-TF magnitudes: only keep the TF selected by the mask
        mags = mags[self._reg_mask]

    target = self._reg_tgt.to(mags.device)
    return reg_factor * F.mse_loss(mags, target=target, reduction='mean')
|
||||
|
||||
def TF_prob(self):
    """ Gives an estimation of the individual TF probabilities.

        Be wary that the probability returned isn't exact. The TF distribution isn't fully represented by those.
        Each probability should be taken individually. They only represent the chance for a specific TF to be picked at least once.

        The result is cached and only recomputed when the TF sets probabilities changed
        since the previous call.

        Returns:
            Tensor containing the single TF probabilities of applications.
    """
    set_prob = self._params['prob']
    cached = self._prob_mem.to(set_prob.device)

    # Recompute as soon as ANY probability differs from the memorised ones
    # (requiring all of them to differ would return stale values after a partial change).
    if torch.any(set_prob != cached):
        # Keep an independent snapshot: a view on the parameter storage would silently
        # follow in-place updates and make the comparison above always succeed.
        self._prob_mem = set_prob.detach().clone()

        single_prob = torch.zeros(self._nb_tf)
        for idx_tf in range(self._nb_tf):
            for i, t_set in enumerate(self._TF_sets):
                # A TF gets the probability of every set it appears in (counted once per set).
                if idx_tf in t_set:
                    single_prob[idx_tf] += set_prob[i]
        self._single_TF_prob = single_prob

    return self._single_TF_prob
|
||||
|
||||
def train(self, mode=True):
    """ Set the module training mode.

        The augmentation mode follows the training mode: data augmentation is enabled
        when training and disabled otherwise.

        Args:
            mode (bool): Whether to put the module in training mode. (default: True)

        Returns:
            The module itself.
    """
    self.augment(mode=mode)
    super(Data_augV7, self).train(mode)
    return self
|
||||
|
||||
def eval(self):
    """ Set the module to evaluation mode.

        Shorthand for ``train(mode=False)``.

        Returns:
            Whatever ``train`` returns.
    """
    return self.train(mode=False)
|
||||
|
||||
def augment(self, mode=True):
    """ Set the augmentation mode.

        Only stores the flag in ``self._data_augmentation``.

        Args:
            mode (bool): Whether to perform data augmentation on the forward pass. (default: True)
    """
    self._data_augmentation = mode
|
||||
|
||||
def __getitem__(self, key):
    """Access to the learnable parameters

        The 'prob' key is overridden: it returns the per-TF estimation computed by
        ``TF_prob()`` instead of the raw stored parameter.

        Args:
            key (string): Name of the learnable parameter to access.

        Returns:
            nn.Parameter (or the Tensor given by ``TF_prob()`` for 'prob').
    """
    if key != 'prob':
        return self._params[key]
    return self.TF_prob() #Override prob access
|
||||
|
||||
def __str__(self):
    """Name of the module

        Returns:
            String containing the name of the module as well as the higher levels parameters.
    """
    # 'Fx' marks a fixed (not learned) quantity, 'Sh' a magnitude shared by all TF.
    dist_param = 'Fx' if self._fixed_prob else ''
    mag_param = 'Mag' + ('Fx' if self._fixed_mag else '') + ('Sh' if self._shared_mag else '')
    common = (dist_param, self._nb_tf, self._N_seqTF, mag_param)

    if not self._mix_dist:
        return "Data_augV7(Uniform%s-%dTFx%d-%s)" % common
    if self._fixed_mix:
        # The mixing factor is only part of the name when it is not learned.
        return "Data_augV7(Mix%.1f%s-%dTFx%d-%s)" % ((self._params['mix_dist'].item(),) + common)
    return "Data_augV7(Mix%s-%dTFx%d-%s)" % common
|
||||
|
||||
class RandAug(nn.Module): #RandAugment = UniformFx-MagFxSh, but faster
    """RandAugment implementation.

        Applies transformations (TF) to batch of data.
        Each TF is defined by a (name, probability of application, magnitude of distortion) tuple. For the full definition of the TF, see transformations.py.
        The TF probabilities are ignored: the TF are instead selected uniformly at random.

        Attributes:
            _data_augmentation (bool): Whether TF will be applied during forward pass.
            _TF_dict (dict) : A dictionary containing the data transformations (TF) to be applied.
            _TF (list) : List of TF names.
            _nb_tf (int) : Number of TF used.
            _N_seqTF (int) : Number of TF to be applied sequentially to each inputs
            _shared_mag (bool) : Whether to share a single magnitude parameters for all TF. Should be True.
            _fixed_mag (bool): Whether to lock the TF magnitudes. Should be True.
            _params (nn.ParameterDict): Data augmentation parameters.
            mag (nn.Parameter): Requested magnitude (not clamped), only read by __str__.
    """
    def __init__(self, TF_dict=TF.TF_dict, N_TF=1, mag=TF.PARAMETER_MAX):
        """Init RandAug.

            Args:
                TF_dict (dict): A dictionary containing the data transformations (TF) to be applied. (default: use all available TF from transformations.py)
                N_TF (int): Number of TF to be applied sequentially to each inputs. (default: 1)
                mag (float): Magnitude of the TF. Should be between [PARAMETER_MIN, PARAMETER_MAX] defined in transformations.py. (default: PARAMETER_MAX)
        """
        super(RandAug, self).__init__()

        self._data_augmentation = True

        self._TF_dict = TF_dict
        self._TF = list(self._TF_dict.keys())
        self._nb_tf = len(self._TF)
        self._N_seqTF = N_TF

        self.mag = nn.Parameter(torch.tensor(float(mag)))
        self._params = nn.ParameterDict({
            "prob": nn.Parameter(torch.ones(self._nb_tf)/self._nb_tf), #Ignored
            "mag" : nn.Parameter(torch.tensor(float(mag))),
        })
        self._shared_mag = True
        self._fixed_mag = True

        # The magnitude actually applied is kept inside the valid range.
        self._params['mag'].data = self._params['mag'].data.clamp(min=TF.PARAMETER_MIN, max=TF.PARAMETER_MAX)

    def forward(self, x):
        """ Main method of the Data augmentation module.

            Args:
                x (Tensor): Batch of data.

            Returns:
                Tensor : Batch of transformed data (the input itself when augmentation is disabled).
        """
        if not self._data_augmentation:
            return x

        device = x.device
        batch_size = x.shape[0]

        # Work on a copy to avoid modifying the samples by reference (problematic for parallel uses).
        x = copy.deepcopy(x)

        ## Sampling ## == sampled_ops = np.random.choice(transforms, N)
        # The distribution does not change between the sequential TF: build it once.
        uniforme_dist = torch.ones(1, self._nb_tf, device=device).softmax(dim=1)
        cat_distrib = Categorical(probs=torch.ones((batch_size, self._nb_tf), device=device)*uniforme_dist)

        for _ in range(self._N_seqTF):
            sample = cat_distrib.sample()

            ## Transformations ##
            x = self.apply_TF(x, sample)
        return x

    def apply_TF(self, x, sampled_TF):
        """ Applies the sampled transformations.

            Args:
                x (Tensor): Batch of data. Modified in place.
                sampled_TF (Tensor): Indexes of the TF to be applied to each element of data.

            Returns:
                Tensor: Batch of transformed data.
        """
        # The magnitude is fixed: it never receives gradients.
        magnitude = self._params["mag"].detach()

        for tf_idx, tf in enumerate(self._TF):
            mask = sampled_TF == tf_idx #Create selection mask
            smp_x = x[mask]

            if smp_x.shape[0] != 0: #if there's data to TF
                #In place
                x[mask] = self._TF_dict[tf](x=smp_x, mag=magnitude)

        return x

    def adjust_param(self, soft=False):
        """Not used
        """
        pass #No parameter to optimise

    def loss_weight(self):
        """Not used
        """
        return 1 #No sample = no weighting

    def reg_loss(self, reg_factor=0.005):
        """Not used
        """
        return torch.tensor(0) #No regularisation

    def train(self, mode=None):
        """ Set the module training mode.

            Args:
                mode (bool): Whether to put the module in training mode. None would not change the augmentation mode. (default: None)

            Returns:
                The module itself (same contract as nn.Module.train).
        """
        if mode is None:
            mode = self._data_augmentation
        self.augment(mode=mode) #No effect if mode was None
        super(RandAug, self).train(mode)
        return self

    def eval(self):
        """ Set the module to evaluation mode.

            Returns:
                The module itself (same contract as nn.Module.eval).
        """
        return self.train(mode=False)

    def augment(self, mode=True):
        """ Set the augmentation mode.

            Args:
                mode (bool): Whether to perform data augmentation on the forward pass. (default: True)
        """
        self._data_augmentation = mode

    def __getitem__(self, key):
        """Access to the learnable parameters
            Args:
                key (string): Name of the learnable parameter to access.

            Returns:
                nn.Parameter.
        """
        return self._params[key]

    def __str__(self):
        """Name of the module

            Returns:
                String containing the name of the module as well as the higher levels parameters.
        """
        return "RandAug(%dTFx%d-Mag%d)" % (self._nb_tf, self._N_seqTF, self.mag)
|
||||
|
||||
import higher
|
||||
class Higher_model(nn.Module):
    """Model wrapper for higher gradient tracking.

        Keep in memory the original model and its functional, higher, version.

        Might not be needed anymore if Higher implement detach for fmodel.

        see : https://github.com/facebookresearch/higher

        TODO: Get rid of the original model if not needed by user.

        Attributes:
            _name (string): Name of the model.
            _mods (nn.ModuleDict): Models (Original and Higher version).
    """
    def __init__(self, model):
        """Init Higher_model.

            Args:
                model (nn.Module): Network for which higher gradients can be tracked.
        """
        super(Higher_model, self).__init__()

        self._name = model.__str__()
        self._mods = nn.ModuleDict({
            'original': model,
            'functional': higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
        })

    def get_diffopt(self, opt, grad_callback=None, track_higher_grads=True):
        """Get a differentiable version of an Optimizer.

            Higher/Differentiable optimizer required to be used for higher gradient tracking.
            Usage : diffopt.step(loss) == (opt.zero_grad, loss.backward, opt.step)

            Be wary that if track_higher_grads is set to True, a new state of the model would be saved each time diffopt.step() is called.
            Thus increasing memory consumption. The detach_() method should be called to reset the gradient tape and prevent memory saturation.

            Args:
                opt (torch.optim): Optimizer to make differentiable.
                grad_callback (fct(grads)=grads): Function applied to the list of gradients parameters (ex: clipping). (default: None)
                track_higher_grads (bool): Whether higher gradient are tracked. If True, the graph/states will be retained to allow backpropagation. (default: True)

            Returns:
                (Higher.DifferentiableOptimizer): Differentiable version of the optimizer.
        """
        return higher.optim.get_diff_optim(opt,
            self._mods['original'].parameters(),
            fmodel=self._mods['functional'],
            grad_callback=grad_callback,
            track_higher_grads=track_higher_grads)

    def forward(self, x):
        """ Main method of the model.

            Args:
                x (Tensor): Batch of data.

            Returns:
                Tensor : Output of the network. Should be logits.
        """
        return self._mods['functional'](x)

    def detach_(self):
        """Detach from the graph.

            Needed to limit the number of state kept in memory.
        """
        # Keep only the latest set of fast parameters, then cut it from the graph.
        tmp = self._mods['functional'].fast_params
        self._mods['functional']._fast_params = []
        self._mods['functional'].update_params(tmp)
        for p in self._mods['functional'].fast_params:
            p.detach_().requires_grad_()

    def state_dict(self, *args, **kwargs):
        """Returns a dictionary containing a whole state of the module.

            Only the state of the functional model is reported. Arguments are forwarded
            unchanged, so the usual nn.Module.state_dict options (destination, prefix,
            keep_vars) are accepted; this is required when this wrapper is itself a
            sub-module of a model whose state_dict() is requested.
        """
        return self._mods['functional'].state_dict(*args, **kwargs)

    def __getitem__(self, key):
        """Access to modules
            Args:
                key (string): Name of the module to access.

            Returns:
                nn.Module.
        """
        return self._mods[key]

    def __str__(self):
        """Name of the module

            Returns:
                String containing the name of the module.
        """
        return self._name
|
||||
|
||||
class Augmented_model(nn.Module):
    """Wrapper for a Data Augmentation module and a model.

        Attributes:
            _mods (nn.ModuleDict): A dictionary containing the modules.
            _data_augmentation (bool): Whether data augmentation should be used.
    """
    def __init__(self, data_augmenter, model):
        """Init Augmented Model.

            By default, data augmentation will be performed.

            Args:
                data_augmenter (nn.Module): Data augmentation module.
                model (nn.Module): Network.
        """
        super(Augmented_model, self).__init__()

        self._mods = nn.ModuleDict({
            'data_aug': data_augmenter,
            'model': model
        })

        self.augment(mode=True)

    def forward(self, x):
        """ Main method of the Augmented model.

            Perform the forward pass of both modules.

            Args:
                x (Tensor): Batch of data.

            Returns:
                Tensor : Output of the networks. Should be logits.
        """
        return self._mods['model'](self._mods['data_aug'](x))

    def augment(self, mode=True):
        """ Set the augmentation mode.

            Args:
                mode (bool): Whether to perform data augmentation on the forward pass. (default: True)
        """
        self._data_augmentation = mode
        self._mods['data_aug'].augment(mode)

    def train(self, mode=True):
        """ Set the module training mode.

            The augmentation mode chosen through augment() is preserved.

            Args:
                mode (bool): Whether to put the module in training mode. (default: True)

            Returns:
                The module itself.
        """
        super(Augmented_model, self).train(mode)
        # Propagating the training mode may have changed the augmentation mode of the
        # data augmentation module: restore the one requested by the user.
        self._mods['data_aug'].augment(mode=self._data_augmentation)
        return self

    def eval(self):
        """ Set the module to evaluation mode.

            Data augmentation is turned off on the data augmentation module, but the
            requested augmentation mode is remembered for the next call to train().

            Returns:
                The module itself.
        """
        super(Augmented_model, self).train(mode=False)
        self._mods['data_aug'].augment(mode=False)
        return self

    def items(self):
        """Return an iterable of the ModuleDict key/value pairs.
        """
        return self._mods.items()

    def update(self, modules):
        """Update the module dictionary.

            The new dictionary should keep the same structure.
        """
        assert(self._mods.keys()==modules.keys())
        self._mods.update(modules)

    def is_augmenting(self):
        """ Return whether data augmentation is applied.

            Returns:
                bool : True if data augmentation is applied.
        """
        return self._data_augmentation

    def TF_names(self):
        """ Get the transformations names used by the data augmentation module.

            Returns:
                list : names of the transformations of the data augmentation module,
                or None if that module does not expose any (no ``_TF`` attribute).
        """
        # Only a missing attribute means "no names"; any other error is a real problem
        # and is left to propagate instead of being silently swallowed.
        return getattr(self._mods['data_aug'], '_TF', None)

    def __getitem__(self, key):
        """Access to the modules.
            Args:
                key (string): Name of the module to access.

            Returns:
                nn.Module.
        """
        return self._mods[key]

    def __str__(self):
        """Name of the module

            Returns:
                String containing the name of the module as well as the higher levels parameters.
        """
        return "Aug_mod("+str(self._mods['data_aug'])+"-"+str(self._mods['model'])+")"
|
26
higher/smart_aug/model.py
Executable file
26
higher/smart_aug/model.py
Executable file
|
@ -0,0 +1,26 @@
|
|||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
## Basic CNN ##
|
||||
class LeNet(nn.Module):
    """Small LeNet-style CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    The first linear layer expects 5*5*50 features per sample, i.e. 5x5 feature maps
    after the second pooling (what a 32x32 input produces with these layers).
    """

    def __init__(self, num_inp, num_out):
        """
        Args:
            num_inp (int): Number of input channels.
            num_out (int): Number of outputs (size of the last linear layer).
        """
        super().__init__()
        self.conv1 = nn.Conv2d(num_inp, 20, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5*5*50, 500)
        self.fc2 = nn.Linear(500, num_out)

    def forward(self, x):
        """Return the raw outputs of the last linear layer for the batch ``x``."""
        feats = self.pool(F.relu(self.conv1(x)))
        feats = self.pool2(F.relu(self.conv2(feats)))
        # Flatten everything but the batch dimension.
        flat = feats.view(feats.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

    def __str__(self):
        return "LeNet"
|
490
higher/smart_aug/old/augmentation_transforms.py
Executable file
490
higher/smart_aug/old/augmentation_transforms.py
Executable file
|
@ -0,0 +1,490 @@
|
|||
# coding=utf-8
|
||||
# Copyright 2019 The Google UDA Team Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Transforms used in the Augmentation Policies.
|
||||
|
||||
Copied from AutoAugment: https://github.com/tensorflow/models/blob/master/research/autoaugment/
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import random
|
||||
import numpy as np
|
||||
# pylint:disable=g-multiple-import
|
||||
from PIL import ImageOps, ImageEnhance, ImageFilter, Image
|
||||
# pylint:enable=g-multiple-import
|
||||
|
||||
#import tensorflow as tf
|
||||
|
||||
#FLAGS = tf.flags.FLAGS
|
||||
|
||||
|
||||
IMAGE_SIZE = 32
|
||||
# What is the dataset mean and std of the images on the training set
|
||||
PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted
|
||||
|
||||
|
||||
def get_mean_and_std():
  """Return the per-channel (means, stds) used to (de)normalise images.

  The values are hard-coded; the disabled upstream code selected them for the
  "cifar10" task (the "svhn" alternative was
  means = [0.4376821, 0.4437697, 0.47280442],
  stds = [0.19803012, 0.20101562, 0.19703614]).

  Returns:
    A `(means, stds)` tuple of two 3-element lists.
  """
  channel_means = [0.49139968, 0.48215841, 0.44653091]
  channel_stds = [0.24703223, 0.24348513, 0.26158784]
  return channel_means, channel_stds
|
||||
|
||||
|
||||
def random_flip(x):
  """Return `x` flipped left/right with 50% probability, `x` itself otherwise."""
  draw = np.random.rand(1)[0]
  return np.fliplr(x) if draw > 0.5 else x
|
||||
|
||||
|
||||
def zero_pad_and_crop(img, amount=4):
  """Zero pad by `amount` zero pixels on each side then take a random crop.

  Args:
    img: numpy image (height x width x channels) that will be zero padded and
      cropped.
    amount: amount of zeros to pad `img` with horizontally and vertically.

  Returns:
    The cropped zero padded img. The returned numpy array will be of the same
    shape as `img`.
  """
  height, width, channels = img.shape[0], img.shape[1], img.shape[2]
  padded_img = np.zeros((height + amount * 2, width + amount * 2, channels))
  padded_img[amount:height + amount, amount:width + amount, :] = img

  # Valid crop offsets are 0..2*amount inclusive; `high` is exclusive in
  # np.random.randint, hence the +1 (this also makes amount=0 a valid no-op).
  top = np.random.randint(low=0, high=2 * amount + 1)
  left = np.random.randint(low=0, high=2 * amount + 1)
  return padded_img[top:top + height, left:left + width, :]
|
||||
|
||||
|
||||
def create_cutout_mask(img_height, img_width, num_channels, size):
  """Creates a zero mask used for cutout of shape `img_height` x `img_width`.

  Args:
    img_height: Height of image cutout mask will be applied to.
    img_width: Width of image cutout mask will be applied to (must equal
      `img_height`).
    num_channels: Number of channels in the image.
    size: Size of the zeros mask.

  Returns:
    A `(mask, upper_coord, lower_coord)` tuple. `mask` has shape
    `img_height` x `img_width` x `num_channels` and is all ones except for a
    patch of zeros extending `size // 2` on each side of a random centre,
    clipped to the image. It is meant to be elementwise multiplied with the
    original image. `upper_coord` and `lower_coord` are the (inclusive) start
    and (exclusive) end corners of the patch.
  """
  assert img_height == img_width

  # Sample the centre of the patch.
  center_row = np.random.randint(low=0, high=img_height)
  center_col = np.random.randint(low=0, high=img_width)

  # Corners of the patch, clipped to the image bounds.
  half = size // 2
  upper_coord = (max(0, center_row - half), max(0, center_col - half))
  lower_coord = (min(img_height, center_row + half),
                 min(img_width, center_col + half))
  assert lower_coord[0] - upper_coord[0] > 0
  assert lower_coord[1] - upper_coord[1] > 0

  mask = np.ones((img_height, img_width, num_channels))
  mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = 0.0
  return mask, upper_coord, lower_coord
|
||||
|
||||
|
||||
def cutout_numpy(img, size=16):
  """Apply cutout with mask of shape `size` x `size` to `img`.

  The cutout operation is from the paper https://arxiv.org/abs/1708.04552.
  This operation zeroes a patch at a random location within `img`.

  Args:
    img: Numpy image (height x width x channels) that cutout will be applied to.
    size: Height/width of the cutout mask that will be applied.

  Returns:
    A numpy tensor that is the result of applying the cutout mask to `img`.
  """
  shape = img.shape
  img_height, img_width, num_channels = shape[0], shape[1], shape[2]
  assert len(shape) == 3
  cutout_mask = create_cutout_mask(img_height, img_width, num_channels, size)[0]
  return img * cutout_mask
|
||||
|
||||
|
||||
def float_parameter(level, maxval):
  """Helper function to scale `level` to a float between 0 and `maxval`.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    A float that results from scaling `maxval` according to `level`.
  """
  scaled = float(level) * maxval
  return scaled / PARAMETER_MAX
|
||||
|
||||
|
||||
def int_parameter(level, maxval):
  """Helper function to scale `level` to an int between 0 and `maxval`.

  Args:
    level: Level of the operation that will be between [0, `PARAMETER_MAX`].
    maxval: Maximum value that the operation can have. This will be scaled
      to level/PARAMETER_MAX.

  Returns:
    An int that results from scaling `maxval` according to `level`
    (truncated toward zero).
  """
  scaled = level * maxval / PARAMETER_MAX
  return int(scaled)
|
||||
|
||||
|
||||
def pil_wrap(img, use_mean_std):
  """Convert the `img` numpy tensor to a PIL Image.

  Args:
    img: Numpy image, normalised with the dataset mean/std when `use_mean_std`
      is true.
    use_mean_std: Whether to undo the dataset normalisation (values given by
      `get_mean_and_std`) before the conversion.

  Returns:
    An RGBA PIL Image built from the de-normalised values scaled by 255.
  """
  if use_mean_std:
    means, stds = get_mean_and_std()
  else:
    means = [0, 0, 0]
    stds = [1, 1, 1]

  # De-normalise once, then rescale to 8-bit values.
  pixels = np.uint8((img * stds + means) * 255.0)
  return Image.fromarray(pixels).convert('RGBA')
|
||||
|
||||
|
||||
def pil_unwrap(pil_img, use_mean_std, img_shape):
  """Converts the PIL img to a numpy array.

  Args:
    pil_img: Image whose `getdata()` yields 4-channel pixels.
    use_mean_std: Whether to normalise the result with the values given by
      `get_mean_and_std`.
    img_shape: Shape of the image; only the first two entries are used.

  Returns:
    A float array of shape (img_shape[0], img_shape[1], 3). Pixels whose 4th
    channel is 0 are set to [0, 0, 0] after normalisation.
  """
  if use_mean_std:
    means, stds = get_mean_and_std()
  else:
    means, stds = [0, 0, 0], [1, 1, 1]

  rgba = np.array(pil_img.getdata()).reshape(
      (img_shape[0], img_shape[1], 4)) / 255.0
  transparent = rgba[:, :, 3] == 0
  rgb = (rgba[:, :, :3] - means) / stds
  rgb[transparent] = [0, 0, 0]
  return rgb
|
||||
|
||||
|
||||
def apply_policy(policy, img, use_mean_std=True):
  """Apply the `policy` to the numpy `img`.

  Args:
    policy: A list of tuples with the form (name, probability, level) where
      `name` is the name of the augmentation operation to apply, `probability`
      is the probability of applying the operation and `level` is what strength
      the operation to apply.
    img: Numpy image that will have `policy` applied to it.
    use_mean_std: Forwarded to `pil_wrap` / `pil_unwrap`.

  Returns:
    The result of applying `policy` to `img`.
  """
  img_shape = img.shape
  current = pil_wrap(img, use_mean_std)
  for xform in policy:
    assert len(xform) == 3
    name, probability, level = xform
    transform = NAME_TO_TRANSFORM[name]
    # Each operation is applied (or skipped) according to its own probability.
    current = transform.pil_transformer(probability, level, img_shape)(current)
  return pil_unwrap(current, use_mean_std, img_shape)
|
||||
|
||||
|
||||
class TransformFunction(object):
  """Callable wrapper pairing a transform function with a printable name."""

  def __init__(self, func, name):
    self.f = func
    self.name = name

  def __repr__(self):
    return ''.join(('<', self.name, '>'))

  def __call__(self, pil_img):
    return self.f(pil_img)
|
||||
|
||||
|
||||
class TransformT(object):
  """Each instance of this class represents a specific transform."""

  def __init__(self, name, xform_fn):
    self.name = name
    self.xform = xform_fn

  def pil_transformer(self, probability, level, img_shape):
    """Build a function applying this transform with the given probability.

    Args:
      probability: Chance of applying the transform on each call.
      level: Strength forwarded to the transform.
      img_shape: Image shape forwarded to the transform.

    Returns:
      A `TransformFunction` taking an image and returning it, transformed or not.
    """

    def maybe_apply(im):
      if not random.random() < probability:
        return im
      return self.xform(im, level, img_shape)

    label = self.name + '({:.1f},{})'.format(probability, level)
    return TransformFunction(maybe_apply, label)
|
||||
|
||||
|
||||
################## Transform Functions ##################
|
||||
def _rgb_only(op):
  """Adapt an ImageOps operation: convert to RGB, apply `op`, convert back to RGBA."""
  def xform(pil_img, level, _):
    return op(pil_img.convert('RGB')).convert('RGBA')
  return xform


def _transposed(method):
  """Build a transform calling `pil_img.transpose(method)` (level is ignored)."""
  def xform(pil_img, level, _):
    return pil_img.transpose(method)
  return xform


def _filtered(kernel):
  """Build a transform calling `pil_img.filter(kernel)` (level is ignored)."""
  def xform(pil_img, level, _):
    return pil_img.filter(kernel)
  return xform


# Transforms that ignore `level` and the image shape.
identity = TransformT('identity', lambda pil_img, level, _: pil_img)
flip_lr = TransformT('FlipLR', _transposed(Image.FLIP_LEFT_RIGHT))
flip_ud = TransformT('FlipUD', _transposed(Image.FLIP_TOP_BOTTOM))
auto_contrast = TransformT('AutoContrast', _rgb_only(ImageOps.autocontrast))
equalize = TransformT('Equalize', _rgb_only(ImageOps.equalize))
invert = TransformT('Invert', _rgb_only(ImageOps.invert))
blur = TransformT('Blur', _filtered(ImageFilter.BLUR))
smooth = TransformT('Smooth', _filtered(ImageFilter.SMOOTH))
|
||||
|
||||
|
||||
def _rotate_impl(pil_img, level, _):
  """Rotates `pil_img` from -30 to 30 degrees depending on `level`.

  The angle magnitude is `int_parameter(level, 30)`; its sign is random.
  """
  angle = int_parameter(level, 30)
  sign = -1 if random.random() > 0.5 else 1
  return pil_img.rotate(sign * angle)


rotate = TransformT('Rotate', _rotate_impl)
|
||||
|
||||
|
||||
def _posterize_impl(pil_img, level, _):
  """Applies PIL Posterize to `pil_img`.

  The number of bits kept is `4 - int_parameter(level, 4)`, so a higher level
  keeps fewer bits.
  """
  removed_bits = int_parameter(level, 4)
  kept_bits = 4 - removed_bits
  rgb = pil_img.convert('RGB')
  return ImageOps.posterize(rgb, kept_bits).convert('RGBA')


posterize = TransformT('Posterize', _posterize_impl)
|
||||
|
||||
|
||||
def _shear_x_impl(pil_img, level, img_shape):
  """Applies PIL ShearX to `pil_img`.

  The ShearX operation shears the image along the horizontal axis with `level`
  magnitude. The shear factor is `float_parameter(level, 0.3)` with a random
  sign.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].
    img_shape: Shape of the image; its first two entries give the output size.

  Returns:
    A PIL Image that has had ShearX applied to it.
  """
  shear = float_parameter(level, 0.3)
  if random.random() > 0.5:
    shear = -shear
  out_size = (img_shape[0], img_shape[1])
  coeffs = (1, shear, 0, 0, 1, 0)
  return pil_img.transform(out_size, Image.AFFINE, coeffs)


shear_x = TransformT('ShearX', _shear_x_impl)
|
||||
|
||||
|
||||
def _shear_y_impl(pil_img, level, img_shape):
  """Applies PIL ShearY to `pil_img`.

  The ShearY operation shears the image along the vertical axis with `level`
  magnitude. The shear factor is `float_parameter(level, 0.3)` with a random
  sign.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].
    img_shape: Shape of the image; its first two entries give the output size.

  Returns:
    A PIL Image that has had ShearY applied to it.
  """
  shear = float_parameter(level, 0.3)
  if random.random() > 0.5:
    shear = -shear
  out_size = (img_shape[0], img_shape[1])
  coeffs = (1, 0, 0, shear, 1, 0)
  return pil_img.transform(out_size, Image.AFFINE, coeffs)


shear_y = TransformT('ShearY', _shear_y_impl)
|
||||
|
||||
|
||||
def _translate_x_impl(pil_img, level, img_shape):
  """Applies PIL TranslateX to `pil_img`.

  Translate the image in the horizontal direction by
  `int_parameter(level, 10)` pixels, in a random direction.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].
    img_shape: Shape of the image; its first two entries give the output size.

  Returns:
    A PIL Image that has had TranslateX applied to it.
  """
  shift = int_parameter(level, 10)
  if random.random() > 0.5:
    shift = -shift
  out_size = (img_shape[0], img_shape[1])
  coeffs = (1, 0, shift, 0, 1, 0)
  return pil_img.transform(out_size, Image.AFFINE, coeffs)


translate_x = TransformT('TranslateX', _translate_x_impl)
|
||||
|
||||
|
||||
def _translate_y_impl(pil_img, level, img_shape):
  """Applies PIL TranslateY to `pil_img`.

  Translate the image in the vertical direction by
  `int_parameter(level, 10)` pixels, in a random direction.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].
    img_shape: Shape of the image; its first two entries give the output size.

  Returns:
    A PIL Image that has had TranslateY applied to it.
  """
  shift = int_parameter(level, 10)
  if random.random() > 0.5:
    shift = -shift
  out_size = (img_shape[0], img_shape[1])
  coeffs = (1, 0, 0, 0, 1, shift)
  return pil_img.transform(out_size, Image.AFFINE, coeffs)


translate_y = TransformT('TranslateY', _translate_y_impl)
|
||||
|
||||
|
||||
def _crop_impl(pil_img, level, img_shape, interpolation=Image.BILINEAR):
  """Applies a crop to `pil_img` with the size depending on the `level`.

  `level` is used directly as the border removed on every side; the remaining
  area is resized back to the original size with `interpolation`.
  """
  full_size = (img_shape[0], img_shape[1])
  box = (level, level, full_size[0] - level, full_size[1] - level)
  return pil_img.crop(box).resize(full_size, interpolation)


crop_bilinear = TransformT('CropBilinear', _crop_impl)
|
||||
|
||||
|
||||
def _solarize_impl(pil_img, level, _):
  """Applies PIL Solarize to `pil_img`.

  The threshold passed to `ImageOps.solarize` is
  `256 - int_parameter(level, 256)`, so a higher level gives a lower threshold.

  Args:
    pil_img: Image in PIL object.
    level: Strength of the operation specified as an Integer from
      [0, `PARAMETER_MAX`].

  Returns:
    A PIL Image that has had Solarize applied to it.
  """
  offset = int_parameter(level, 256)
  threshold = 256 - offset
  rgb = pil_img.convert('RGB')
  return ImageOps.solarize(rgb, threshold).convert('RGBA')


solarize = TransformT('Solarize', _solarize_impl)
|
||||
|
||||
|
||||
def _cutout_pil_impl(pil_img, level, img_shape):
    """Apply cutout to pil_img at the specified level.

    The patch size is `int_parameter(level, 20)`; a non-positive size leaves
    the image untouched. Otherwise the rectangle reported by
    `create_cutout_mask` is overwritten with a constant colour.

    Note: the pixels of `pil_img` are modified in place and the same object
    is returned.
    """
    patch_size = int_parameter(level, 20)
    if patch_size <= 0:
        return pil_img

    # The first value returned by create_cutout_mask is not needed here;
    # only the two corner coordinates are used. The channel count is fixed to 3.
    _, upper_coord, lower_coord = create_cutout_mask(
        img_shape[0], img_shape[1], 3, patch_size)

    pixel_map = pil_img.load()  # create the pixel map
    fill_colour = (125, 122, 113, 0)
    second_axis = range(upper_coord[1], lower_coord[1])
    for i in range(upper_coord[0], lower_coord[0]):
        for j in second_axis:
            pixel_map[i, j] = fill_colour  # set the colour accordingly
    return pil_img


# Wrap the cutout implementation in a TransformT named 'Cutout'.
cutout = TransformT('Cutout', _cutout_pil_impl)
|
||||
|
||||
|
||||
def _enhancer_impl(enhancer):
    """Build a transform implementation around a PIL ImageEnhance class.

    Args:
        enhancer: Callable that takes a PIL image and returns an object with
            an `enhance(factor)` method (e.g. `ImageEnhance.Color`).

    Returns:
        A function `impl(pil_img, level, _)` that enhances `pil_img` with the
        factor `float_parameter(level, 1.8) + 0.1`.
    """
    def impl(pil_img, level, _):
        # The 0.1 offset keeps the factor away from 0, which would just
        # destroy the image.
        factor = float_parameter(level, 1.8) + .1
        return enhancer(pil_img).enhance(factor)

    return impl
|
||||
|
||||
|
||||
# ImageEnhance-based transforms, all built through _enhancer_impl.
color = TransformT('Color', _enhancer_impl(ImageEnhance.Color))
contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast))
brightness = TransformT('Brightness', _enhancer_impl(ImageEnhance.Brightness))
sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness))

# Every transform defined in this module, in a fixed order.
ALL_TRANSFORMS = [
    flip_lr, flip_ud, auto_contrast, equalize, invert, rotate, posterize,
    crop_bilinear, solarize, color, contrast, brightness, sharpness,
    shear_x, shear_y, translate_x, translate_y, cutout, blur, smooth,
]

# Lookup from a transform's `name` attribute to the transform object.
NAME_TO_TRANSFORM = {transform.name: transform for transform in ALL_TRANSFORMS}
TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys()
|
1065
higher/smart_aug/old/dataug_old.py
Normal file
1065
higher/smart_aug/old/dataug_old.py
Normal file
File diff suppressed because it is too large
Load diff
85
higher/smart_aug/old/higher_repro.py
Normal file
85
higher/smart_aug/old/higher_repro.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torchvision
|
||||
import higher
|
||||
import time
|
||||
|
||||
# CIFAR-10 training split (downloaded into ./data when missing), converted to tensors.
data_train = torchvision.datasets.CIFAR10(
    "./data",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# Shuffled loader over the training split, loaded in the main process.
dl_train = torch.utils.data.DataLoader(
    data_train,
    batch_size=300,
    shuffle=True,
    num_workers=0,
    pin_memory=False,
)
|
||||
|
||||
|
||||
class Aug_model(nn.Module):
    """Wrapper module holding a model and, optionally, one extra parameter.

    The wrapped model is stored under the key 'model' of an `nn.ModuleDict`.
    When `hyper_param` is true, a single parameter initialised to 0.5 is
    registered under `_params['hyper_param']`; it is not used in `forward`.
    """

    def __init__(self, model, hyper_param=True):
        super().__init__()

        #### Origin of the issue ? ####
        if hyper_param:
            extra_params = nn.ParameterDict()
            extra_params["hyper_param"] = nn.Parameter(torch.Tensor([0.5]))
            self._params = extra_params
        ###############################

        self._mods = nn.ModuleDict({'model': model})

    def forward(self, x):
        """Return the wrapped model's output for `x` (hyper-parameter scaling is disabled)."""
        wrapped = self._mods['model']
        return wrapped(x)  #* self._params['hyper_param']

    def __getitem__(self, key):
        """Return the sub-module stored under `key`."""
        return self._mods[key]
|
||||
|
||||
class Aug_model2(nn.Module):  #Slow increase like no hyper_param
    """Wrapper holding a model, its `higher`-patched copy and an optional parameter.

    The sub-modules are stored in an `nn.ModuleDict` under 'model' (the
    original) and 'fmodel' (the result of `higher.patch.monkeypatch`).
    `forward` multiplies the patched model's output by
    `_params['hyper_param']`, which only exists when `hyper_param` is true.
    """

    def __init__(self, model, hyper_param=True):
        super().__init__()

        #### Origin of the issue ? ####
        if hyper_param:
            self._params = nn.ParameterDict({
                "hyper_param": nn.Parameter(torch.Tensor([0.5])),
            })
        ###############################

        self._mods = nn.ModuleDict({
            'model': model,
            'fmodel': higher.patch.monkeypatch(model, device=None, copy_initial_weights=True),
        })

    def forward(self, x):
        """Return the patched model's output scaled by the hyper-parameter."""
        scale = self._params['hyper_param']
        return self._mods['fmodel'](x) * scale

    def get_diffopt(self, opt, track_higher_grads=True):
        """Return a `higher` differentiable optimizer built from `opt`.

        The reference parameters are those of the original model; the patched
        model is passed as `fmodel`.
        """
        reference_params = self._mods['model'].parameters()
        return higher.optim.get_diff_optim(
            opt,
            reference_params,
            fmodel=self._mods['fmodel'],
            track_higher_grads=track_higher_grads,
        )

    def __getitem__(self, key):
        """Return the sub-module stored under `key`."""
        return self._mods[key]
|
||||
|
||||
if __name__ == "__main__":
    # Reproduction script: time each differentiable-optimizer step and print
    # the number of fast parameters held by the patched model.

    device = torch.device('cuda:1')
    aug_model = Aug_model2(
        model=torch.hub.load('pytorch/vision:v0.4.2', 'resnet18', pretrained=False),
        hyper_param=True,  #False will not extend step time
    ).to(device)

    inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)

    #fmodel = higher.patch.monkeypatch(aug_model, device=None, copy_initial_weights=True)
    #diffopt = higher.optim.get_diff_optim(inner_opt, aug_model.parameters(),fmodel=fmodel,track_higher_grads=True)
    diffopt = aug_model.get_diffopt(inner_opt)

    for i, (xs, ys) in enumerate(dl_train):
        xs = xs.to(device)
        ys = ys.to(device)

        #logits = fmodel(xs)
        logits = aug_model(xs)
        log_probs = F.log_softmax(logits, dim=1)
        loss = F.cross_entropy(log_probs, ys, reduction='mean')

        t = time.process_time()
        diffopt.step(loss)  #(opt.zero_grad, loss.backward, opt.step)
        #print(len(fmodel._fast_params),"step", time.process_time()-t)
        print(len(aug_model['fmodel']._fast_params), "step", time.process_time()-t)
|
502
higher/smart_aug/old/model_old.py
Normal file
502
higher/smart_aug/old/model_old.py
Normal file
|
@ -0,0 +1,502 @@
|
|||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
## Basic CNN ##
|
||||
class LeNet_F(nn.Module):
    """LeNet-style CNN whose weights all live in one `nn.ParameterDict`.

    The network is two 5x5 convolutions (20 then 50 filters), each followed
    by ReLU and 2x2 max-pooling, then two linear layers (500 units, then
    `num_out`). The forward pass uses the functional API directly on the
    stored parameters and returns raw logits (no softmax).

    Args:
        num_inp: Number of input channels.
        num_out: Number of output classes.
    """

    def __init__(self, num_inp, num_out):
        super().__init__()
        zeros = torch.zeros
        self._params = nn.ParameterDict({
            'w1': nn.Parameter(zeros(20, num_inp, 5, 5)),
            'b1': nn.Parameter(zeros(20)),
            'w2': nn.Parameter(zeros(50, 20, 5, 5)),
            'b2': nn.Parameter(zeros(50)),
            # 5*5*50 is the flattened size for 32x32 inputs; the author's
            # alternative, 4*4*50, corresponds to 28x28 inputs.
            'w3': nn.Parameter(zeros(500, 5*5*50)),
            'b3': nn.Parameter(zeros(500)),
            'w4': nn.Parameter(zeros(num_out, 500)),
            'b4': nn.Parameter(zeros(num_out)),
        })
        self.initialize()

    def initialize(self):
        """Re-draw the four weight tensors with Kaiming-uniform init; biases are left untouched."""
        for weight_key in ("w1", "w2", "w3", "w4"):
            nn.init.kaiming_uniform_(self._params[weight_key], a=math.sqrt(5))

    def forward(self, x):
        """Return the logits for the input batch `x`."""
        p = self._params
        feats = F.relu(F.conv2d(input=x, weight=p["w1"], bias=p["b1"]))
        feats = F.max_pool2d(feats, 2)
        feats = F.relu(F.conv2d(input=feats, weight=p["w2"], bias=p["b2"]))
        feats = F.max_pool2d(feats, 2)
        flat = feats.view(feats.size(0), -1)  # one row per sample
        hidden = F.relu(F.linear(flat, p["w3"], p["b3"]))
        #return F.log_softmax(out, dim=1)
        return F.linear(hidden, p["w4"], p["b4"])

    def __getitem__(self, key):
        """Return the parameter stored under `key` (e.g. 'w1')."""
        return self._params[key]

    def __str__(self):
        return "LeNet"
|
||||
|
||||
|
||||
## MobileNetv2 ##
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
"""
|
||||
This function is taken from the original tf repo.
|
||||
It ensures that all layers have a channel number that is divisible by 8
|
||||
It can be seen here:
|
||||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
|
||||
:param v:
|
||||
:param divisor:
|
||||
:param min_value:
|
||||
:return:
|
||||
"""
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU6.

    The padding is `(kernel_size - 1) // 2`, so an odd kernel with stride 1
    keeps the spatial size.
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """MobileNetV2 inverted-residual block.

    Layout: optional 1x1 expansion (skipped when `expand_ratio == 1`), a 3x3
    depthwise convolution, then a linear 1x1 projection with batch norm.
    The input is added to the output only when `stride == 1` and
    `inp == oup`.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        # pw (pointwise expansion), only when the block actually expands
        layers = [] if expand_ratio == 1 else [ConvBNReLU(inp, hidden_dim, kernel_size=1)]
        # dw (depthwise)
        layers.append(ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim))
        # pw-linear (projection, no activation)
        layers.append(nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False))
        layers.append(nn.BatchNorm2d(oup))
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the skip connection when enabled."""
        out = self.conv(x)
        return x + out if self.use_res_connect else out
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
    """MobileNet V2 classifier: a stack of inverted-residual blocks plus a linear head."""

    def __init__(self,
                 num_classes=1000,
                 width_mult=1.0,
                 inverted_residual_setting=None,
                 round_nearest=8,
                 block=None):
        """
        MobileNet V2 main class

        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure, a list of [t, c, n, s] rows
                (expansion ratio, output channels, number of repeats, stride of the first repeat)
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
                Set to 1 to turn off rounding
            block: Module specifying inverted residual building block for mobilenet

        Raises:
            ValueError: if `inverted_residual_setting` is empty or its first row does not have 4 entries.
        """
        super().__init__()

        block = InvertedResidual if block is None else block
        stem_channels = 32
        head_channels = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(stem_channels * width_mult, round_nearest)
        self.last_channel = _make_divisible(head_channels * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]

        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for repeat in range(n):
                # Only the first repeat of a stage uses the stage stride.
                stride = s if repeat == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel

        # building last several layers
        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x):
        # This exists since TorchScript doesn't support inheritance, so the superclass method
        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
        feats = self.features(x)
        pooled = feats.mean([2, 3])  # average over the two trailing dimensions
        return self.classifier(pooled)

    def forward(self, x):
        """Return the class logits for `x`."""
        return self._forward_impl(x)

    def __str__(self):
        return "MobileNetV2"
|
||||
|
||||
## ResNet ##
|
||||
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding

    The padding equals `dilation` and the layer has no bias.
    """
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )
|
||||
|
||||
|
||||
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution (no bias)"""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 -> 3x3) with an identity or downsampled skip.

    Raises:
        ValueError: if `groups != 1` or `base_width != 64`.
        NotImplementedError: if `dilation > 1`.
    """
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(residual_branch(x) + skip(x))."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The skip path is the raw input unless a downsample module was given.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with a 4x channel expansion.

    The inner width is `int(planes * (base_width / 64.)) * groups`; the
    output has `planes * expansion` channels.
    """
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = norm_layer(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(residual_branch(x) + skip(x))."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The skip path is the raw input unless a downsample module was given.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)
|
||||
|
||||
#ResNet18 : block=BasicBlock, layers=[2, 2, 2, 2]
|
||||
class ResNet(nn.Module):
    """ResNet classifier; the defaults (BasicBlock, 2-2-2-2) build a ResNet-18.

    Args:
        block: Residual block class (`BasicBlock` or `Bottleneck`).
        layers: Number of blocks in each of the four stages. Only indexed,
            never modified, so any 4-element sequence works.
        num_classes: Size of the final linear layer's output.
        zero_init_residual: Zero the last batch-norm weight of every block so
            that each block starts as an identity.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block as `base_width`.
        replace_stride_with_dilation: Three booleans (stages 2-4) selecting
            dilation instead of stride; `None` means all False.
        norm_layer: Normalisation layer factory; defaults to `nn.BatchNorm2d`.

    Raises:
        ValueError: if `replace_stride_with_dilation` does not have 3 elements.
    """

    # The default for `layers` is a tuple rather than a list to avoid a
    # shared mutable default argument.
    def __init__(self, block=BasicBlock, layers=(2, 2, 2, 2), num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of `blocks` residual blocks and update `self.inplanes`.

        When `dilate` is true the stride is folded into `self.dilation` and
        the stage itself uses stride 1. A 1x1-conv + norm downsample path is
        created for the first block whenever the stride or channel count
        changes.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def forward(self, x):
        """Return the class logits for `x`."""
        return self._forward_impl(x)

    def __str__(self):
        return "ResNet18"
|
||||
|
||||
## Wide ResNet ##
|
||||
#https://github.com/xternalz/WideResNet-pytorch/blob/master/wideresnet.py
|
||||
#https://github.com/arcelien/pba/blob/master/pba/wrn.py
|
||||
#https://github.com/szagoruyko/wide-residual-networks/blob/master/pytorch/resnet.py
|
||||
'''
|
||||
class BasicBlock(nn.Module):
|
||||
def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.bn1 = nn.BatchNorm2d(in_planes)
|
||||
self.relu1 = nn.ReLU(inplace=True)
|
||||
self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
|
||||
padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(out_planes)
|
||||
self.relu2 = nn.ReLU(inplace=True)
|
||||
self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
|
||||
padding=1, bias=False)
|
||||
self.droprate = dropRate
|
||||
self.equalInOut = (in_planes == out_planes)
|
||||
self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
|
||||
padding=0, bias=False) or None
|
||||
def forward(self, x):
|
||||
if not self.equalInOut:
|
||||
x = self.relu1(self.bn1(x))
|
||||
else:
|
||||
out = self.relu1(self.bn1(x))
|
||||
out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
|
||||
if self.droprate > 0:
|
||||
out = F.dropout(out, p=self.droprate, training=self.training)
|
||||
out = self.conv2(out)
|
||||
return torch.add(x if self.equalInOut else self.convShortcut(x), out)
|
||||
|
||||
class NetworkBlock(nn.Module):
|
||||
def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
|
||||
super(NetworkBlock, self).__init__()
|
||||
self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)
|
||||
def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
|
||||
layers = []
|
||||
for i in range(int(nb_layers)):
|
||||
layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate))
|
||||
return nn.Sequential(*layers)
|
||||
def forward(self, x):
|
||||
return self.layer(x)
|
||||
|
||||
#wrn_size: 32 = WRN-28-2 ? 160 = WRN-28-10
|
||||
class WideResNet(nn.Module):
|
||||
#def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
|
||||
def __init__(self, num_classes, wrn_size, depth=28, dropRate=0.0):
|
||||
super(WideResNet, self).__init__()
|
||||
|
||||
self.kernel_size = wrn_size
|
||||
self.depth=depth
|
||||
filter_size = 3
|
||||
nChannels = [min(self.kernel_size, 16), self.kernel_size, self.kernel_size * 2, self.kernel_size * 4]
|
||||
strides = [1, 2, 2] # stride for each resblock
|
||||
|
||||
#nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
|
||||
assert((depth - 4) % 6 == 0)
|
||||
n = (depth - 4) / 6
|
||||
block = BasicBlock
|
||||
# 1st conv before any network block
|
||||
self.conv1 = nn.Conv2d(filter_size, nChannels[0], kernel_size=3, stride=1,
|
||||
padding=1, bias=False)
|
||||
# 1st block
|
||||
self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, strides[0], dropRate)
|
||||
# 2nd block
|
||||
self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, strides[1], dropRate)
|
||||
# 3rd block
|
||||
self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, strides[2], dropRate)
|
||||
# global average pooling and classifier
|
||||
self.bn1 = nn.BatchNorm2d(nChannels[3])
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.fc = nn.Linear(nChannels[3], num_classes)
|
||||
self.nChannels = nChannels[3]
|
||||
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
m.weight.data.fill_(1)
|
||||
m.bias.data.zero_()
|
||||
elif isinstance(m, nn.Linear):
|
||||
m.bias.data.zero_()
|
||||
def forward(self, x):
|
||||
out = self.conv1(x)
|
||||
out = self.block1(out)
|
||||
out = self.block2(out)
|
||||
out = self.block3(out)
|
||||
out = self.relu(self.bn1(out))
|
||||
out = F.avg_pool2d(out, 8)
|
||||
out = out.view(-1, self.nChannels)
|
||||
return self.fc(out)
|
||||
|
||||
def architecture(self):
|
||||
return super(WideResNet, self).__str__()
|
||||
|
||||
def __str__(self):
|
||||
return "WideResNet(s{}-d{})".format(self.kernel_size, self.depth)
|
||||
'''
|
150
higher/smart_aug/old/test_lr.py
Executable file
150
higher/smart_aug/old/test_lr.py
Executable file
|
@ -0,0 +1,150 @@
|
|||
import numpy as np
|
||||
import json, math, time, os
|
||||
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
import torch.optim as optim
|
||||
import higher
|
||||
from model import *
|
||||
|
||||
import copy
|
||||
|
||||
BATCH_SIZE = 300
TEST_SIZE = 300

# MNIST training split (downloaded into ./data when missing), converted to tensors.
mnist_train = torchvision.datasets.MNIST(
    "./data",
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([
        #torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
        torchvision.transforms.ToTensor(),
    ]),
)

mnist_test = torchvision.datasets.MNIST(
    "./data",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# Training uses only the first BATCH_SIZE samples (a single batch per epoch);
# validation uses the second half of the training split.
#train_subset_indices=range(int(len(mnist_train)/2))
train_subset_indices = range(BATCH_SIZE)
val_subset_indices = range(int(len(mnist_train)/2), len(mnist_train))

dl_train = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=SubsetRandomSampler(train_subset_indices),
)
dl_val = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=SubsetRandomSampler(val_subset_indices),
)
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=TEST_SIZE, shuffle=False)
|
||||
|
||||
|
||||
def test(model):
    """Return the accuracy (in percent) of `model` on one batch of `dl_test`.

    The model is switched to eval mode and is left in that mode. The number
    of correct predictions is divided by `TEST_SIZE`, the batch size of
    `dl_test`.
    """
    model.eval()
    for features, labels in dl_test:
        predicted = model.forward(features).argmax(dim=1)
        n_correct = predicted.eq(labels).sum().item()
        # NOTE(review): the source's indentation was lost; the return is
        # assumed to sit inside the loop (score on the first batch only) -
        # confirm against the original file.
        return n_correct / TEST_SIZE * 100
|
||||
|
||||
def train_classic(model, optim, epochs=1):
    """Train `model` on `dl_train` with a regular optimizer and print a summary.

    Args:
        model: Module to train; it is put in train mode first.
        optim: Optimizer already bound to `model`'s parameters.
        epochs: Number of passes over `dl_train`.

    Prints the accuracy returned by `test(model)` and the mean / standard
    deviation of the per-epoch CPU time. `time.process_time()` reports
    seconds, so the figures are labelled as seconds.
    """
    model.train()
    log = []
    for epoch in range(epochs):
        t0 = time.process_time()
        for i, (features, labels) in enumerate(dl_train):

            optim.zero_grad()
            pred = model.forward(features)
            loss = F.cross_entropy(pred, labels)
            loss.backward()
            optim.step()

        #### Log ####
        tf = time.process_time()
        data = {
            "time": tf - t0,
        }
        log.append(data)

    times = [x["time"] for x in log]
    print("Vanilla : acc", test(model), "in (s):", np.mean(times), "+/-", np.std(times))
|
||||
##########################################
|
||||
if __name__ == "__main__":
    # Experiment: learn the inner SGD learning rate and momentum by
    # back-propagating a validation loss through `higher`'s differentiable
    # optimizer, after a plain Adam baseline run.

    device = torch.device('cpu')

    model = LeNet(1,10)
    # The optimizer hyper-parameters are tensors requiring grad so that
    # meta_opt can update them from the validation loss.
    opt_param = {
        "lr": torch.tensor(1e-2).requires_grad_(),
        "momentum": torch.tensor(0.9).requires_grad_()
    }
    n_inner_iter = 1
    dl_train_it = iter(dl_train)
    dl_val_it = iter(dl_val)
    epoch = 0
    epochs = 10

    #### Baseline ####
    train_classic(model=model, optim=torch.optim.Adam(model.parameters(), lr=0.001), epochs=epochs)
    model = LeNet(1,10)  # fresh model for the meta-learning run

    meta_opt = torch.optim.Adam(opt_param.values(), lr=1e-2)
    inner_opt = torch.optim.SGD(model.parameters(), lr=opt_param['lr'], momentum=opt_param['momentum'])
    #for xs_val, ys_val in dl_val:
    while epoch < epochs:
        #print(data_aug.params["mag"], data_aug.params["mag"].grad)
        meta_opt.zero_grad()
        model.train()
        with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt): # effect of copy_initial_weights is unclear...

            # Point the differentiable optimizer at the learnable tensors.
            for param_group in diffopt.param_groups:
                param_group['lr'] = opt_param['lr']
                param_group['momentum'] = opt_param['momentum']

            for i in range(n_inner_iter):
                try:
                    xs, ys = next(dl_train_it)
                except StopIteration: # end of training epoch
                    epoch +=1
                    dl_train_it = iter(dl_train)
                    xs, ys = next(dl_train_it)

                    # `loss` and `val_loss` come from the previous iteration.
                    print('Epoch', epoch)
                    print('train loss',loss.item(), '/ val loss', val_loss.item())
                    print('acc', test(model))
                    print('opt : lr', opt_param['lr'].item(), 'momentum', opt_param['momentum'].item())
                    print('-'*9)
                    model.train()  # test() switched the model to eval mode

                logits = fmodel(xs)  # modified `params` can also be passed as a kwarg
                loss = F.cross_entropy(logits, ys)  # no need to call loss.backwards()
                #print('loss',loss.item())
                diffopt.step(loss)  # note that `step` must take `loss` as an argument!
                # The line above gets P[t+1] from P[t] and loss[t]. `step` also returns
                # these new parameters, as an alternative to getting them from
                # `fmodel.fast_params` or `fmodel.parameters()` after calling
                # `diffopt.step`.

                # At this point, or at any point in the iteration, you can take the
                # gradient of `fmodel.parameters()` (or equivalently
                # `fmodel.fast_params`) w.r.t. `fmodel.parameters(time=0)` (equivalently
                # `fmodel.init_fast_params`). i.e. `fast_params` will always have
                # `grad_fn` as an attribute, and be part of the gradient tape.

            # At the end of your inner loop you can obtain these e.g. ...
            #grad_of_grads = torch.autograd.grad(
            #    meta_loss_fn(fmodel.parameters()), fmodel.parameters(time=0))
            try:
                xs_val, ys_val = next(dl_val_it)
            except StopIteration: # end of validation epoch
                # Restart from the loader: calling iter() on the exhausted
                # iterator would return it unchanged and the next() below
                # would raise StopIteration again.
                dl_val_it = iter(dl_val)
                xs_val, ys_val = next(dl_val_it)

            val_logits = fmodel(xs_val)
            val_loss = F.cross_entropy(val_logits, ys_val)
            #print('val_loss',val_loss.item())

            val_loss.backward()
            #meta_grads = torch.autograd.grad(val_loss, opt_lr, allow_unused=True)
            #print(meta_grads)
            for param_group in diffopt.param_groups:
                print(param_group['lr'], '/',param_group['lr'].grad)
                print(param_group['momentum'], '/',param_group['momentum'].grad)

            #model=copy.deepcopy(fmodel)
            # Carry the weights reached by the patched model over to `model`.
            model.load_state_dict(fmodel.state_dict())

        meta_opt.step()
|
866
higher/smart_aug/old/train_utils_old.py
Normal file
866
higher/smart_aug/old/train_utils_old.py
Normal file
|
@ -0,0 +1,866 @@
|
|||
import torch
|
||||
#import torch.optim
|
||||
import torchvision
|
||||
import higher
|
||||
|
||||
from datasets import *
|
||||
from utils import *
|
||||
|
||||
def train_classic_higher(model, epochs=1):
    """Train `model` with SGD driven through `higher`'s differentiable optimizer.

    Args:
        model: Module to train; its device is taken from its first parameter.
        epochs: Number of passes over `dl_train`.

    Returns:
        A list with one dict per epoch holding the keys "epoch",
        "train_loss", "val_loss", "acc", "time" and "param" (always None).

    NOTE(review): the source's indentation was lost. The copy-back /
    re-patch step is assumed to run once per epoch, after the batch loop -
    confirm against the original file.
    NOTE(review): the training loss is computed with `model`, not `fmodel` -
    confirm this is intended.
    """
    device = next(model.parameters()).device
    #opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

    model.train()
    dl_val_it = iter(dl_val)
    log = []

    fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
    diffopt = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)
    #with higher.innerloop_ctx(model, optim, copy_initial_weights=True, track_higher_grads=False) as (fmodel, diffopt):

    for epoch in range(epochs):
        #print_torch_mem("Start epoch "+str(epoch))
        #print("Fast param ",len(fmodel._fast_params))
        t0 = time.process_time()
        for features, labels in dl_train:
            #print_torch_mem("Start iter")
            features = features.to(device)
            labels = labels.to(device)

            logits = model.forward(features)
            pred = F.log_softmax(logits, dim=1)
            loss = F.cross_entropy(pred, labels)
            # Replaces the usual optim.zero_grad / loss.backward / optim.step.
            diffopt.step(loss)

        # Copy the patched model and optimizer back, then patch again.
        model_copy(src=fmodel, dst=model, patch_copy=False)
        optim_copy(dopt=diffopt, opt=optim)
        fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
        diffopt = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)

        #### Tests ####
        tf = time.process_time()
        try:
            xs_val, ys_val = next(dl_val_it)
        except StopIteration:  # end of validation epoch
            dl_val_it = iter(dl_val)
            xs_val, ys_val = next(dl_val_it)
        xs_val, ys_val = xs_val.to(device), ys_val.to(device)

        val_loss = F.cross_entropy(model(xs_val), ys_val)
        accuracy, _ = test(model)
        model.train()

        #### Log ####
        log.append({
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "param": None,
        })

    return log
|
||||
|
||||
def train_classic_tests(model, epochs=1):
    """Experimental SGD training loop used to test resetting `higher` objects.

    After each optimisation step, if the patched model holds more than one
    set of fast parameters, the weights are copied back into `model`, the
    optimizer state is copied from the differentiable optimizer into `optim`,
    and both patched objects are rebuilt.

    Relies on names that are not defined in this function (`dl_train`,
    `dl_val`, `test`, `model_copy`, `optim_copy`, `print_torch_mem`, `F`,
    `time`); they are presumably provided by the star imports of this module.

    Args:
        model (nn.Module): Model to train. Data is moved to the device of its first parameter.
        epochs (int): Number of epochs to perform. (default: 1)

    Returns:
        (list) Logs of training. Each item is a dict containing results of an epoch.
    """
    device = next(model.parameters()).device
    optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

    countcopy = 0
    model.train()
    dl_val_it = iter(dl_val)
    log = []

    fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
    doptim = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=False)
    for epoch in range(epochs):
        print_torch_mem("Start epoch")
        print(len(fmodel._fast_params))
        t0 = time.process_time()

        for features, labels in dl_train:
            features, labels = features.to(device), labels.to(device)

            pred = fmodel.forward(features)
            loss = F.cross_entropy(pred, labels)
            doptim.step(loss)  # replaces opt.zero_grad / loss.backward / opt.step

            if len(fmodel._fast_params) > 1:
                print("fmodel fast param", len(fmodel._fast_params))

                # Reset the patched model from the freshly copied weights.
                countcopy += 1
                model_copy(src=fmodel, dst=model, patch_copy=False)
                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)

                # Carry the optimizer state (e.g. momentum buffers) over to the regular optimizer.
                for group_idx, group in enumerate(optim.param_groups):
                    for p_idx, p in enumerate(group['params']):
                        optim.state[p] = doptim.state[group_idx][p_idx]

                # The new differentiable optimizer is bound to the new fmodel,
                # otherwise the next `step(loss)` has no parameters to update.
                doptim = higher.optim.get_diff_optim(optim, model.parameters(), fmodel=fmodel, track_higher_grads=True)

        # Final sync before evaluation (the original referenced the undefined
        # names `diffopt` / `inner_opt` here).
        # NOTE(review): nesting level reconstructed from a whitespace-mangled source - confirm.
        countcopy += 1
        model_copy(src=fmodel, dst=model)
        optim_copy(dopt=doptim, opt=optim)

        #### Tests ####
        tf = time.process_time()
        try:
            xs_val, ys_val = next(dl_val_it)
        except StopIteration:  # End of a validation epoch: restart the iterator
            dl_val_it = iter(dl_val)
            xs_val, ys_val = next(dl_val_it)
        xs_val, ys_val = xs_val.to(device), ys_val.to(device)

        val_loss = F.cross_entropy(model(xs_val), ys_val)
        accuracy, _ = test(model)
        model.train()

        #### Log ####
        data = {
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "param": None,
        }
        log.append(data)

    print("Copy ", countcopy)
    return log
|
||||
|
||||
|
||||
from torchvision.datasets.vision import VisionDataset
|
||||
from PIL import Image
|
||||
import augmentation_transforms
|
||||
import numpy as np
|
||||
class AugmentedDatasetV2(VisionDataset):
    """CIFAR10 wrapper pairing augmented copies of images with their originals.

    The supervised images/targets are read from `torchvision.datasets.CIFAR10`.
    `augement_data` must be called to fill the augmented ("unsup") data: the
    dataset length and `__getitem__` only cover the augmented samples.
    """

    def __init__(self, root, train=True, transform=None, target_transform=None, download=False, subset=None):
        """Load the CIFAR10 data and build the list of available operations.

        Args:
            root (str): Root directory of the dataset (forwarded to CIFAR10).
            train (bool): Forwarded to CIFAR10. (default: True)
            transform (callable, optional): Applied to both the augmented and the original image in `__getitem__`.
            target_transform (callable, optional): Applied to the target in `__getitem__`.
            download (bool): Forwarded to CIFAR10. (default: False)
            subset (tuple, optional): (start, end) slice bounds applied to the data and targets.
        """
        super().__init__(root, transform=transform, target_transform=target_transform)

        cifar = torchvision.datasets.CIFAR10(root, train=train, download=download, transform=transform)

        if subset:
            first, last = subset[0], subset[1]
            self.sup_data = cifar.data[first:last]
            self.sup_targets = cifar.targets[first:last]
        else:
            self.sup_data = cifar.data
            self.sup_targets = cifar.targets
        assert len(self.sup_data) == len(self.sup_targets)

        # Each image goes through PIL before being written back in place.
        for position, picture in enumerate(self.sup_data):
            self.sup_data[position] = Image.fromarray(picture)

        self.unsup_data = []
        self.unsup_targets = []
        self.origin_idx = []

        n_sup = len(self.sup_data)
        n_unsup = len(self.unsup_data)
        self.dataset_info = {
            'name': 'CIFAR10',
            'sup': n_sup,
            'unsup': n_unsup,
            'length': n_sup + n_unsup,
        }

        geometric_tf = ['Rotate', 'TranslateX', 'TranslateY', 'ShearX', 'ShearY', 'Cutout']
        color_tf = ['Contrast', 'Color', 'Brightness', 'Sharpness', 'Posterize', 'Solarize',
                    'Invert', 'AutoContrast', 'Equalize']
        self._TF = geometric_tf + color_tf

        self.prob = 0.5
        self.mag_range = (1, 10)
        # One (name, probability, magnitude) entry per transformation and per magnitude of the range.
        self._op_list = [
            (name, self.prob, level)
            for name in self._TF
            for level in range(self.mag_range[0], self.mag_range[1])
        ]
        self._nb_op = len(self._op_list)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index of an augmented sample.

        Returns:
            tuple: (augmented image, original image, target) where target is index of the target class.
        """
        augmented = self.unsup_data[index]
        original = self.sup_data[self.origin_idx[index]]
        label = self.unsup_targets[index]

        if self.transform is not None:
            augmented = self.transform(augmented)
            original = self.transform(original)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return augmented, original, label

    def augement_data(self, aug_copy=1):
        """Generate `aug_copy` augmented copies of every supervised image.

        Each copy is produced by a randomly chosen policy made of two entries
        of the operation list; its target and the index of its source image
        are recorded alongside.

        Args:
            aug_copy (int): Number of augmented copies per image. (default: 1)
        """
        # Every ordered pair of operations is a candidate policy.
        policies = [[first_op, second_op] for first_op in self._op_list for second_op in self._op_list]

        total = self.dataset_info['sup']
        for idx, image in enumerate(self.sup_data):
            if idx % (total / 5) == 0:
                print("Augmenting data... ", idx, "/", total)

            for _ in range(aug_copy):
                chosen_policy = policies[np.random.choice(len(policies))]
                # apply_policy returns a float image
                aug_image = augmentation_transforms.apply_policy(chosen_policy, image, use_mean_std=False)

                # Cast the float image back to the dtype of the source data
                self.unsup_data += [(aug_image * 255.).astype(self.sup_data.dtype)]
                self.unsup_targets.append(self.sup_targets[idx])
                self.origin_idx.append(idx)

        self.unsup_data = np.array(self.unsup_data)

        assert len(self.unsup_data) == len(self.unsup_targets)

        self.dataset_info['unsup'] = len(self.unsup_data)
        self.dataset_info['length'] = self.dataset_info['sup'] + self.dataset_info['unsup']

    def __len__(self):
        """Return the number of augmented samples."""
        return self.dataset_info['unsup']

    def __str__(self):
        """Return a short description of the dataset content."""
        info = self.dataset_info
        low, high = self.mag_range[0], self.mag_range[1]
        return "CIFAR10(Sup:{}-Unsup:{}-{}TF(Mag{}-{}))".format(info['sup'], info['unsup'], len(self._TF), low, high)
|
||||
|
||||
def train_UDA(model, dl_unsup, opt_param, epochs=1, print_freq=1):
    """Training of a model using UDA inspired approach.

    Intended to be used alongside an already augmented dataset (see AugmentedDatasetV2).

    The loss is the supervised cross-entropy on `dl_train` batches plus a
    KL-divergence term between the predictions on original images and on
    their augmented versions.

    Relies on names that are not defined in this function (`dl_train`,
    `dl_val`, `test`, `F`, `time`); they are presumably provided by the star
    imports of this module.

    Args:
        model (nn.Module): Model to train.
        dl_unsup (Dataloader): Data loader of unsupervised/augmented data, yielding (augmented, original, target) batches.
        opt_param (dict): Dictionary containing optimizers parameters (reads ['Inner']['lr'] and ['Inner']['momentum']).
        epochs (int): Number of epochs to perform. (default: 1)
        print_freq (int): Number of epoch between display of the state of training. If set to None, no display will be done. (default:1)

    Returns:
        (list) Logs of training. Each items is a dict containing results of an epoch.
    """
    device = next(model.parameters()).device
    opt = torch.optim.SGD(model.parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum'])

    model.train()
    dl_val_it = iter(dl_val)
    dl_unsup_it = iter(dl_unsup)
    log = []
    for epoch in range(epochs):
        t0 = time.process_time()
        for features, labels in dl_train:
            features, labels = features.to(device), labels.to(device)

            # Use the optimizer created above (the original called an undefined `optim`).
            opt.zero_grad()

            # Supervised
            logits = model.forward(features)
            pred = F.log_softmax(logits, dim=1)
            sup_loss = F.cross_entropy(pred, labels)

            # Unsupervised
            try:
                aug_xs, origin_xs, ys = next(dl_unsup_it)
            except StopIteration:  # End of an epoch of augmented data: restart the iterator
                dl_unsup_it = iter(dl_unsup)
                aug_xs, origin_xs, ys = next(dl_unsup_it)
            aug_xs, origin_xs, ys = aug_xs.to(device), origin_xs.to(device), ys.to(device)

            sup_logits = model.forward(origin_xs)
            unsup_logits = model.forward(aug_xs)

            log_sup = F.log_softmax(sup_logits, dim=1)
            log_unsup = F.log_softmax(unsup_logits, dim=1)
            # KL divergence computed from the logits
            unsup_loss = F.softmax(sup_logits, dim=1) * (log_sup - log_unsup)
            unsup_loss = unsup_loss.sum(dim=-1).mean()

            unsupp_coeff = 1
            loss = sup_loss + unsup_loss * unsupp_coeff

            loss.backward()
            opt.step()

        #### Tests ####
        tf = time.process_time()
        try:
            xs_val, ys_val = next(dl_val_it)
        except StopIteration:  # End of a validation epoch: restart the iterator
            dl_val_it = iter(dl_val)
            xs_val, ys_val = next(dl_val_it)
        xs_val, ys_val = xs_val.to(device), ys_val.to(device)

        val_loss = F.cross_entropy(model(xs_val), ys_val)
        accuracy, _ = test(model)
        model.train()

        #### Print ####
        if print_freq and epoch % print_freq == 0:
            print('-'*9)
            print('Epoch : %d/%d'%(epoch,epochs))
            print('Time : %.00f'%(tf - t0))
            print('Train loss :',loss.item(), '/ val loss', val_loss.item())
            print('Sup Loss :', sup_loss.item(), '/ unsup_loss :', unsup_loss.item())
            print('Accuracy :', accuracy)

        #### Log ####
        data = {
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "param": None,
        }
        log.append(data)

    return log
|
||||
|
||||
|
||||
def run_simple_dataug(inner_it, epochs=1):
    """Train a LeNet behind a `Data_aug` module and meta-learn its magnitude.

    Each outer iteration unrolls `inner_it` SGD steps with `higher`, then
    back-propagates a validation loss (computed without augmentation) to
    update the data augmentation parameters with Adam. Results are plotted
    with `plot_res` and a summary is printed; nothing is returned.

    Relies on names that are not defined in this function (`dl_train`,
    `dl_val`, `test`, `plot_res`, `Augmented_model`, `Data_aug`, `LeNet`,
    `F`, `time`, `np`); they are presumably provided by the star imports.
    NOTE(review): `device` is read from an outer-scope `model` that this
    function does not receive - confirm where it is defined.

    Args:
        inner_it (int): Number of inner (unrolled) training steps per meta step.
        epochs (int): Number of epochs to perform. (default: 1)
    """
    device = next(model.parameters()).device
    dl_train_it = iter(dl_train)
    dl_val_it = iter(dl_val)

    aug_model = Augmented_model(Data_aug(), LeNet(1,10)).to(device)
    print(str(aug_model))
    meta_opt = torch.optim.Adam(aug_model['data_aug'].parameters(), lr=1e-2)
    inner_opt = torch.optim.SGD(aug_model['model'].parameters(), lr=1e-2, momentum=0.9)

    log = []
    t0 = time.process_time()
    val_loss = torch.tensor(0)  # Needed if an epoch ends before the first meta step

    epoch = 0
    while epoch < epochs:
        meta_opt.zero_grad()
        aug_model.train()
        with higher.innerloop_ctx(aug_model, inner_opt, copy_initial_weights=True, track_higher_grads=True) as (fmodel, diffopt):

            # The original looped on an undefined `n_inner_iter`.
            for i in range(inner_it):
                try:
                    xs, ys = next(dl_train_it)
                except StopIteration:  # End of a training epoch
                    tf = time.process_time()
                    epoch += 1
                    dl_train_it = iter(dl_train)
                    xs, ys = next(dl_train_it)

                    # Evaluate the model being trained (the original tested the outer `model`).
                    accuracy, _ = test(aug_model)
                    aug_model.train()

                    #### Print ####
                    print('-'*9)
                    print('Epoch %d/%d'%(epoch,epochs))
                    print('train loss',loss.item(), '/ val loss', val_loss.item())
                    print('acc', accuracy)
                    print('mag', aug_model['data_aug']['mag'].item())

                    #### Log ####
                    data = {
                        "epoch": epoch,
                        "train_loss": loss.item(),
                        "val_loss": val_loss.item(),
                        "acc": accuracy,
                        "time": tf - t0,

                        "param": aug_model['data_aug']['mag'].item(),
                    }
                    log.append(data)
                    t0 = time.process_time()

                xs, ys = xs.to(device), ys.to(device)

                logits = fmodel(xs)
                loss = F.cross_entropy(logits, ys)
                # `step` takes the loss itself: no loss.backward() call is needed.
                diffopt.step(loss)

            try:
                xs_val, ys_val = next(dl_val_it)
            except StopIteration:  # End of a validation epoch: restart the iterator
                dl_val_it = iter(dl_val)
                xs_val, ys_val = next(dl_val_it)
            xs_val, ys_val = xs_val.to(device), ys_val.to(device)

            fmodel.augment(mode=False)
            val_logits = fmodel(xs_val)  # Validation without transformations
            val_loss = F.cross_entropy(val_logits, ys_val)
            val_loss.backward()

            aug_model.load_state_dict(fmodel.state_dict())  # Does not copy the gradients
            # Copy the gradients, matching parameters by name
            grads_by_name = {name: value.grad for name, value in fmodel.named_parameters()}
            for name, value in aug_model.named_parameters():
                if name in grads_by_name:
                    value.grad = grads_by_name[name]

            meta_opt.step()

    plot_res(log, fig_name="res/{}-{} epochs- {} in_it".format(str(aug_model),epochs,inner_it))
    print('-'*9)
    times = [x["time"] for x in log]
    print(str(aug_model),": acc", max([x["acc"] for x in log]), "in (ms):", np.mean(times), "+/-", np.std(times))
|
||||
|
||||
def run_dist_dataug(model, epochs=1, inner_it=1, dataug_epoch_start=0):
    """Train an augmented model and meta-learn its transformation probabilities.

    Each outer iteration unrolls `inner_it` SGD steps with `higher`, then
    back-propagates a validation loss (computed without augmentation),
    copies the weights and optimizer state back, and updates the data
    augmentation parameters with Adam.

    Relies on names that are not defined in this function (`dl_train`,
    `dl_val`, `test`, `model_copy`, `optim_copy`, `F`, `time`); they are
    presumably provided by the star imports of this module.

    Args:
        model (nn.Module): Augmented model, indexable with 'data_aug' and 'model'.
        epochs (int): Number of epochs to perform. (default: 1)
        inner_it (int): Number of inner (unrolled) training steps per meta step. (default: 1)
        dataug_epoch_start (int): Epoch at which data augmentation is enabled. If greater than 0,
            training starts without augmentation and without higher-order gradient tracking. (default: 0)

    Returns:
        (list) Logs of training. Each item is a dict containing results of an epoch.
    """
    device = next(model.parameters()).device
    dl_train_it = iter(dl_train)
    dl_val_it = iter(dl_val)

    meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=1e-3)
    inner_opt = torch.optim.SGD(model['model'].parameters(), lr=1e-2, momentum=0.9)

    high_grad_track = True
    if dataug_epoch_start > 0:
        model.augment(mode=False)
        high_grad_track = False

    model.train()

    log = []
    t0 = time.process_time()

    countcopy = 0
    val_loss = torch.tensor(0)
    opt_param = None

    epoch = 0
    while epoch < epochs:
        meta_opt.zero_grad()
        with higher.innerloop_ctx(model, inner_opt, copy_initial_weights=True, override=opt_param, track_higher_grads=high_grad_track) as (fmodel, diffopt):

            # The original looped on an undefined `n_inner_iter`.
            for i in range(inner_it):
                try:
                    xs, ys = next(dl_train_it)
                except StopIteration:  # End of a training epoch
                    tf = time.process_time()
                    epoch += 1
                    dl_train_it = iter(dl_train)
                    xs, ys = next(dl_train_it)

                    accuracy, _ = test(model)
                    model.train()

                    #### Print ####
                    print('-'*9)
                    print('Epoch : %d/%d'%(epoch,epochs))
                    print('Train loss :',loss.item(), '/ val loss', val_loss.item())
                    print('Accuracy :', accuracy)
                    print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
                    print('TF Proba :', model['data_aug']['prob'].data)
                    #############
                    #### Log ####
                    data = {
                        "epoch": epoch,
                        "train_loss": loss.item(),
                        "val_loss": val_loss.item(),
                        "acc": accuracy,
                        "time": tf - t0,

                        # Plain floats: tensor slices would not be serialisable and
                        # may keep referencing the live parameter.
                        "param": [p.item() for p in model['data_aug']['prob']],
                    }
                    log.append(data)
                    #############

                    if epoch == dataug_epoch_start:
                        print('Starting Data Augmention...')
                        model.augment(mode=True)
                        high_grad_track = True

                    t0 = time.process_time()

                xs, ys = xs.to(device), ys.to(device)

                # Uniform method: one forward pass, per-sample losses weighted by the augmentation module
                logits = fmodel(xs)
                loss = F.cross_entropy(logits, ys, reduction='none')
                if fmodel._data_augmentation:  # Weight loss
                    w_loss = fmodel['data_aug'].loss_weight().to(device)
                    loss = loss * w_loss
                loss = loss.mean()

                diffopt.step(loss)  # replaces opt.zero_grad / loss.backward / opt.step

            try:
                xs_val, ys_val = next(dl_val_it)
            except StopIteration:  # End of a validation epoch: restart the iterator
                dl_val_it = iter(dl_val)
                xs_val, ys_val = next(dl_val_it)
            xs_val, ys_val = xs_val.to(device), ys_val.to(device)

            fmodel.augment(mode=False)  # Validation without transformations
            val_loss = F.cross_entropy(fmodel(xs_val), ys_val)

            val_loss.backward()

            countcopy += 1
            model_copy(src=fmodel, dst=model)
            optim_copy(dopt=diffopt, opt=inner_opt)

            meta_opt.step()
            model['data_aug'].adjust_param()  # Constraint: sum(proba)=1

    print("Copy ", countcopy)
    return log
|
||||
|
||||
def run_dist_dataugV2(model, opt_param, epochs=1, inner_it=0, dataug_epoch_start=0, print_freq=1, KLdiv=False, loss_patience=None, save_sample=False):
    """Train an augmented model and meta-learn its augmentation parameters.

    The model is patched with `higher` and trained with a differentiable SGD
    optimizer. Every `inner_it` iterations a meta step is performed: a
    validation loss is back-propagated, the weights and optimizer state are
    copied back to `model` / `inner_opt`, the data augmentation parameters
    are updated with Adam and the patched objects are rebuilt.

    Relies on names that are not defined in this function (`dl_train`,
    `dl_val`, `test`, `compute_vaLoss`, `model_copy`, `optim_copy`,
    `loss_monitor`, `viz_sample_data`, `F`, `time`); they are presumably
    provided by the star imports of this module.

    Args:
        model (nn.Module): Augmented model, indexable with 'data_aug' and 'model'.
        opt_param (dict): Optimizers parameters (reads ['Meta']['lr'], ['Inner']['lr'] and ['Inner']['momentum']).
        epochs (int): Number of epochs to perform. (default: 1)
        inner_it (int): Number of inner iterations between meta steps. If 0, no meta step is
            performed and higher-order gradients are not tracked. (default: 0)
        dataug_epoch_start (int): Epoch at which data augmentation is enabled. If not 0, training
            starts without augmentation. With `loss_patience` set, augmentation also starts when
            the loss monitor reports its first limit; -1 makes the monitor use two limits. (default: 0)
        print_freq (int): Number of epoch between display of the state of training. If set to None, no display will be done. (default: 1)
        KLdiv (bool): If True, use the loss combining supervised cross-entropy, KL divergence and
            weighted cross-entropy on augmented data, instead of the weighted cross-entropy alone. (default: False)
        loss_patience (int, optional): Patience given to the loss monitor. If None, no monitoring is done. (default: None)
        save_sample (bool): Not used by this function. (default: False)

    Returns:
        (list) Logs of training. Each item is a dict containing results of an epoch.
    """
    device = next(model.parameters()).device
    log = []
    countcopy = 0
    val_loss = torch.tensor(0)  # Needed if no meta step happens during an epoch
    dl_val_it = iter(dl_val)

    meta_opt = torch.optim.Adam(model['data_aug'].parameters(), lr=opt_param['Meta']['lr'])
    inner_opt = torch.optim.SGD(model['model'].parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum'])

    high_grad_track = True
    if inner_it == 0:
        high_grad_track = False
    if dataug_epoch_start != 0:
        model.augment(mode=False)
        high_grad_track = False

    val_loss_monitor = None
    if loss_patience is not None:
        if dataug_epoch_start == -1:
            val_loss_monitor = loss_monitor(patience=loss_patience, end_train=2)  # 1st limit = data augmentation start
        else:
            # Fed with the test loss below, since validation data is used by the meta steps
            val_loss_monitor = loss_monitor(patience=loss_patience)

    model.train()

    fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
    diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

    meta_opt.zero_grad()

    for epoch in range(1, epochs+1):
        t0 = time.process_time()

        for i, (xs, ys) in enumerate(dl_train):
            xs, ys = xs.to(device), ys.to(device)

            if not KLdiv:
                # Uniform method: per-sample losses weighted by the augmentation module
                logits = fmodel(xs)
                loss = F.cross_entropy(F.log_softmax(logits, dim=1), ys, reduction='none')

                if fmodel._data_augmentation:  # Weight loss
                    w_loss = fmodel['data_aug'].loss_weight()
                    loss = loss * w_loss
                loss = loss.mean()

            else:
                # KL divergence method: supervised loss on non-augmented data first
                if fmodel._data_augmentation:
                    fmodel.augment(mode=False)
                    sup_logits = fmodel(xs)
                    fmodel.augment(mode=True)
                else:
                    sup_logits = fmodel(xs)
                log_sup = F.log_softmax(sup_logits, dim=1)
                loss = F.cross_entropy(log_sup, ys)

                if fmodel._data_augmentation:
                    aug_logits = fmodel(xs)
                    log_aug = F.log_softmax(aug_logits, dim=1)

                    w_loss = fmodel['data_aug'].loss_weight()  # Weight loss

                    # KL div w/ logits - similarity of the predictions (distributions)
                    aug_loss = F.softmax(sup_logits, dim=1) * (log_sup - log_aug)
                    aug_loss = aug_loss.sum(dim=-1)
                    aug_loss = (w_loss * aug_loss).mean()

                    aug_loss += (F.cross_entropy(log_aug, ys, reduction='none') * w_loss).mean()

                    unsupp_coeff = 1
                    loss += aug_loss * unsupp_coeff

            diffopt.step(loss)  # replaces opt.zero_grad / loss.backward / opt.step

            # high_grad_track is only True when inner_it != 0, so the modulo is safe
            if high_grad_track and i > 0 and i % inner_it == 0:  # Perform Meta step
                val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)
                val_loss.backward()

                # Warn when either learned parameter received no gradient
                # (the original tested the 'mag' parameter itself instead of its gradient).
                if model['data_aug']['prob'].grad is None or model['data_aug']['mag'].grad is None:
                    print("Warning no grad (iter",i,") :\n Prob-",model['data_aug']['prob'].grad,"\n Mag-", model['data_aug']['mag'].grad)

                countcopy += 1
                model_copy(src=fmodel, dst=model)
                optim_copy(dopt=diffopt, opt=inner_opt)

                torch.nn.utils.clip_grad_norm_(model['data_aug'].parameters(), max_norm=10, norm_type=2)  # Prevent exploding grad with RNN

                meta_opt.step()
                model['data_aug'].adjust_param(soft=False)  # Constraint: sum(proba)=1
                try:  # Only some augmentation modules provide next_TF_set (Dataugv6)
                    model['data_aug'].next_TF_set()
                except Exception:
                    pass

                # NOTE(review): reset assumed to belong to the meta step (nesting
                # reconstructed from a whitespace-mangled source) - confirm.
                fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
                diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

                meta_opt.zero_grad()

        tf = time.process_time()

        if not high_grad_track:
            countcopy += 1
            model_copy(src=fmodel, dst=model)
            optim_copy(dopt=diffopt, opt=inner_opt)
            val_loss = compute_vaLoss(model=fmodel, dl_it=dl_val_it, dl=dl_val)

            # Needed to reset higher (fast params accumulate even with track_higher_grads = False)
            fmodel = higher.patch.monkeypatch(model, device=None, copy_initial_weights=True)
            diffopt = higher.optim.get_diff_optim(inner_opt, model.parameters(), fmodel=fmodel, track_higher_grads=high_grad_track)

        accuracy, test_loss = test(model)
        model.train()

        #### Log ####
        if model['data_aug']._shared_mag:
            param = [{'p': p.item(), 'm': model['data_aug']['mag'].item()} for p in model['data_aug']['prob']]
        else:
            param = [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
        data = {
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "param": param,
        }
        log.append(data)
        #############
        #### Print ####
        if print_freq and epoch % print_freq == 0:
            print('-'*9)
            print('Epoch : %d/%d'%(epoch,epochs))
            print('Time : %.00f'%(tf - t0))
            print('Train loss :',loss.item(), '/ val loss', val_loss.item())
            print('Accuracy :', max([x["acc"] for x in log]))
            print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
            print('TF Proba :', model['data_aug']['prob'].data)
            print('TF Mag :', model['data_aug']['mag'].data)
        #############
        if val_loss_monitor:
            model.eval()
            val_loss_monitor.register(test_loss)
            if val_loss_monitor.end_training():
                break  # Stop training
            model.train()

        if not model.is_augmenting() and (epoch == dataug_epoch_start or (val_loss_monitor and val_loss_monitor.limit_reached()==1)):
            print('Starting Data Augmention...')
            dataug_epoch_start = epoch
            model.augment(mode=True)
            if inner_it != 0:
                high_grad_track = True

    # Best effort: a failure to save the samples must not lose the training log
    try:
        viz_sample_data(imgs=xs, labels=ys, fig_name='samples/data_sample_epoch{}_noTF'.format(epoch))
        viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='samples/data_sample_epoch{}'.format(epoch), weight_labels=model['data_aug'].loss_weight())
    except Exception:
        print("Couldn't save finals samples")

    return log
|
161
higher/smart_aug/old/utils_old.py
Normal file
161
higher/smart_aug/old/utils_old.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
import numpy as np
|
||||
import json, math, time, os
|
||||
import matplotlib.pyplot as plt
|
||||
import copy
|
||||
import gc
|
||||
|
||||
from torchviz import make_dot
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
import time
|
||||
|
||||
class timer():
    """Stopwatch returning the time elapsed between successive calls.

    Uses a monotonic clock so the measured intervals are not affected by
    adjustments of the system clock.
    """

    def __init__(self):
        """Start timing at creation."""
        self._start_time = time.perf_counter()

    def exec_time(self):
        """Return the time elapsed since creation or the previous call, and restart timing.

        Returns:
            (float) Elapsed time in seconds.
        """
        end = time.perf_counter()
        res = end - self._start_time
        self._start_time = end
        return res
|
||||
|
||||
def plot_res(log, fig_name='res', param_names=None):
    """Plot the loss, accuracy and learned parameters of a training run.

    Draws three panels side by side: train/val loss, accuracy, and the
    evolution of the "param" entry (a single curve when it is a number, a
    stacked plot when it is a sequence), then saves the figure.

    Args:
        log (list): Training logs. Each item is a dict with the keys "epoch",
            "train_loss", "val_loss", "acc" and "param".
        fig_name (str): Destination of the figure. Every '.' of the file name
            (not of its directory) is replaced by ','. (default: 'res')
        param_names (list): Legend labels used when "param" is a sequence.
            If not provided, 'P0', 'P1', ... are used. (default: None)
    """
    epochs = [x["epoch"] for x in log]

    fig, ax = plt.subplots(ncols=3, figsize=(15, 3))

    ax[0].set_title('Loss')
    ax[0].plot(epochs, [x["train_loss"] for x in log], label='Train')
    ax[0].plot(epochs, [x["val_loss"] for x in log], label='Val')
    ax[0].legend()

    ax[1].set_title('Acc')
    ax[1].plot(epochs, [x["acc"] for x in log])

    # An empty log yields empty panels instead of an IndexError.
    first_param = log[0]["param"] if log else None
    if first_param is not None:
        if isinstance(first_param, (int, float)):
            # Scalar parameter: a single magnitude curve.
            ax[2].set_title('Mag')
            ax[2].plot(epochs, [x["param"] for x in log], label='Mag')
            ax[2].legend()
        else:
            # Sequence of parameters: one stacked band per entry.
            ax[2].set_title('Prob')
            if not param_names:
                param_names = ['P' + str(idx) for idx, _ in enumerate(first_param)]
            proba = [[x["param"][idx] for x in log] for idx, _ in enumerate(first_param)]
            ax[2].stackplot(epochs, proba, labels=param_names)
            ax[2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))

    # Dots are replaced in the file name only: touching the directory part
    # would turn a relative path such as '../res/' into ',,/res/'.
    folder, base = os.path.split(fig_name)
    fig.savefig(os.path.join(folder, base.replace('.', ',')))
    plt.close(fig)
|
||||
|
||||
def plot_res_compare(filenames, fig_name='res'):
    """Compare several saved runs: accuracy and time against the number of TF.

    Args:
        filenames (list): Paths of JSON result files. Each file must hold the
            keys "Param_names", "Accuracy" and "Time" (only "Time"[0] is used).
        fig_name (str): Destination of the figure. Every '.' of the file name
            (not of its directory) is replaced by ','. (default: 'res')
    """
    all_data = []
    for file in filenames:
        with open(file) as json_file:
            all_data.append(json.load(json_file))

    n_tf = [len(x["Param_names"]) for x in all_data]
    acc = [x["Accuracy"] for x in all_data]
    # Named 'times' so the local does not shadow the imported 'time' module.
    times = [x["Time"][0] for x in all_data]

    # Three axes are created but only the first two are used, as before.
    fig, ax = plt.subplots(ncols=3, figsize=(30, 8))

    ax[0].plot(n_tf, acc)
    ax[1].plot(n_tf, times)

    ax[0].set_title('Acc')
    ax[1].set_title('Time')
    #for a in ax: a.legend()

    # Dots are replaced in the file name only: touching the directory part
    # would turn a relative path such as '../res/' into ',,/res/'.
    folder, base = os.path.split(fig_name)
    fig.savefig(os.path.join(folder, base.replace('.', ',')), bbox_inches='tight')
    plt.close(fig)
|
||||
|
||||
def plot_TF_res(log, tf_names, fig_name='res'):
    """Plot the mean value of each parameter over a run, with its standard deviation.

    Args:
        log (list): Training logs. Each item is a dict whose "param" entry
            holds one value per name in tf_names.
        tf_names (list): Labels of the bars.
        fig_name (str): Destination of the figure. Every '.' of the file name
            (not of its directory) is replaced by ','. (default: 'res')
    """
    # Built once and shared by both statistics (axis 0 runs over log entries).
    params = [x["param"] for x in log]
    mean = np.mean(params, axis=0)
    std = np.std(params, axis=0)

    fig, ax = plt.subplots(1, 1, figsize=(30, 8), sharey=True)
    ax.bar(tf_names, mean, yerr=std)
    #ax.bar(tf_names, log[-1]["param"])

    # Dots are replaced in the file name only: touching the directory part
    # would turn a relative path such as '../res/' into ',,/res/'.
    folder, base = os.path.split(fig_name)
    fig.savefig(os.path.join(folder, base.replace('.', ',')), bbox_inches='tight')
    plt.close(fig)
|
||||
|
||||
def model_copy(src, dst, patch_copy=True, copy_grad=True):
    """Copy the weights (and optionally the gradients) of src into dst.

    A deepcopy is deliberately avoided: only weights and gradients are wanted,
    not the whole source object and its internal state.

    Args:
        src: Source model. Must support state_dict(), named_parameters() and
            indexing with 'model' / 'data_aug'.
        dst: Destination model, with the same structure as src.
        patch_copy (bool): Also reload the state of the 'model' and 'data_aug'
            sub-modules individually. (default: True)
        copy_grad (bool): Make every parameter of dst reference the gradient
            of the parameter of src with the same name. (default: True)
    """
    dst.load_state_dict(src.state_dict())  # Does not copy gradients!

    if patch_copy:
        # Presumably covers data missed by the top-level load - TODO confirm.
        dst['model'].load_state_dict(src['model'].state_dict())
        dst['data_aug'].load_state_dict(src['data_aug'].state_dict())

    # Gradient copy
    if copy_grad:
        # One name lookup per parameter instead of comparing every pair of names.
        src_params = dict(src.named_parameters())
        for name, dst_param in dst.named_parameters():
            if name in src_params:
                # The gradient tensor is shared with src, not cloned.
                dst_param.grad = src_params[name].grad

        # NOTE(review): the original indentation was lost; this block is assumed
        # to belong to the copy_grad branch - confirm.
        try:  # Data_augV4
            dst['data_aug']._input_info = src['data_aug']._input_info
            dst['data_aug']._TF_matrix = src['data_aug']._TF_matrix
        except Exception:
            # Best effort: other augmentation modules lack these attributes.
            pass
|
||||
|
||||
def optim_copy(dopt, opt):
    """Transfer the per-parameter state of dopt into the optimizer opt.

    load_state_dict() was found not to carry the state (momentum, etc.) over,
    hence this manual copy.

    Args:
        dopt: Source optimizer; its state is read as dopt.state[group][param],
            both indices being positions.
        opt: Destination optimizer; its state is keyed by parameter object.
    """
    #inner_opt.load_state_dict(diffopt.state_dict())
    #opt_param=higher.optim.get_trainable_opt_params(diffopt)

    for g, param_group in enumerate(opt.param_groups):
        # Entries are matched by (group position, parameter position).
        opt.state.update(
            (param, dopt.state[g][k]) for k, param in enumerate(param_group['params'])
        )
|
||||
|
||||
class loss_monitor(): #See https://github.com/pytorch/ignite
    """Patience counter over a stream of loss values.

    Every loss strictly greater than the best one seen increments a counter.
    Once the counter reaches `patience`, a "limit" is recorded and the monitor
    starts over; training should end after `end_train` limits.
    """
    def __init__(self, patience, end_train=1):
        self.patience = patience
        self.end_train = end_train
        self.counter = 0        # consecutive losses without improvement
        self.best_score = None  # lowest loss seen since the last limit
        self.reached_limit = 0  # number of times patience ran out

    def register(self, loss):
        """Record a new loss value."""
        if self.best_score is None:
            # First value since start or since the last limit: new reference.
            self.best_score = loss
            return

        if loss > self.best_score:
            self.counter += 1
            #if not self.reached_limit:
            print("loss no improve counter", self.counter, self.reached_limit)
            return

        # Equal or lower loss counts as an improvement.
        self.best_score = loss
        self.counter = 0

    def limit_reached(self):
        """Return how many limits were reached so far.

        Not a pure query: when patience is exhausted, this call records the
        limit and resets the counter and the best score.
        """
        patience_exhausted = self.counter >= self.patience
        if patience_exhausted:
            self.counter = 0
            self.reached_limit += 1
            self.best_score = None
        return self.reached_limit

    def end_training(self):
        """Tell whether enough limits were reached to stop training."""
        return bool(self.limit_reached() >= self.end_train)

    def reset(self):
        """Restore the initial state, keeping patience and end_train."""
        self.__init__(self.patience, self.end_train)
|
163
higher/smart_aug/test_brutus.py
Executable file
163
higher/smart_aug/test_brutus.py
Executable file
|
@ -0,0 +1,163 @@
|
|||
from model import *
|
||||
from dataug import *
|
||||
#from utils import *
|
||||
from train_utils import *
|
||||
|
||||
# Names of the transformations (keys of TF.TF_dict) used by the experiments.
tf_names = [
    ## Geometric TF ##
    'Identity',
    'FlipUD',
    'FlipLR',
    'Rotate',
    'TranslateX',
    'TranslateY',
    'ShearX',
    'ShearY',

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast',
    'Color',
    'Brightness',
    'Sharpness',
    'Posterize',
    'Solarize', # => image in [0, 1]; not optimized for batches
]

# Prefer the GPU, but fall back to the CPU instead of crashing in
# get_device_name() on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device == torch.device('cpu'):
    device_name = 'CPU'
else:
    device_name = torch.cuda.get_device_name(device)
|
||||
|
||||
##########################################
|
||||
if __name__ == "__main__":
    # Grid of experiments: every combination below is trained nb_run times and
    # its log and result plot are written under res_folder.
    import itertools

    ## Experiment grid ##
    res_folder = "../res/brutus-tests2/"
    epochs = 150
    inner_its = [1]
    dist_mix = [0.0, 0.5, 0.8, 1.0]
    dataug_epoch_starts = [0]
    N_seq_TF = [4, 3, 2]
    mag_setup = [(True,True), (False, False)] #(Fixed, Shared)
    #prob_setup = [True, False]
    p_setup = False
    nb_run = 3

    optim_param = {
        'Meta':{
            'optim':'Adam',
            'lr':1e-2, #1e-2
        },
        'Inner':{
            'optim': 'SGD',
            'lr':1e-1, #1e-2
            'momentum':0.9, #0.9
        }
    }

    tf_dict = {k: TF.TF_dict[k] for k in tf_names}

    # makedirs creates both levels and tolerates existing ones; the former
    # mkdir pair skipped "log/" whenever res_folder already existed.
    log_folder = res_folder + "log/"
    os.makedirs(log_folder, exist_ok=True)

    # product() iterates in the same order as the equivalent nested loops.
    grid = itertools.product(inner_its, dataug_epoch_starts, N_seq_TF, dist_mix, mag_setup, range(nb_run))
    for n_inner_iter, dataug_epoch_start, n_tf, dist, m_setup, run in grid:
        # Other setups are pointless without meta-optimization.
        if (n_inner_iter == 0 and (m_setup!=(True,True) and p_setup!=True)) or (p_setup and dist!=0.0): continue

        t0 = time.process_time()

        # Alternatives: LeNet(3,10), MobileNetV2(num_classes=10), WideResNet(num_classes=10, wrn_size=32)
        model = ResNet(num_classes=10)
        model = Higher_model(model) #run_dist_dataugV3
        aug_model = Augmented_model(Data_augV5(TF_dict=tf_dict, N_TF=n_tf, mix_dist=dist, fixed_prob=p_setup, fixed_mag=m_setup[0], shared_mag=m_setup[1]), model).to(device)
        #aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)

        print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
        log = run_dist_dataugV3(model=aug_model,
            epochs=epochs,
            inner_it=n_inner_iter,
            dataug_epoch_start=dataug_epoch_start,
            opt_param=optim_param,
            print_freq=50,
            KLdiv=True)

        exec_time = time.process_time() - t0
        ####
        print('-'*9)
        times = [x["time"] for x in log]
        out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param, "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
        print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
        filename = "{}-{} epochs (dataug:{})- {} in_it-{}".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter, run)

        # Results go to the folder prepared above (previously it was created
        # but the files were written to "../res/log/" and "../res/").
        with open(log_folder + "%s.json" % filename, "w+") as f:
            try:
                json.dump(out, f, indent=True)
                print('Log :\"',f.name, '\" saved !')
            except Exception as err:
                # A failed save must not abort the remaining runs.
                print("Failed to save logs :",f.name, err)
        try:
            plot_resV2(log, fig_name=res_folder+filename, param_names=aug_model.TF_names())
        except Exception as err:
            print("Failed to plot res", err)

        print('Execution Time : %.00f '%(exec_time))
        print('-'*9)
|
218
higher/smart_aug/test_dataug.py
Executable file
218
higher/smart_aug/test_dataug.py
Executable file
|
@ -0,0 +1,218 @@
|
|||
""" Script to run experiment on smart augmentation.
|
||||
|
||||
"""
|
||||
|
||||
from model import *
|
||||
from dataug import *
|
||||
#from utils import *
|
||||
from train_utils import *
|
||||
|
||||
# Use available TF (see transformations.py)
|
||||
# Names of the transformations (keys of TF.TF_dict) used by the experiment.
# Commented entries are available alternatives that are currently disabled.
tf_names = [
    ## Geometric TF ##
    'Identity',
    'FlipUD',
    'FlipLR',
    'Rotate',
    'TranslateX',
    'TranslateY',
    'ShearX',
    'ShearY',

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast',
    'Color',
    'Brightness',
    'Sharpness',
    'Posterize',
    'Solarize', # => image in [0, 1]; not optimized for batches

    #Color TF (Common mag scale)
    #'+Contrast',
    #'+Color',
    #'+Brightness',
    #'+Sharpness',
    #'-Contrast',
    #'-Color',
    #'-Brightness',
    #'-Sharpness',
    #'=Posterize',
    #'=Solarize',

    ## Bad Transformations ##
    # Bad Geometric TF #
    #'BShearX',
    #'BShearY',
    #'BTranslateX-',
    #'BTranslateX-',
    #'BTranslateY',
    #'BTranslateY-',

    #'BadContrast',
    #'BadBrightness',

    #'Random',
    #'RandBlend'

    # Not working
    #'Auto_Contrast', # not optimized for batches (very slow)
    #'Equalize',
]

# Select device to use: prefer the GPU, but fall back to the CPU instead of
# crashing in get_device_name() on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device == torch.device('cpu'):
    device_name = 'CPU'
else:
    device_name = torch.cuda.get_device_name(device)
|
||||
|
||||
##########################################
|
||||
if __name__ == "__main__":
    # Runs the tasks listed in `tasks`, then saves a JSON log and a result
    # plot for each of them.
    import os  # local import: only this entry point needs it

    #Task to perform
    tasks={
        #'classic',
        #'aug_dataset', #Moved to old code
        'aug_model'
    }
    #Parameters
    n_inner_iter = 1
    epochs = 200
    dataug_epoch_start=0
    optim_param={
        'Meta':{
            'optim':'Adam',
            'lr':1e-2, #1e-2
        },
        'Inner':{
            'optim': 'SGD',
            'lr':1e-2, #1e-2
            'momentum':0.9, #0.9
        }
    }

    #Models
    model = LeNet(3,10)
    #model = ResNet(num_classes=10)
    #Slow ones
    #model = MobileNetV2(num_classes=10)
    #model = WideResNet(num_classes=10, wrn_size=32)

    #### Classic ####
    if 'classic' in tasks:
        t0 = time.process_time()
        model = model.to(device)

        print("{} on {} for {} epochs".format(str(model), device_name, epochs))
        log= train_classic(model=model, opt_param=optim_param, epochs=epochs, print_freq=1)
        #log= train_classic_higher(model=model, epochs=epochs)

        exec_time=time.process_time() - t0
        ####
        print('-'*9)
        times = [x["time"] for x in log]
        out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param['Inner'], "Device": device_name, "Log": log}
        print(str(model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
        filename = "{}-{} epochs".format(str(model),epochs)

        # Make sure the destination exists so a finished run is not lost.
        log_dir = "../res/log/"
        os.makedirs(log_dir, exist_ok=True)
        with open(log_dir + "%s.json" % filename, "w+") as f:
            json.dump(out, f, indent=True)
            print('Log :\"',f.name, '\" saved !')

        plot_res(log, fig_name="../res/"+filename)

        print('Execution Time : %.00f '%(exec_time))
        print('-'*9)

    #### Augmented Model ####
    if 'aug_model' in tasks:
        t0 = time.process_time()

        tf_dict = {k: TF.TF_dict[k] for k in tf_names}
        model = Higher_model(model) #run_dist_dataugV3
        aug_model = Augmented_model(Data_augV7(TF_dict=tf_dict, N_TF=3, mix_dist=0.8, fixed_prob=False, fixed_mag=False, shared_mag=False), model).to(device)
        #aug_model = Augmented_model(RandAug(TF_dict=tf_dict, N_TF=2), model).to(device)

        print("{} on {} for {} epochs - {} inner_it".format(str(aug_model), device_name, epochs, n_inner_iter))
        log= run_dist_dataugV3(model=aug_model,
            epochs=epochs,
            inner_it=n_inner_iter,
            dataug_epoch_start=dataug_epoch_start,
            opt_param=optim_param,
            print_freq=20,
            KLdiv=True,
            hp_opt=False)

        exec_time=time.process_time() - t0
        ####
        print('-'*9)
        times = [x["time"] for x in log]
        out = {"Accuracy": max([x["acc"] for x in log]), "Time": (np.mean(times),np.std(times), exec_time), 'Optimizer': optim_param, "Device": device_name, "Param_names": aug_model.TF_names(), "Log": log}
        print(str(aug_model),": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
        filename = "{}-{} epochs (dataug:{})- {} in_it".format(str(aug_model),epochs,dataug_epoch_start,n_inner_iter)

        # NOTE(review): this task writes under "res/" while the classic task
        # writes under "../res/" - confirm which location is intended.
        log_dir = "res/log/"
        os.makedirs(log_dir, exist_ok=True)
        with open(log_dir + "%s.json" % filename, "w+") as f:
            try:
                json.dump(out, f, indent=True)
                print('Log :\"',f.name, '\" saved !')
            except Exception as err:
                # Report the cause instead of hiding it.
                print("Failed to save logs :",f.name, err)
        try:
            plot_resV2(log, fig_name="res/"+filename, param_names=aug_model.TF_names())
        except Exception as err:
            print("Failed to plot res", err)

        print('Execution Time : %.00f '%(exec_time))
        print('-'*9)
|
336
higher/smart_aug/train_utils.py
Executable file
336
higher/smart_aug/train_utils.py
Executable file
|
@ -0,0 +1,336 @@
|
|||
""" Utility functions for training.
|
||||
|
||||
"""
|
||||
|
||||
import torch
|
||||
#import torch.optim
|
||||
import torchvision
|
||||
import higher
|
||||
|
||||
from datasets import *
|
||||
from utils import *
|
||||
|
||||
def test(model):
    """Evaluate a model on the test data (module-level loader dl_test).

    The model is left in evaluation mode.

    Args:
        model (nn.Module): Model to test.

    Returns:
        (float, float) Accuracy in percent over all test samples, and the mean
            of the per-batch cross-entropy losses.
    """
    device = next(model.parameters()).device
    model.eval()

    n_correct, n_seen = 0, 0
    batch_losses = []
    with torch.no_grad():
        for features, labels in dl_test:
            features = features.to(device)
            labels = labels.to(device)

            outputs = model(features)
            # The predicted class is the index of the highest output.
            predicted = torch.max(outputs.data, 1)[1]

            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
            batch_losses.append(F.cross_entropy(outputs, labels).item())

    return 100 * n_correct / n_seen, np.mean(batch_losses)
|
||||
|
||||
def compute_vaLoss(model, dl_it, dl):
    """Evaluate a model on a batch of data.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model (nn.Module): Model to evaluate.
        dl_it (Iterator): Data loader iterator.
        dl (DataLoader): Data loader, used to start over once dl_it is exhausted.

    Returns:
        (Tensor) Loss on a single batch of data.
    """
    device = next(model.parameters()).device

    exhausted = object()  # private sentinel: cannot be confused with a batch
    batch = next(dl_it, exhausted)
    if batch is exhausted:  # End of the validation epoch
        # NOTE(review): the fresh iterator is local to this call, so the
        # caller keeps its exhausted one and every later call restarts here.
        batch = next(iter(dl))
    xs, ys = batch
    xs = xs.to(device)
    ys = ys.to(device)

    model.eval()  # Validation without transformations!
    log_probs = F.log_softmax(model(xs), dim=1)
    return F.cross_entropy(log_probs, ys)
|
||||
|
||||
def train_classic(model, opt_param, epochs=1, print_freq=1):
    """Classic training of a model.

    Trains with SGD on the module-level loader dl_train. After each epoch, the
    loss on one batch of dl_val and the test accuracy are measured.

    Args:
        model (nn.Module): Model to train.
        opt_param (dict): Dictionary containing optimizer parameters. Only
            opt_param['Inner']['lr'] and opt_param['Inner']['momentum'] are read.
        epochs (int): Number of epochs to perform. (default: 1)
        print_freq (int): Number of epochs between displays of the state of training. If set to None, no display will be done. (default: 1)

    Returns:
        (list) Logs of training. Each item is a dict containing the results of an epoch.
    """
    device = next(model.parameters()).device
    #opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    optim = torch.optim.SGD(model.parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9

    model.train()
    val_batches = iter(dl_val)
    history = []
    for epoch in range(epochs):
        #print_torch_mem("Start epoch")
        epoch_start = time.process_time()
        for features, labels in dl_train:
            #print_torch_mem("Start iter")
            features = features.to(device)
            labels = labels.to(device)

            optim.zero_grad()
            log_probs = F.log_softmax(model.forward(features), dim=1)
            loss = F.cross_entropy(log_probs, labels)
            loss.backward()
            optim.step()

        #### Tests ####
        epoch_end = time.process_time()

        val_loss = compute_vaLoss(model=model, dl_it=val_batches, dl=dl_val)
        accuracy, _ = test(model)
        model.train()  # both evaluations leave the model in eval mode

        # Loss of the last training batch, as a plain number.
        train_loss_value = loss.item()
        val_loss_value = val_loss.item()
        elapsed = epoch_end - epoch_start

        #### Print ####
        if print_freq and epoch % print_freq == 0:
            print('-'*9)
            print('Epoch : %d/%d'%(epoch,epochs))
            print('Time : %.00f'%(elapsed))
            print('Train loss :',train_loss_value, '/ val loss', val_loss_value)
            print('Accuracy :', accuracy)

        #### Log ####
        history.append({
            "epoch": epoch,
            "train_loss": train_loss_value,
            "val_loss": val_loss_value,
            "acc": accuracy,
            "time": elapsed,

            "param": None,
        })

    return history
|
||||
|
||||
def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start=0, print_freq=1, KLdiv=1, hp_opt=False, save_sample_freq=None):
    """Training of an augmented model with higher.

    This function is intended to be used with an Augmented_model containing a Higher_model (see dataug.py).
    Ex : Augmented_model(Data_augV5(...), Higher_model(model))

    Training loss can either be computed directly from augmented inputs (KLdiv=0).
    However, it is recommended to use the KLdiv loss computation, inspired by UDA, which combines original and augmented inputs to compute the loss (KLdiv>0).
    See : https://github.com/google-research/uda

    Args:
        model (nn.Module): Augmented model to train.
        opt_param (dict): Dictionary containing optimizer parameters.
        epochs (int): Number of epochs to perform. (default: 1)
        inner_it (int): Number of inner iterations before a meta-step. 0 inner iteration means there's no meta-step. (default: 1)
        dataug_epoch_start (int): Epoch when to start data augmentation. (default: 0)
        print_freq (int): Number of epochs between displays of the state of training. If set to None, no display will be done. (default: 1)
        KLdiv (float): Proportion of the KLdiv loss added to the supervised loss. If set to 0, the loss is classically computed on augmented inputs. (default: 1)
        hp_opt (bool): Whether to learn inner optimizer parameters. (default: False)
        save_sample_freq (int): Number of epochs between saves of samples of data. If set to None, only one save is done at the end of the training. (default: None)

    Returns:
        (list) Logs of training. Each item is a dict containing the results of an epoch.
    """
    device = next(model.parameters()).device
    log = []
    dl_val_it = iter(dl_val)

    high_grad_track = True
    if inner_it == 0: #No HP optimization
        high_grad_track=False
    if dataug_epoch_start!=0: #Delayed data augmentation
        model.augment(mode=False)
        high_grad_track = False

    ## Optimizers ##
    #Inner Opt
    inner_opt = torch.optim.SGD(model['model']['original'].parameters(), lr=opt_param['Inner']['lr'], momentum=opt_param['Inner']['momentum']) #lr=1e-2 / momentum=0.9

    diffopt = model['model'].get_diffopt(
        inner_opt,
        grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
        track_higher_grads=high_grad_track)

    #Meta Opt
    hyper_param = list(model['data_aug'].parameters())
    if hp_opt :
        for param_group in diffopt.param_groups:
            # [1:] skips the first key of opt_param['Inner'] (presumably 'optim' - relies on dict order).
            for param in list(opt_param['Inner'].keys())[1:]:
                param_group[param]=torch.tensor(param_group[param]).to(device).requires_grad_()
                hyper_param += [param_group[param]]
    meta_opt = torch.optim.Adam(hyper_param, lr=opt_param['Meta']['lr']) #lr=1e-2

    model.train()
    meta_opt.zero_grad()

    for epoch in range(1, epochs+1):
        t0 = time.process_time()
        # Reset so an epoch without meta-step logs a fresh validation loss,
        # not the value left over from an earlier epoch.
        val_loss = None

        for i, (xs, ys) in enumerate(dl_train):
            xs, ys = xs.to(device), ys.to(device)

            if(KLdiv<=0):
                #Uniform method
                logits = model(xs) # modified `params` can also be passed as a kwarg
                loss = F.cross_entropy(F.log_softmax(logits, dim=1), ys, reduction='none') # no need to call loss.backwards()

                if model._data_augmentation: #Weight loss
                    w_loss = model['data_aug'].loss_weight()#.to(device)
                    loss = loss * w_loss
                loss = loss.mean()

            else:
                #KL div method
                # Supervised loss (classic), computed on non-augmented inputs
                if model.is_augmenting() :
                    model.augment(mode=False)
                    sup_logits = model(xs)
                    model.augment(mode=True)
                else:
                    sup_logits = model(xs)
                log_sup=F.log_softmax(sup_logits, dim=1)
                loss = F.cross_entropy(log_sup, ys)

                # Unsupervised loss (KLdiv)
                if model.is_augmenting() :
                    aug_logits = model(xs)
                    log_aug=F.log_softmax(aug_logits, dim=1)
                    w_loss = model['data_aug'].loss_weight() #Weight loss

                    #KL div w/ logits - similarity of predictions (distributions)
                    aug_loss = F.softmax(sup_logits, dim=1)*(log_sup-log_aug)
                    aug_loss = aug_loss.sum(dim=-1)
                    aug_loss = (w_loss * aug_loss).mean()
                    aug_loss += (F.cross_entropy(log_aug, ys , reduction='none') * w_loss).mean()

                    loss += aug_loss * KLdiv

            #print_graph(loss) #to visualize computational graph

            diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)

            if(high_grad_track and i>0 and i%inner_it==0): #Perform Meta step
                val_loss = compute_vaLoss(model=model, dl_it=dl_val_it, dl=dl_val) + model['data_aug'].reg_loss()
                #print_graph(val_loss) #to visualize computational graph
                val_loss.backward()

                torch.nn.utils.clip_grad_norm_(model['data_aug'].parameters(), max_norm=10, norm_type=2) #Prevent exploding grad with RNN

                meta_opt.step()

                #Adjust Hyper-parameters
                model['data_aug'].adjust_param(soft=False) #Constraint sum(proba)=1
                if hp_opt:
                    for param_group in diffopt.param_groups:
                        for param in list(opt_param['Inner'].keys())[1:]:
                            param_group[param].data = param_group[param].data.clamp(min=1e-4)

                #Reset gradients
                diffopt.detach_()
                model['model'].detach_()
                meta_opt.zero_grad()

        tf = time.process_time()

        if (save_sample_freq and epoch%save_sample_freq==0): #Data sample saving
            try:
                viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
                viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='../samples/data_sample_epoch{}'.format(epoch))
            except Exception:
                # format() instead of str + int, which raised a TypeError
                # inside this handler and aborted training.
                print("Couldn't save samples epoch{}".format(epoch))

        if val_loss is None: #Compute val loss for logs
            val_loss = compute_vaLoss(model=model, dl_it=dl_val_it, dl=dl_val)

        # Test model
        accuracy, test_loss =test(model)
        model.train()

        #### Log ####
        # One {'p', 'm'} entry per transformation; with a shared magnitude
        # the same value is repeated in every entry.
        param = [{'p': p.item(), 'm':model['data_aug']['mag'].item()} for p in model['data_aug']['prob']] if model['data_aug']._shared_mag else [{'p': p.item(), 'm': m.item()} for p, m in zip(model['data_aug']['prob'], model['data_aug']['mag'])]
        data={
            "epoch": epoch,
            "train_loss": loss.item(),
            "val_loss": val_loss.item(),
            "acc": accuracy,
            "time": tf - t0,

            "mix_dist": model['data_aug']['mix_dist'].item(),
            "param": param,
        }
        if hp_opt : data["opt_param"]=[{'lr': p_grp['lr'].item(), 'momentum': p_grp['momentum'].item()} for p_grp in diffopt.param_groups]
        log.append(data)
        #############
        #### Print ####
        if(print_freq and epoch%print_freq==0):
            print('-'*9)
            print('Epoch : %d/%d'%(epoch,epochs))
            print('Time : %.00f'%(tf - t0))
            print('Train loss :',loss.item(), '/ val loss', val_loss.item())
            print('Accuracy :', max([x["acc"] for x in log])) # best accuracy so far
            print('Data Augmention : {} (Epoch {})'.format(model._data_augmentation, dataug_epoch_start))
            if not model['data_aug']._fixed_prob: print('TF Proba :', model['data_aug']['prob'].data)
            #print('proba grad',model['data_aug']['prob'].grad)
            if not model['data_aug']._fixed_mag: print('TF Mag :', model['data_aug']['mag'].data)
            #print('Mag grad',model['data_aug']['mag'].grad)
            if not model['data_aug']._fixed_mix: print('Mix:', model['data_aug']['mix_dist'].item())
            #print('Reg loss:', model['data_aug'].reg_loss().item())

            if hp_opt :
                for param_group in diffopt.param_groups:
                    print('Opt param - lr:', param_group['lr'].item(),'- momentum:', param_group['momentum'].item())
        #############

        #Delayed data augmentation
        if not model.is_augmenting() and (epoch == dataug_epoch_start):
            print('Starting Data Augmention...')
            dataug_epoch_start = epoch
            model.augment(mode=True)
            if inner_it != 0: #Rebuild diffopt if needed
                high_grad_track = True
                diffopt = model['model'].get_diffopt(
                    inner_opt,
                    grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
                    track_higher_grads=high_grad_track)

    #Data sample saving
    try:
        viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
        viz_sample_data(imgs=model['data_aug'](xs), labels=ys, fig_name='../samples/data_sample_epoch{}'.format(epoch))
    except Exception:
        # Best effort: also reached when no batch was ever processed.
        print("Couldn't save finals samples")

    return log
|
467
higher/smart_aug/transformations.py
Executable file
467
higher/smart_aug/transformations.py
Executable file
|
@ -0,0 +1,467 @@
|
|||
""" PyTorch implementation of some PIL image transformations.
|
||||
|
||||
These implementations are designed to take advantage of PyTorch's batched computation on GPU.
|
||||
|
||||
Based on Kornia library.
|
||||
See: https://github.com/kornia/kornia
|
||||
|
||||
And PIL.
|
||||
See:
|
||||
https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py
|
||||
https://github.com/python-pillow/Pillow/blob/9c78c3f97291bd681bc8637922d6a2fa9415916c/src/PIL/Image.py#L2818
|
||||
|
||||
Inspired from AutoAugment.
|
||||
See: https://github.com/tensorflow/models/blob/fc2056bce6ab17eabdc139061fef8f4f2ee763ec/research/autoaugment/augmentation_transforms.py
|
||||
"""
|
||||
|
||||
import torch
|
||||
import kornia
|
||||
import random
|
||||
|
||||
|
||||
TF_no_mag={'Identity', 'FlipUD', 'FlipLR', 'Random', 'RandBlend'} #TF that have no use for the magnitude parameter.
TF_no_grad={'Solarize', 'Posterize', '=Solarize', '=Posterize'} #TF whose implementation doesn't allow gradient propagation.
TF_ignore_mag= TF_no_mag | TF_no_grad #TF for which magnitude should be ignored (Magnitude fixed).

PARAMETER_MAX = 1 # Maximum 'level' (magnitude) that can be predicted for a transformation.
PARAMETER_MIN = 0.1 # Minimum 'level' (magnitude) that can be predicted for a transformation.
|
||||
|
||||
### Available TF for Dataug ###
# Dictionary mapping transformation identifiers to their function.
# Each value of the dict should be a lambda function taking (batch of data, magnitude of the transformation) as arguments and returning a batch of data.
TF_dict={ #Dataugv5+
    ## Geometric TF ##
    'Identity' : (lambda x, mag: x),
    'FlipUD' : (lambda x, mag: flipUD(x)),
    'FlipLR' : (lambda x, mag: flipLR(x)),
    'Rotate': (lambda x, mag: rotate(x, angle=rand_floats(size=x.shape[0], mag=mag, maxval=30))),
    'TranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=0))),
    'TranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=20), zero_pos=1))),
    'ShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=0))),
    'ShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, maxval=0.3), zero_pos=1))),

    ## Color TF (Expect image in the range of [0, 1]) ##
    'Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.9))),
    'Posterize': (lambda x, mag: posterize(x, bits=rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
    'Solarize': (lambda x, mag: solarize(x, thresholds=rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=> Image in [0,1]

    #Color TF (Common mag scale)
    # '+' variants pass minval=1.0 to rand_floats; '-' and '=' variants sample with invScale_rand_floats.
    '+Contrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Color':(lambda x, mag: color(x, color_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Brightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '+Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.0, maxval=1.9))),
    '-Contrast': (lambda x, mag: contrast(x, contrast_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Color':(lambda x, mag: color(x, color_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Brightness':(lambda x, mag: brightness(x, brightness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '-Sharpness':(lambda x, mag: sharpeness(x, sharpness_factor=invScale_rand_floats(size=x.shape[0], mag=mag, minval=0.1, maxval=1.0))),
    '=Posterize': (lambda x, mag: posterize(x, bits=invScale_rand_floats(size=x.shape[0], mag=mag, minval=4., maxval=8.))),#Gradient is lost
    '=Solarize': (lambda x, mag: solarize(x, thresholds=invScale_rand_floats(size=x.shape[0], mag=mag, minval=1/256., maxval=256/256.))), #Gradient is lost #=> Image in [0,1]

    ## Bad Transformations ##
    # Bad Geometric TF #
    'BShearX': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=0.3*3, maxval=0.3*4), zero_pos=0))),
    'BShearY': (lambda x, mag: shear(x, shear=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=0.3*3, maxval=0.3*4), zero_pos=1))),
    'BTranslateX': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=25, maxval=30), zero_pos=0))),
    'BTranslateX-': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-25, maxval=-30), zero_pos=0))),
    'BTranslateY': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=25, maxval=30), zero_pos=1))),
    'BTranslateY-': (lambda x, mag: translate(x, translation=zero_stack(rand_floats(size=(x.shape[0],), mag=mag, minval=-25, maxval=-30), zero_pos=1))),

    # Bad Color TF #
    'BadContrast': (lambda x, mag: contrast(x, contrast_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9*2, maxval=2*4))),
    'BadBrightness':(lambda x, mag: brightness(x, brightness_factor=rand_floats(size=x.shape[0], mag=mag, minval=1.9, maxval=2*3))),

    # Random TF #
    'Random':(lambda x, mag: torch.rand_like(x)),
    'RandBlend': (lambda x, mag: blend(x,torch.rand_like(x), alpha=torch.tensor(0.7,device=mag.device).expand(x.shape[0]))),

    #Not ready for use
    #'Auto_Contrast': (lambda mag: None), #Not optimized for batches (very slow)
    #'Equalize': (lambda mag: None),
}
|
||||
|
||||
## Image type cast ##
|
||||
def int_image(float_image):
    """Quantize a float Tensor/Image into an 8-bit integer Tensor/Image.

    The values are multiplied by 255 and cast to ``torch.uint8``.
    Be wary that this conversion is not bijective: each conversion loses a
    small amount of information (granularity: 1/256 = 0.0039).

    The gradient associated with the input is also lost, as gradients cannot
    be tracked on integer tensors.

    Args:
        float_image (FloatTensor): Image tensor.

    Returns:
        (ByteTensor) Converted tensor.
    """
    scaled = float_image * 255.
    return scaled.to(torch.uint8)
|
||||
|
||||
def float_image(int_image):
    """Convert an integer Tensor/Image back to a float Tensor/Image.

    The values are cast to ``torch.float`` and divided by 255.

    Args:
        int_image (ByteTensor): Image tensor.

    Returns:
        (FloatTensor) Converted tensor.
    """
    as_float = int_image.to(torch.float)
    return as_float / 255.
|
||||
|
||||
## Parameters utils ##
|
||||
def rand_floats(size, mag, maxval, minval=None):
    """Generate a batch of random values.

    The upper bound is ``maxval`` scaled by the magnitude
    (see float_parameter()); values are drawn uniformly between ``minval``
    and that scaled bound.

    Args:
        size (int or tuple): Number (or shape) of values to generate.
        mag (Tensor): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
            Its device is used for the generated values.
        maxval (float): Maximum value that can be generated. This will be scaled to mag/PARAMETER_MAX.
        minval (float): Minimum value that can be generated. (default: minus the scaled maxval)

    Returns:
        (Tensor) Generated batch of float values between [minval, scaled maxval].
    """
    real_mag = float_parameter(mag, maxval=maxval)
    # Explicit None test: a legitimate minval of 0 must not fall back to the symmetric default.
    if minval is None:
        minval = -real_mag
    return minval + (real_mag - minval) * torch.rand(size, device=mag.device) #[min_val, real_mag]
|
||||
|
||||
def invScale_rand_floats(size, mag, maxval, minval):
    """Generate a batch of random values on an inverted magnitude scale.

    Similar to rand_floats() except that the magnitude is reversed before use:
        Mag:[0,PARAMETER_MAX] => [PARAMETER_MAX, 0]

    The lower bound of the sampling interval is derived from the reversed
    magnitude; the upper bound is always ``maxval``.

    Args:
        size (int or tuple): Number (or shape) of values to generate.
        mag (Tensor): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
            Its device is used for the generated values.
        maxval (float): Maximum value that can be generated.
        minval (float): Minimum value that can be generated.

    Returns:
        (Tensor) Generated batch of float values between [scaled lower bound, maxval].
    """
    reversed_mag = float(PARAMETER_MAX) - mag
    lower_bound = float_parameter(reversed_mag, maxval=maxval-minval)+minval
    noise = torch.rand(size, device=mag.device)
    return lower_bound + (maxval-lower_bound) * noise #[lower_bound, max_val]
|
||||
|
||||
def zero_stack(tensor, zero_pos):
    """Pair every value of a Tensor with a zero.

    This function is intended to be used with a 1-dimensional Tensor of
    N values, thus returning a 2-dimensional (N, 2) Tensor. The zeros share
    the dtype and device of the input.

    Args:
        tensor (Tensor): Tensor to be stacked with zeros.
        zero_pos (int): Whether the zeros should be placed after (0) or before (1) the values of the Tensor.

    Returns:
        Stacked Tensor.

    Raises:
        ValueError: If zero_pos is neither 0 nor 1.
    """
    # zeros_like keeps dtype/device consistent with the input so that stack never mixes types.
    zeros = torch.zeros_like(tensor)
    if zero_pos == 0:
        return torch.stack((tensor, zeros), dim=1)
    if zero_pos == 1:
        return torch.stack((zeros, tensor), dim=1)
    raise ValueError("Invalid zero_pos : {}".format(zero_pos))
|
||||
|
||||
def float_parameter(level, maxval):
    """Scale a level to the range of an operation.

    Args:
        level (float): Level of the operation that will be between [PARAMETER_MIN, PARAMETER_MAX].
        maxval: Maximum value that the operation can have. This will be scaled to level/PARAMETER_MAX.

    Returns:
        The result of scaling `maxval` according to `level`
        (``level * maxval / PARAMETER_MAX``).
    """
    scaled = level * maxval
    return scaled / PARAMETER_MAX
|
||||
|
||||
## Transformations ##
|
||||
def flipLR(x):
    """Flip images horizontally (Left-Right).

    The flip is an exact reversal of the last (width) dimension: no
    resampling or interpolation is involved, and gradients flow through it.

    Args:
        x (Tensor): Batch of images, with width as the last dimension.

    Returns:
        (Tensor): Batch of flipped images.
    """
    return torch.flip(x, dims=(-1,))
|
||||
|
||||
def flipUD(x):
    """Flip images vertically (Up-Down).

    The flip is an exact reversal of the second-to-last (height) dimension:
    no resampling or interpolation is involved, and gradients flow through it.

    Args:
        x (Tensor): Batch of images, with height and width as the last two dimensions.

    Returns:
        (Tensor): Batch of flipped images.
    """
    return torch.flip(x, dims=(-2,))
|
||||
|
||||
def rotate(x, angle):
    """Rotate images.

    Args:
        x (Tensor): Batch of images.
        angle (Tensor): Angle (degrees) of rotation for each image.

    Returns:
        (Tensor): Batch of rotated images.
    """
    # Kornia does not support integer angles: cast to float first.
    float_angle = angle.type(torch.float)
    return kornia.rotate(x, angle=float_angle)
|
||||
|
||||
def translate(x, translation):
    """Translate images.

    Args:
        x (Tensor): Batch of images.
        translation (Tensor): Distance (pixels) of translation for each image.

    Returns:
        (Tensor): Batch of translated images.
    """
    # Kornia does not support integer translations: cast to float first.
    float_translation = translation.type(torch.float)
    return kornia.translate(x, translation=float_translation)
|
||||
|
||||
def shear(x, shear):
    """Shear images.

    Args:
        x (Tensor): Batch of images.
        shear (Tensor): Shear parameter for each image, forwarded unchanged to kornia.shear.

    Returns:
        (Tensor): Batch of skewed images.
    """
    skewed = kornia.shear(x, shear=shear)
    return skewed
|
||||
|
||||
def contrast(x, contrast_factor):
    """Adjust the contrast of images.

    Thin wrapper around kornia.adjust_contrast, which expects images in the range [0, 1].

    Args:
        x (FloatTensor): Batch of images.
        contrast_factor (FloatTensor): Contrast adjustment factor per element in the batch.
            0 generates a completely black image, 1 does not modify the input image,
            while any other non-negative number modifies the image by this factor.

    Returns:
        (Tensor): Batch of adjusted images.
    """
    adjusted = kornia.adjust_contrast(x, contrast_factor=contrast_factor)
    return adjusted
|
||||
|
||||
def color(x, color_factor):
    """Adjust the color (saturation) of images.

    Each image is blended with its grayscale version; the result is clamped to [0, 1].

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].
        color_factor (Tensor): Color factor for each image.
            0.0 gives a black and white image. A factor of 1.0 gives the original image.

    Returns:
        (Tensor): Batch of adjusted images.
    """
    _, channels, _, _ = x.shape

    # Grayscale version, repeated so that it has as many channels as the input.
    grayscale = kornia.rgb_to_grayscale(x).repeat_interleave(channels, dim=1)
    blended = blend(grayscale, x, color_factor)
    return blended.clamp(min=0.0, max=1.0)
|
||||
|
||||
def brightness(x, brightness_factor):
    """Adjust the brightness of images.

    Each image is blended with a black image; the result is clamped to [0, 1].

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].
        brightness_factor (Tensor): Brightness factor for each image.
            0.0 gives a black image. A factor of 1.0 gives the original image.

    Returns:
        (Tensor): Batch of adjusted images.
    """
    black = torch.zeros(x.size(), device=x.device)
    blended = blend(black, x, brightness_factor)
    return blended.clamp(min=0.0, max=1.0)
|
||||
|
||||
def sharpeness(x, sharpness_factor):
    """Adjust the sharpness of images.

    Each image is blended with a smoothed version of itself; the result is clamped to [0, 1].

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].
        sharpness_factor (Tensor): Sharpness factor for each image.
            0.0 gives the smoothed image. A factor of 1.0 gives the original image.

    Returns:
        (Tensor): Batch of adjusted images.
    """
    _b, _c, _h, _w = x.shape # Input must be a 4-dimensional batch.

    # Smooth filter: https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageFilter.py
    smooth_kernel = torch.tensor([[[ 1., 1., 1.],
                                   [ 1., 5., 1.],
                                   [ 1., 1., 1.]]], device=x.device)
    # NOTE: the alpha channel may need to be handled differently.
    smoothed = kornia.filter2D(x, kernel=smooth_kernel, border_type='reflect', normalized=True)

    blended = blend(smoothed, x, sharpness_factor)
    return blended.clamp(min=0.0, max=1.0)
|
||||
|
||||
def posterize(x, bits):
    """Reduce the number of bits for each color channel.

    Be wary that the cast to integers blocks the gradient propagation.

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].
        bits (Tensor): The number of bits to keep for each channel (1-8), one value per image.

    Returns:
        (Tensor): Batch of posterized images.
    """
    kept_bits = bits.type(torch.uint8) # Gradient is lost here
    pixels = int_image(x)

    # Bit mask clearing the (8 - bits) least significant bits of every pixel.
    dropped = 2 ** (8 - kept_bits) - 1
    keep_mask = ~dropped.type(torch.uint8)

    # One mask value per image, expanded to the full image shape.
    _, channels, h, w = pixels.shape
    keep_mask = keep_mask[:, None, None, None].expand(-1, channels, h, w)

    return float_image(pixels & keep_mask)
|
||||
|
||||
def solarize(x, thresholds):
    """Invert all pixel values above a threshold.

    Be wary that the use of the inequality (x > thresholds) blocks the
    gradient propagation toward the thresholds.

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].
        thresholds (Tensor): One threshold per image; all pixels above it are inverted.

    Returns:
        (Tensor): Batch of solarized images.
    """
    _, channels, h, w = x.shape

    # One threshold per image, expanded to the full image shape.
    pixel_thresholds = thresholds[:, None, None, None].expand(-1, channels, h, w)
    above = x > pixel_thresholds
    return torch.where(above, 1 - x, x)
|
||||
|
||||
def blend(x,y,alpha):
    """Create new images by interpolating between two batches of images.

    x and y should have the same size.
    alpha should have the same batch size as the images.

    Applied batch wise:
        out = x * (1.0 - alpha) + y * alpha

    Note: kornia.add_weighted is not used because it does not work with a batch of alpha values.

    Args:
        x (Tensor): Batch of images.
        y (Tensor): Batch of images.
        alpha (Tensor): The interpolation factor for each image.

    Returns:
        (Tensor): Batch of blended images.

    Raises:
        TypeError: If x or y is not a Tensor.
        AssertionError: If the shapes of x and y differ, or alpha has another batch size.
    """
    if not isinstance(x, torch.Tensor):
        raise TypeError("x should be a tensor. Got {}".format(type(x)))
    if not isinstance(y, torch.Tensor):
        raise TypeError("y should be a tensor. Got {}".format(type(y)))

    assert x.shape == y.shape and x.shape[0] == alpha.shape[0]

    # One interpolation factor per image, expanded to the full image shape.
    _, channels, h, w = x.shape
    weights = alpha[:, None, None, None].expand(-1, channels, h, w)

    return x*(1-weights) + y*weights
|
||||
|
||||
#Batched implementation. Not differentiable (8-bit quantization) and not yet validated against PIL.
def auto_contrast(x):
    """Maximize the contrast of every channel of every image.

    Each channel is quantized to 8 bits, then linearly stretched so that its
    darkest pixel maps to 0 and its brightest pixel maps to 255 (the look-up
    table built by PIL's ImageOps.autocontrast without cutoff). Channels
    holding a single value are left unchanged.

    The whole batch is processed at once, without any Python loop over
    images, channels or pixel levels.

    Be wary that the quantization blocks the gradient propagation.

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].

    Returns:
        (Tensor): Batch of contrast-stretched images, in the range [0, 1].
    """
    print("Warning : Pas encore check !")
    (batch_size, channels, h, w) = x.shape

    # 8-bit quantization, kept as float so that the arithmetic below cannot overflow.
    levels = (x * 255.).type(torch.uint8).type(torch.float)

    # Lowest / highest level of every (image, channel) pair.
    flat = levels.reshape(batch_size, channels, -1)
    lo = flat.min(dim=2)[0].reshape(batch_size, channels, 1, 1)
    hi = flat.max(dim=2)[0].reshape(batch_size, channels, 1, 1)
    span = hi - lo

    # Constant channels cannot be stretched: avoid dividing by zero, they are restored below.
    constant = span <= 0
    safe_span = torch.where(constant, torch.ones_like(span), span)

    # Multiply before dividing so that the brightest level maps exactly to 255.
    stretched = ((levels - lo) * 255. / safe_span).floor().clamp(min=0., max=255.)
    result = torch.where(constant.expand_as(levels), levels, stretched)

    return result / 255.
|
||||
|
||||
def equalize(x):
    """Equalize the histogram of images (not implemented).

    Placeholder: calling it always fails.
    TODO: implement with a batched histogram / look-up table per image and channel
    (torch.histc is not differentiable and works on a single tensor at a time).

    Args:
        x (Tensor): Batch of images. Expected in the range [0, 1].

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("equalize is not implemented")
|
236
higher/smart_aug/utils.py
Executable file
236
higher/smart_aug/utils.py
Executable file
|
@ -0,0 +1,236 @@
|
|||
""" Utility functions.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import json, math, time, os
|
||||
import matplotlib.pyplot as plt
|
||||
import copy
|
||||
import gc
|
||||
|
||||
from torchviz import make_dot
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
import time
|
||||
|
||||
def print_graph(PyTorch_obj, fig_name='graph'):
    """Render the computational graph of a tensor to a PDF file.

    Args:
        PyTorch_obj (Tensor): End of the graph. Commonly, the loss tensor to get the whole graph.
        fig_name (string): Relative path where to save the graph. (default: graph)
    """
    dot = make_dot(PyTorch_obj)
    # Output format, see https://graphviz.readthedocs.io/en/stable/manual.html#formats
    dot.format = 'pdf'
    dot.render(fig_name)
|
||||
|
||||
def plot_resV2(log, fig_name='res', param_names=None):
    """Save a visual graph of the logs.

    Plots the losses and the accuracy and, when the log holds transformation
    parameters, the evolution and the mean/std of their probabilities ('p')
    and magnitudes ('m').

    Args:
        log (list[dict]): Logs of the training generated by most of train_utils, one entry per epoch.
        fig_name (string): Relative path where to save the graph. Dots in the file name
            (not in its directories) are replaced by commas. (default: res)
        param_names (list): Labels for the parameters. (default: None)
    """
    epochs = [x["epoch"] for x in log]

    fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(30, 15))

    ax[0, 0].set_title('Loss')
    ax[0, 0].plot(epochs,[x["train_loss"] for x in log], label='Train')
    ax[0, 0].plot(epochs,[x["val_loss"] for x in log], label='Val')
    ax[0, 0].legend()

    ax[1, 0].set_title('Acc')
    ax[1, 0].plot(epochs,[x["acc"] for x in log])

    # An empty log has no parameters to plot.
    if log and log[0]["param"] is not None:
        n_params = len(log[0]["param"])
        if not param_names:
            param_names = ['P'+str(idx) for idx in range(n_params)]
        # One series per parameter, indexed by epoch.
        proba = [[x["param"][idx]['p'] for x in log] for idx in range(n_params)]
        mag = [[x["param"][idx]['m'] for x in log] for idx in range(n_params)]

        ax[0, 1].set_title('Prob =f(epoch)')
        ax[0, 1].stackplot(epochs, proba, labels=param_names)

        ax[1, 1].set_title('Prob =f(TF)')
        ax[1, 1].bar(param_names, np.mean(proba, axis=1), yerr=np.std(proba, axis=1))
        plt.sca(ax[1, 1])
        plt.xticks(rotation=90)

        ax[0, 2].set_title('Mag =f(epoch)')
        ax[0, 2].stackplot(epochs, mag, labels=param_names)
        ax[0, 2].legend(param_names, loc='center left', bbox_to_anchor=(1, 0.5))

        ax[1, 2].set_title('Mag =f(TF)')
        ax[1, 2].bar(param_names, np.mean(mag, axis=1), yerr=np.std(mag, axis=1))
        plt.sca(ax[1, 2])
        plt.xticks(rotation=90)

    # Dots in the file name would be read as a file extension by matplotlib.
    # Only the last path component is altered, so that '../' and dotted directories stay valid.
    directory, base_name = os.path.split(fig_name)
    fig_name = os.path.join(directory, base_name.replace('.',','))
    plt.savefig(fig_name, bbox_inches='tight')
    plt.close()
|
||||
|
||||
def plot_compare(filenames, fig_name='res'):
    """Save a visual graph comparing training stats.

    Losses, accuracy and parameters of every log are drawn on shared axes.
    The figure title lists the compared files with their index.

    Args:
        filenames (list[Strings]): Relative paths to the logs (JSON files).
        fig_name (string): Relative path where to save the graph. Dots in the file name
            (not in its directories) are replaced by commas. (default: res)
    """
    all_data = []
    for file in filenames:
        with open(file) as json_file:
            all_data.append(json.load(json_file))
    legend = "".join(str(idx)+'-'+file+'\n' for idx, file in enumerate(filenames))

    fig, ax = plt.subplots(ncols=3, figsize=(30, 8))

    for data_idx, data in enumerate(all_data):
        log = data['Log']
        epochs = [x["epoch"] for x in log]

        ax[0].plot(epochs,[x["train_loss"] for x in log], label=str(data_idx)+'-Train')
        ax[0].plot(epochs,[x["val_loss"] for x in log], label=str(data_idx)+'-Val')

        ax[1].plot(epochs,[x["acc"] for x in log], label=str(data_idx))

        if log[0]["param"] is not None:
            if isinstance(log[0]["param"],float):
                # Single parameter: the magnitude.
                ax[2].plot(epochs,[x["param"] for x in log], label=str(data_idx)+'-Mag')
            else :
                # One curve per parameter.
                for idx, _ in enumerate(log[0]["param"]):
                    ax[2].plot(epochs,[x["param"][idx] for x in log], label=str(data_idx)+'-P'+str(idx))

    fig.suptitle(legend)
    ax[0].set_title('Loss')
    ax[1].set_title('Acc')
    ax[2].set_title('Param')
    for a in ax:
        a.legend()

    # Dots in the file name would be read as a file extension by matplotlib.
    # Only the last path component is altered, so that '../' and dotted directories stay valid.
    directory, base_name = os.path.split(fig_name)
    fig_name = os.path.join(directory, base_name.replace('.',','))
    plt.savefig(fig_name, bbox_inches='tight')
    plt.close()
|
||||
|
||||
def viz_sample_data(imgs, labels, fig_name='data_sample', weight_labels=None):
    """Save data samples.

    Up to 25 images are drawn on a 5x5 grid, each with its label
    (and its weight when provided).

    Args:
        imgs (Tensor): Batch of images to sample from, channels in dimension 1.
            At most the first 25 images are shown; smaller batches are shown entirely.
        labels (Tensor): Labels of the images.
        fig_name (string): Relative path where to save the graph. (default: data_sample)
        weight_labels (Tensor): Weights associated with each label. (default: None)
    """
    # Channels last for matplotlib. Only a single-channel dimension is dropped,
    # so that a batch of one image keeps its batch dimension.
    sample = imgs[0:25,].permute(0, 2, 3, 1).squeeze(dim=-1).cpu()
    n_samples = sample.shape[0] # May be lower than 25

    plt.figure(figsize=(10,10))
    for i in range(n_samples):
        plt.subplot(5,5,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(sample[i,].detach().numpy(), cmap=plt.cm.binary)
        label = str(labels[i].item())
        if weight_labels is not None:
            label += (" - p %.2f" % weight_labels[i].item())
        plt.xlabel(label)

    plt.savefig(fig_name)
    print("Sample saved :", fig_name)
    plt.close('all')
|
||||
|
||||
def print_torch_mem(add_info=''):
    """Print information on PyTorch memory usage.

    Two lines are printed: the number of tensors tracked by the garbage
    collector (with the largest sum of dimensions among them), then a simple
    GPU memory report.

    Args:
        add_info (string): Prefix added before the print. (default: '')
    """
    nb=0
    max_size=0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
                size = np.sum(obj.size())
                if size > max_size:
                    max_size = size
                nb += 1
        except Exception:
            # Inspecting arbitrary objects may fail; such objects are simply not counted.
            pass
    print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)

    # Simple GPU memory report.
    mega_bytes = 1024.0 * 1024.0
    string = add_info + ' memory (MB)'
    string += ' | allocated: {}'.format(
        torch.cuda.memory_allocated() / mega_bytes)
    string += ' | max allocated: {}'.format(
        torch.cuda.max_memory_allocated() / mega_bytes)
    string += ' | cached: {}'.format(torch.cuda.memory_cached() / mega_bytes)
    string += ' | max cached: {}'.format(
        torch.cuda.max_memory_cached()/ mega_bytes)
    print(string)
|
||||
|
||||
'''
|
||||
def plot_TF_influence(log, fig_name='TF_influence', param_names=None):
|
||||
proba=[[x["param"][idx]['p'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
||||
mag=[[x["param"][idx]['m'] for x in log] for idx, _ in enumerate(log[0]["param"])]
|
||||
|
||||
plt.figure()
|
||||
|
||||
mean = np.mean(proba, axis=1)*np.mean(mag, axis=1) #Pourrait etre interessant de multiplier avant le mean
|
||||
std = np.std(proba, axis=1)*np.std(mag, axis=1)
|
||||
plt.bar(param_names, mean, yerr=std)
|
||||
|
||||
plt.xticks(rotation=90)
|
||||
fig_name = fig_name.replace('.',',')
|
||||
plt.savefig(fig_name, bbox_inches='tight')
|
||||
plt.close()
|
||||
'''
|
||||
|
||||
from torch._six import inf
|
||||
def clip_norm(tensors, max_norm, norm_type=2):
    """Clips norm of passed tensors.

    The norm is computed over all tensors together, as if they were
    concatenated into a single vector. Clipped tensors are returned
    (the inputs are not modified in place).

    See: https://github.com/facebookresearch/higher/issues/18

    Args:
        tensors (Iterable[Tensor]): an iterable of Tensors or a
            single Tensor to be normalized.
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.

    Returns:
        Clipped (List[Tensor]) tensors. The input tensors themselves are
        returned (in a list) when no clipping is needed.
    """
    if isinstance(tensors, torch.Tensor):
        tensors = [tensors]
    tensors = list(tensors)
    if not tensors:
        # Nothing to clip (and max() below would fail on an empty sequence).
        return tensors

    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if norm_type == float("inf"):
        total_norm = max(t.abs().max().item() for t in tensors)
    else:
        # p-norm of the concatenation: sum the p-th powers of the individual norms.
        total_norm = sum(t.norm(norm_type).item() ** norm_type for t in tensors)
        total_norm = total_norm ** (1. / norm_type)

    # The epsilon avoids a division by zero when all tensors are null.
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef >= 1:
        return tensors
    return [t.mul(clip_coef) for t in tensors]
|
Loading…
Add table
Add a link
Reference in a new issue