mirror of
https://github.com/AntoineHX/smart_augmentation.git
synced 2025-05-04 04:00:46 +02:00
LR scheduler + resolution of ResNet50/WRN problem
This commit is contained in:
parent
383f63c7b8
commit
79de0191a8
4 changed files with 78 additions and 21 deletions
|
@ -7,7 +7,7 @@ from torch.utils.data.dataset import ConcatDataset
|
||||||
import torchvision
|
import torchvision
|
||||||
|
|
||||||
#Train/Validation batch size.
|
#Train/Validation batch size.
|
||||||
BATCH_SIZE = 300
|
BATCH_SIZE = 512
|
||||||
#Test batch size.
|
#Test batch size.
|
||||||
TEST_SIZE = BATCH_SIZE
|
TEST_SIZE = BATCH_SIZE
|
||||||
#TEST_SIZE = 10000 #slightly faster / higher memory consumption!
|
#TEST_SIZE = 10000 #slightly faster / higher memory consumption!
|
||||||
|
|
|
@ -958,6 +958,8 @@ class Augmented_model(nn.Module):
|
||||||
|
|
||||||
model.step(loss)
|
model.step(loss)
|
||||||
|
|
||||||
|
Does not support LR scheduler.
|
||||||
|
|
||||||
See ''run_simple_smartaug'' for a complete example.
|
See ''run_simple_smartaug'' for a complete example.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
|
@ -82,7 +82,7 @@ if __name__ == "__main__":
|
||||||
}
|
}
|
||||||
#Parameters
|
#Parameters
|
||||||
n_inner_iter = 1
|
n_inner_iter = 1
|
||||||
epochs = 150
|
epochs = 200
|
||||||
dataug_epoch_start=0
|
dataug_epoch_start=0
|
||||||
optim_param={
|
optim_param={
|
||||||
'Meta':{
|
'Meta':{
|
||||||
|
@ -91,10 +91,11 @@ if __name__ == "__main__":
|
||||||
},
|
},
|
||||||
'Inner':{
|
'Inner':{
|
||||||
'optim': 'SGD',
|
'optim': 'SGD',
|
||||||
'lr':1e-2, #1e-2
|
'lr':1e-1, #1e-2/1e-1
|
||||||
'momentum':0.9, #0.9
|
'momentum':0.9, #0.9
|
||||||
'decay':0.0001,
|
'decay':0.0005, #0.0005
|
||||||
'nesterov':True,
|
'nesterov':True,
|
||||||
|
'scheduler':'exponential', #None, 'cosine', 'multiStep', 'exponential'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,21 +104,26 @@ if __name__ == "__main__":
|
||||||
#model = ResNet(num_classes=10)
|
#model = ResNet(num_classes=10)
|
||||||
import torchvision.models as models
|
import torchvision.models as models
|
||||||
#model=models.resnet18()
|
#model=models.resnet18()
|
||||||
model_name = 'resnet50' #'wide_resnet50_2' #'resnet18' #str(model)
|
model_name = 'resnet18' #'wide_resnet50_2' #'resnet18' #str(model)
|
||||||
model = getattr(models.resnet, model_name)(pretrained=False)
|
model = getattr(models.resnet, model_name)(pretrained=False, num_classes=len(dl_train.dataset.classes))
|
||||||
|
|
||||||
#### Classic ####
|
#### Classic ####
|
||||||
if 'classic' in tasks:
|
if 'classic' in tasks:
|
||||||
|
torch.cuda.reset_max_memory_allocated() #reset_peak_stats
|
||||||
|
torch.cuda.reset_max_memory_cached() #reset_peak_stats
|
||||||
t0 = time.perf_counter()
|
t0 = time.perf_counter()
|
||||||
|
|
||||||
model = model.to(device)
|
model = model.to(device)
|
||||||
|
|
||||||
|
|
||||||
print("{} on {} for {} epochs".format(model_name, device_name, epochs))
|
print("{} on {} for {} epochs".format(model_name, device_name, epochs))
|
||||||
|
#print("RandAugment(N{}-M{:.2f})-{} on {} for {} epochs".format(rand_aug['N'],rand_aug['M'],model_name, device_name, epochs))
|
||||||
log= train_classic(model=model, opt_param=optim_param, epochs=epochs, print_freq=10)
|
log= train_classic(model=model, opt_param=optim_param, epochs=epochs, print_freq=10)
|
||||||
#log= train_classic_higher(model=model, epochs=epochs)
|
#log= train_classic_higher(model=model, epochs=epochs)
|
||||||
|
|
||||||
exec_time=time.perf_counter() - t0
|
exec_time=time.perf_counter() - t0
|
||||||
max_cached = torch.cuda.max_memory_cached()/(1024.0 * 1024.0) #torch.cuda.max_memory_reserved()
|
max_allocated = torch.cuda.max_memory_allocated()/(1024.0 * 1024.0)
|
||||||
|
max_cached = torch.cuda.max_memory_cached()/(1024.0 * 1024.0) #torch.cuda.max_memory_reserved() #MB
|
||||||
####
|
####
|
||||||
print('-'*9)
|
print('-'*9)
|
||||||
times = [x["time"] for x in log]
|
times = [x["time"] for x in log]
|
||||||
|
@ -125,10 +131,13 @@ if __name__ == "__main__":
|
||||||
"Time": (np.mean(times),np.std(times), exec_time),
|
"Time": (np.mean(times),np.std(times), exec_time),
|
||||||
'Optimizer': optim_param['Inner'],
|
'Optimizer': optim_param['Inner'],
|
||||||
"Device": device_name,
|
"Device": device_name,
|
||||||
"Memory": max_cached,
|
"Memory": [max_allocated, max_cached],
|
||||||
|
#"Rand_Aug": rand_aug,
|
||||||
"Log": log}
|
"Log": log}
|
||||||
print(model_name,": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
|
print(model_name,": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
|
||||||
filename = "{}-{} epochs".format(model_name,epochs)
|
filename = "{}-{} epochs".format(model_name,epochs)
|
||||||
|
#print("RandAugment-",model_name,": acc", out["Accuracy"], "in:", out["Time"][0], "+/-", out["Time"][1])
|
||||||
|
#filename = "RandAugment(N{}-M{:.2f})-{}-{} epochs".format(rand_aug['N'],rand_aug['M'],model_name,epochs)
|
||||||
with open("../res/log/%s.json" % filename, "w+") as f:
|
with open("../res/log/%s.json" % filename, "w+") as f:
|
||||||
try:
|
try:
|
||||||
json.dump(out, f, indent=True)
|
json.dump(out, f, indent=True)
|
||||||
|
@ -163,7 +172,7 @@ if __name__ == "__main__":
|
||||||
inner_it=n_inner_iter,
|
inner_it=n_inner_iter,
|
||||||
dataug_epoch_start=dataug_epoch_start,
|
dataug_epoch_start=dataug_epoch_start,
|
||||||
opt_param=optim_param,
|
opt_param=optim_param,
|
||||||
print_freq=10,
|
print_freq=1,
|
||||||
unsup_loss=1,
|
unsup_loss=1,
|
||||||
hp_opt=False,
|
hp_opt=False,
|
||||||
save_sample_freq=None)
|
save_sample_freq=None)
|
||||||
|
|
|
@ -143,6 +143,8 @@ def train_classic(model, opt_param, epochs=1, print_freq=1):
|
||||||
(list) Logs of training. Each item is a dict containing the results of an epoch.
|
(list) Logs of training. Each item is a dict containing the results of an epoch.
|
||||||
"""
|
"""
|
||||||
device = next(model.parameters()).device
|
device = next(model.parameters()).device
|
||||||
|
|
||||||
|
#Optimizer
|
||||||
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
|
#opt = torch.optim.Adam(model.parameters(), lr=1e-3)
|
||||||
optim = torch.optim.SGD(model.parameters(),
|
optim = torch.optim.SGD(model.parameters(),
|
||||||
lr=opt_param['Inner']['lr'],
|
lr=opt_param['Inner']['lr'],
|
||||||
|
@ -150,11 +152,28 @@ def train_classic(model, opt_param, epochs=1, print_freq=1):
|
||||||
weight_decay=opt_param['Inner']['decay'],
|
weight_decay=opt_param['Inner']['decay'],
|
||||||
nesterov=opt_param['Inner']['nesterov']) #lr=1e-2 / momentum=0.9
|
nesterov=opt_param['Inner']['nesterov']) #lr=1e-2 / momentum=0.9
|
||||||
|
|
||||||
|
#Scheduler
|
||||||
|
inner_scheduler=None
|
||||||
|
if opt_param['Inner']['scheduler']=='cosine':
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=epochs, eta_min=0.)
|
||||||
|
elif opt_param['Inner']['scheduler']=='multiStep':
|
||||||
|
#Multistep milestones inspired by AutoAugment
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.MultiStepLR(optim,
|
||||||
|
milestones=[int(epochs/3), int(epochs*2/3), int(epochs*2.7/3)],
|
||||||
|
gamma=0.1)
|
||||||
|
elif opt_param['Inner']['scheduler']=='exponential':
|
||||||
|
#inner_scheduler=torch.optim.lr_scheduler.ExponentialLR(optim, gamma=0.1) #Wrong gamma
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.LambdaLR(optim, lambda epoch: (1 - epoch / epochs) ** 0.9)
|
||||||
|
elif opt_param['Inner']['scheduler'] is not None:
|
||||||
|
raise ValueError("Lr scheduler unknown : %s"%opt_param['Inner']['scheduler'])
|
||||||
|
|
||||||
|
#Training
|
||||||
model.train()
|
model.train()
|
||||||
dl_val_it = iter(dl_val)
|
dl_val_it = iter(dl_val)
|
||||||
log = []
|
log = []
|
||||||
for epoch in range(epochs):
|
for epoch in range(epochs):
|
||||||
#print_torch_mem("Start epoch")
|
#print_torch_mem("Start epoch")
|
||||||
|
#print(optim.param_groups[0]['lr'])
|
||||||
t0 = time.perf_counter()
|
t0 = time.perf_counter()
|
||||||
for i, (features, labels) in enumerate(dl_train):
|
for i, (features, labels) in enumerate(dl_train):
|
||||||
#viz_sample_data(imgs=features, labels=labels, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
|
#viz_sample_data(imgs=features, labels=labels, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
|
||||||
|
@ -168,6 +187,10 @@ def train_classic(model, opt_param, epochs=1, print_freq=1):
|
||||||
loss.backward()
|
loss.backward()
|
||||||
optim.step()
|
optim.step()
|
||||||
|
|
||||||
|
|
||||||
|
if inner_scheduler is not None:
|
||||||
|
inner_scheduler.step()
|
||||||
|
|
||||||
#### Tests ####
|
#### Tests ####
|
||||||
tf = time.perf_counter()
|
tf = time.perf_counter()
|
||||||
|
|
||||||
|
@ -175,15 +198,6 @@ def train_classic(model, opt_param, epochs=1, print_freq=1):
|
||||||
accuracy, f1 =test(model)
|
accuracy, f1 =test(model)
|
||||||
model.train()
|
model.train()
|
||||||
|
|
||||||
#### Print ####
|
|
||||||
if(print_freq and epoch%print_freq==0):
|
|
||||||
print('-'*9)
|
|
||||||
print('Epoch : %d/%d'%(epoch,epochs))
|
|
||||||
print('Time : %.00f'%(tf - t0))
|
|
||||||
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
|
|
||||||
print('Accuracy max:', accuracy)
|
|
||||||
print('F1 :', ["{0:0.4f}".format(i) for i in f1])
|
|
||||||
|
|
||||||
#### Log ####
|
#### Log ####
|
||||||
data={
|
data={
|
||||||
"epoch": epoch,
|
"epoch": epoch,
|
||||||
|
@ -196,6 +210,14 @@ def train_classic(model, opt_param, epochs=1, print_freq=1):
|
||||||
"param": None,
|
"param": None,
|
||||||
}
|
}
|
||||||
log.append(data)
|
log.append(data)
|
||||||
|
#### Print ####
|
||||||
|
if(print_freq and epoch%print_freq==0):
|
||||||
|
print('-'*9)
|
||||||
|
print('Epoch : %d/%d'%(epoch,epochs))
|
||||||
|
print('Time : %.00f'%(tf - t0))
|
||||||
|
print('Train loss :',loss.item(), '/ val loss', val_loss.item())
|
||||||
|
print('Accuracy max:', max([x["acc"] for x in log]))
|
||||||
|
print('F1 :', ["{0:0.4f}".format(i) for i in f1])
|
||||||
|
|
||||||
return log
|
return log
|
||||||
|
|
||||||
|
@ -236,7 +258,7 @@ def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start
|
||||||
|
|
||||||
## Optimizers ##
|
## Optimizers ##
|
||||||
#Inner Opt
|
#Inner Opt
|
||||||
optim = torch.optim.SGD(model.parameters(),
|
inner_opt = torch.optim.SGD(model['model']['original'].parameters(),
|
||||||
lr=opt_param['Inner']['lr'],
|
lr=opt_param['Inner']['lr'],
|
||||||
momentum=opt_param['Inner']['momentum'],
|
momentum=opt_param['Inner']['momentum'],
|
||||||
weight_decay=opt_param['Inner']['decay'],
|
weight_decay=opt_param['Inner']['decay'],
|
||||||
|
@ -247,6 +269,21 @@ def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start
|
||||||
grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
|
grad_callback=(lambda grads: clip_norm(grads, max_norm=10)),
|
||||||
track_higher_grads=high_grad_track)
|
track_higher_grads=high_grad_track)
|
||||||
|
|
||||||
|
#Scheduler
|
||||||
|
inner_scheduler=None
|
||||||
|
if opt_param['Inner']['scheduler']=='cosine':
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=epochs, eta_min=0.)
|
||||||
|
elif opt_param['Inner']['scheduler']=='multiStep':
|
||||||
|
#Multistep milestones inspired by AutoAugment
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.MultiStepLR(optim,
|
||||||
|
milestones=[int(epochs/3), int(epochs*2/3), int(epochs*2.7/3)],
|
||||||
|
gamma=0.1)
|
||||||
|
elif opt_param['Inner']['scheduler']=='exponential':
|
||||||
|
#inner_scheduler=torch.optim.lr_scheduler.ExponentialLR(optim, gamma=0.1) #Wrong gamma
|
||||||
|
inner_scheduler=torch.optim.lr_scheduler.LambdaLR(optim, lambda epoch: (1 - epoch / epochs) ** 0.9)
|
||||||
|
elif opt_param['Inner']['scheduler'] is not None:
|
||||||
|
raise ValueError("Lr scheduler unknown : %s"%opt_param['Inner']['scheduler'])
|
||||||
|
|
||||||
#Meta Opt
|
#Meta Opt
|
||||||
hyper_param = list(model['data_aug'].parameters())
|
hyper_param = list(model['data_aug'].parameters())
|
||||||
if hp_opt :
|
if hp_opt :
|
||||||
|
@ -286,7 +323,7 @@ def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start
|
||||||
#print_graph(loss) #to visualize computational graph
|
#print_graph(loss) #to visualize computational graph
|
||||||
|
|
||||||
#t = time.process_time()
|
#t = time.process_time()
|
||||||
diffopt.step(loss) #(opt.zero_grad, loss.backward, opt.step)
|
diffopt.step(loss)#(opt.zero_grad, loss.backward, opt.step)
|
||||||
#print(len(model['model']['functional']._fast_params),"step", time.process_time()-t)
|
#print(len(model['model']['functional']._fast_params),"step", time.process_time()-t)
|
||||||
|
|
||||||
|
|
||||||
|
@ -318,6 +355,13 @@ def run_dist_dataugV3(model, opt_param, epochs=1, inner_it=1, dataug_epoch_start
|
||||||
|
|
||||||
tf = time.perf_counter()
|
tf = time.perf_counter()
|
||||||
|
|
||||||
|
if inner_scheduler is not None:
|
||||||
|
inner_scheduler.step()
|
||||||
|
#Transfer inner_opt lr to diffopt
|
||||||
|
for diff_param_group in diffopt.param_groups:
|
||||||
|
for param_group in inner_opt.param_groups:
|
||||||
|
diff_param_group['lr'] = param_group['lr']
|
||||||
|
|
||||||
if (save_sample_freq and epoch%save_sample_freq==0): #Data sample saving
|
if (save_sample_freq and epoch%save_sample_freq==0): #Data sample saving
|
||||||
try:
|
try:
|
||||||
viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
|
viz_sample_data(imgs=xs, labels=ys, fig_name='../samples/data_sample_epoch{}_noTF'.format(epoch))
|
||||||
|
@ -396,6 +440,8 @@ def run_simple_smartaug(model, opt_param, epochs=1, inner_it=1, print_freq=1, un
|
||||||
Training loss can either be computed directly from augmented inputs (unsup_loss=0).
|
Training loss can either be computed directly from augmented inputs (unsup_loss=0).
|
||||||
However, it is recommended to use the mixed loss computation, which combines original and augmented inputs to compute the loss (unsup_loss>0).
|
However, it is recommended to use the mixed loss computation, which combines original and augmented inputs to compute the loss (unsup_loss>0).
|
||||||
|
|
||||||
|
Does not support LR scheduler.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model (nn.Module): Augmented model to train.
|
model (nn.Module): Augmented model to train.
|
||||||
opt_param (dict): Dictionary containing optimizer parameters.
|
opt_param (dict): Dictionary containing optimizer parameters.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue