Mirror of https://github.com/AntoineHX/smart_augmentation.git (synced 2025-05-04 20:20:46 +02:00)
Rangement (tidying up the repository)
This commit is contained in:
parent ca3367d19f
commit 4166922c34
453 changed files with 9797 additions and 7 deletions
5  Old/Gradient-Descent-The-Ultimate-Optimizer/.gitignore  (vendored, Executable file)
@@ -0,0 +1,5 @@
venv/
__pycache__
data/
log/
.vscode/
BIN  Old/Gradient-Descent-The-Ultimate-Optimizer/20190929-paper.pdf  (Executable file)
Binary file not shown.
33  Old/Gradient-Descent-The-Ultimate-Optimizer/README.md  (Executable file)
@@ -0,0 +1,33 @@
# Gradient Descent: The Ultimate Optimizer

[](https://github.com/ambv/black)

| ⚠️ WARNING: THIS IS NOT MY WORK ⚠️ |
| --- |

This repository contains the paper and the code for the paper [Gradient Descent:
The Ultimate Optimizer](https://arxiv.org/abs/1909.13371).

I couldn't find the code (which is in the appendix at the end of the paper)
anywhere on the web. What I present here is the code of the paper, with
instructions on how to set it up.

Getting the code into a runnable state required some fixes on my part, so the
code may differ slightly from what is presented in the paper.

## Set up

```sh
git clone https://github.com/Rainymood/Gradient-Descent-The-Ultimate-Optimizer
cd Gradient-Descent-The-Ultimate-Optimizer
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt
python main.py
```

When you are done, you can exit the virtualenv with

```shell
deactivate
```
244  Old/Gradient-Descent-The-Ultimate-Optimizer/data_aug.py  (Executable file)
@@ -0,0 +1,244 @@
from hyperopt import *
|
||||
#from hyperopt_v2 import *
|
||||
|
||||
import torchvision.transforms.functional as TF
|
||||
import torchvision.transforms as T
|
||||
|
||||
#from scipy import ndimage
|
||||
import kornia
|
||||
|
||||
import random
|
||||
|
||||
|
||||
class MNIST_FullyConnected_Augmented(Optimizable):
|
||||
"""
|
||||
A fully-connected NN for the MNIST task. This is Optimizable but not itself
|
||||
an optimizer.
|
||||
"""
|
||||
|
||||
def __init__(self, num_inp, num_hid, num_out, optimizer, device = torch.device('cuda')):
|
||||
self.device = device
|
||||
#print(self.device)
|
||||
parameters = {
|
||||
"w1": torch.zeros(num_inp, num_hid, device=self.device).t(),
|
||||
"b1": torch.zeros(num_hid, device=self.device).t(),
|
||||
"w2": torch.zeros(num_hid, num_out, device=self.device).t(),
|
||||
"b2": torch.zeros(num_out, device=self.device).t(),
|
||||
|
||||
#Data augmentation
|
||||
"prob": torch.tensor(0.5, device=self.device),
|
||||
"mag": torch.tensor(180.0, device=self.device),
|
||||
}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def initialize(self):
|
||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
||||
self.optimizer.initialize()
|
||||
#print(self.device)
|
||||
|
||||
def forward(self, x):
|
||||
"""Compute a prediction."""
|
||||
#print("Prob:",self.parameters["prob"].item())
|
||||
if random.random() < self.parameters["prob"]:
|
||||
#angle = 45
|
||||
#x = TF.rotate(x, angle)
|
||||
#print(self.device)
|
||||
#x = F.linear(x, torch.ones(28*28, 28*28, device=self.device).t()*self.parameters["mag"], bias=None)
|
||||
x = x + self.parameters["mag"]
|
||||
|
||||
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
|
||||
x = torch.tanh(x)
|
||||
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
|
||||
x = torch.tanh(x)
|
||||
x = F.log_softmax(x, dim=1)
|
||||
return x
|
||||
|
||||
def adjust(self):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist_FC_augmented / " + str(self.optimizer)
|
||||
|
||||
class LeNet(Optimizable, nn.Module):
|
||||
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
|
||||
nn.Module.__init__(self)
|
||||
self.device = device
|
||||
parameters = {
|
||||
"w1": torch.zeros(20, num_inp, 5, 5, device=self.device),
|
||||
"b1": torch.zeros(20, device=self.device),
|
||||
"w2": torch.zeros(50, 20, 5, 5, device=self.device),
|
||||
"b2": torch.zeros(50, device=self.device),
|
||||
"w3": torch.zeros(500,4*4*50, device=self.device),
|
||||
"b3": torch.zeros(500, device=self.device),
|
||||
"w4": torch.zeros(10, 500, device=self.device),
|
||||
"b4": torch.zeros(10, device=self.device),
|
||||
|
||||
#Data augmentation
|
||||
"prob": torch.tensor(1.0, device=self.device),
|
||||
"mag": torch.tensor(180.0, device=self.device),
|
||||
}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def initialize(self):
|
||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.parameters["w3"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.parameters["w4"], a=math.sqrt(5))
|
||||
self.optimizer.initialize()
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
if random.random() < self.parameters["prob"]:
|
||||
|
||||
batch_size = x.shape[0]
|
||||
# create transformation (rotation)
|
||||
alpha = self.parameters["mag"] # in degrees
|
||||
angle = torch.ones(batch_size, device=self.device) * alpha
|
||||
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=self.device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=self.device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
#print("Start Shape ", x.shape)
|
||||
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
|
||||
#print("Shape ", out.shape)
|
||||
return F.log_softmax(out, dim=1)
|
||||
|
||||
def adjust(self):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist_CNN_augmented / " + str(self.optimizer)
|
||||
|
||||
class LeNet_v2(Optimizable, nn.Module):
|
||||
def __init__(self, num_inp, num_out, optimizer, device = torch.device('cuda')):
|
||||
|
||||
nn.Module.__init__(self)
|
||||
self.device = device
|
||||
self.conv1 = nn.Conv2d(num_inp, 20, 5, 1)
|
||||
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||
#self.fc1 = nn.Linear(4*4*50, 500)
|
||||
self.fc1 = nn.Linear(1250, 500)
|
||||
self.fc2 = nn.Linear(500, 10)
|
||||
|
||||
#print(self.conv1.weight)
|
||||
parameters = {
|
||||
"w1": self.conv1.weight,
|
||||
"b1": self.conv1.bias,
|
||||
"w2": self.conv2.weight,
|
||||
"b2": self.conv2.bias,
|
||||
"w3": self.fc1.weight,
|
||||
"b3": self.fc1.bias,
|
||||
"w4": self.fc2.weight,
|
||||
"b4": self.fc2.bias,
|
||||
|
||||
#Data augmentation
|
||||
"prob": torch.tensor(0.5, device=self.device),
|
||||
"mag": torch.tensor(1.0, device=self.device),
|
||||
}
|
||||
Optimizable.__init__(self, parameters, optimizer)
|
||||
|
||||
'''
|
||||
def forward(self, x): # saturates memory???
|
||||
x = F.relu(self.conv1(x))
|
||||
x = F.max_pool2d(x, 2, 2)
|
||||
x = F.relu(self.conv2(x))
|
||||
x = F.max_pool2d(x, 2, 2)
|
||||
#x = x.view(-1, 4*4*50)
|
||||
x = x.view(x.size(0), -1)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = self.fc2(x)
|
||||
return F.log_softmax(x, dim=1)
|
||||
'''
|
||||
def forward(self, x):
|
||||
|
||||
if random.random() < self.parameters["prob"].item():
|
||||
#print(self.parameters["prob"])
|
||||
#x = [T.ToTensor()(
|
||||
# TF.affine(img=T.ToPILImage()(im), angle=self.parameters["mag"], translate=(0,0), scale=1, shear=0, resample=0, fillcolor=None))
|
||||
# for im in torch.unbind(x,dim=0)]
|
||||
#x = torch.stack(x,dim=0)
|
||||
|
||||
#x = [ndimage.rotate(im, self.parameters["mag"], reshape=False)
|
||||
# for im in torch.unbind(x,dim=0)]
|
||||
#x = torch.stack(x,dim=0)
|
||||
|
||||
#x = [im + self.parameters["mag"]
|
||||
# for im in torch.unbind(x,dim=0)]
|
||||
#x = torch.stack(x,dim=0)
|
||||
|
||||
batch_size = x.shape[0]
|
||||
# create transformation (rotation)
|
||||
alpha = self.parameters["mag"] * 180 # in degrees
|
||||
angle = torch.ones(batch_size, device=self.device) * alpha
|
||||
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=self.device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=self.device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
#print("Start Shape ", x.shape)
|
||||
out = F.relu(F.conv2d(input=x, weight=self.parameters["w1"], bias=self.parameters["b1"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.conv2d(input=out, weight=self.parameters["w2"], bias=self.parameters["b2"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.linear(out, self.parameters["w3"], self.parameters["b3"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.linear(out, self.parameters["w4"], self.parameters["b4"])
|
||||
#print("Shape ", out.shape)
|
||||
return F.log_softmax(out, dim=1)
|
||||
|
||||
def initialize(self):
|
||||
self.optimizer.initialize()
|
||||
|
||||
def adjust(self):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
|
||||
def adjust_val(self):
|
||||
self.optimizer.adjust_val(self.parameters)
|
||||
|
||||
def eval(self):
|
||||
self.parameters['prob']=torch.tensor(0.0, device=self.device)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist_CNN_augmented / " + str(self.optimizer)
|
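Both augmented forward passes above route the rotation through kornia rather than PIL or scipy so that the loss stays differentiable with respect to the augmentation magnitude "mag" (which Adam.adjust_val later updates). A minimal, repository-independent sketch of that property, assuming the kornia ~0.1/0.2 API used here (top-level `get_rotation_matrix2d` / `warp_affine`, per-sample scalar scale):

```python
# Hedged sketch, not part of data_aug.py: gradient flow through a kornia rotation.
import torch
import kornia

mag = torch.tensor(30.0, requires_grad=True)   # rotation angle in degrees, to be learned
x = torch.rand(4, 1, 28, 28)                   # fake batch of images

b = x.shape[0]
angle = torch.ones(b) * mag                    # differentiable w.r.t. mag
center = torch.ones(b, 2)
center[..., 0] = x.shape[3] / 2
center[..., 1] = x.shape[2] / 2
scale = torch.ones(b)

M = kornia.get_rotation_matrix2d(center, angle, scale)
x_aug = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3]))

loss = x_aug.mean()                            # stand-in for the task loss
loss.backward()
print(mag.grad)                                # non-zero: the magnitude is trainable
```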
52  Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug.py  (Executable file)
@@ -0,0 +1,52 @@
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF


class MNIST_aug(Dataset):

    training_file = 'training.pt'
    test_file = 'test.pt'
    classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
               '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']

    def __init__(self):
        self.images = [TF.to_pil_image(x) for x in torch.ByteTensor(10, 3, 48, 48)]
        self.set_stage(0)  # initial stage

    def __getitem__(self, index):
        image = self.images[index]

        # Just apply your transformations here
        image = self.crop(image)
        x = TF.to_tensor(image)
        return x

    def set_stage(self, stage):
        if stage == 0:
            print('Using (32, 32) crops')
            self.crop = transforms.RandomCrop((32, 32))
        elif stage == 1:
            print('Using (28, 28) crops')
            self.crop = transforms.RandomCrop((28, 28))

    def __len__(self):
        return len(self.images)


dataset = MNIST_aug()  # was MyData(), which is not defined in this file
loader = DataLoader(dataset,
                    batch_size=2,
                    num_workers=2,
                    shuffle=True)

for batch_idx, data in enumerate(loader):
    print('Batch idx {}, data shape {}'.format(
        batch_idx, data.shape))

loader.dataset.set_stage(1)

for batch_idx, data in enumerate(loader):
    print('Batch idx {}, data shape {}'.format(
        batch_idx, data.shape))
150  Old/Gradient-Descent-The-Ultimate-Optimizer/dataset_aug_v2.py  (Executable file)
@@ -0,0 +1,150 @@
#from hyperopt import *
|
||||
from hyperopt_v2 import *
|
||||
|
||||
import torchvision.transforms.functional as TF
|
||||
import torchvision.transforms as T
|
||||
|
||||
#from scipy import ndimage
|
||||
import kornia
|
||||
|
||||
import random
|
||||
|
||||
|
||||
class LeNet_v3(nn.Module):
|
||||
def __init__(self, num_inp, num_out):
|
||||
super(LeNet_v3, self).__init__()
|
||||
self.params = nn.ParameterDict({
|
||||
'w1': nn.Parameter(torch.zeros(20, num_inp, 5, 5)),
|
||||
'b1': nn.Parameter(torch.zeros(20)),
|
||||
'w2': nn.Parameter(torch.zeros(50, 20, 5, 5)),
|
||||
'b2': nn.Parameter(torch.zeros(50)),
|
||||
'w3': nn.Parameter(torch.zeros(500,4*4*50)),
|
||||
'b3': nn.Parameter(torch.zeros(500)),
|
||||
'w4': nn.Parameter(torch.zeros(10, 500)),
|
||||
'b4': nn.Parameter(torch.zeros(10))
|
||||
})
|
||||
|
||||
|
||||
def initialize(self):
|
||||
nn.init.kaiming_uniform_(self.params["w1"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.params["w2"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.params["w3"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.params["w4"], a=math.sqrt(5))
|
||||
|
||||
def forward(self, x):
|
||||
#print("Start Shape ", x.shape)
|
||||
out = F.relu(F.conv2d(input=x, weight=self.params["w1"], bias=self.params["b1"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.conv2d(input=out, weight=self.params["w2"], bias=self.params["b2"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.max_pool2d(out, 2)
|
||||
#print("Shape ", out.shape)
|
||||
out = out.view(out.size(0), -1)
|
||||
#print("Shape ", out.shape)
|
||||
out = F.relu(F.linear(out, self.params["w3"], self.params["b3"]))
|
||||
#print("Shape ", out.shape)
|
||||
out = F.linear(out, self.params["w4"], self.params["b4"])
|
||||
#print("Shape ", out.shape)
|
||||
return F.log_softmax(out, dim=1)
|
||||
|
||||
|
||||
def print_grad_fn(self):
|
||||
for n, p in self.params.items():
|
||||
print(n, p.grad_fn)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist_CNN_augmented / "
|
||||
|
||||
class Data_aug(nn.Module):
|
||||
def __init__(self):
|
||||
super(Data_aug, self).__init__()
|
||||
self.data_augmentation = True
|
||||
self.params = nn.ParameterDict({
|
||||
"prob": nn.Parameter(torch.tensor(0.5)),
|
||||
"mag": nn.Parameter(torch.tensor(180.0))
|
||||
})
|
||||
|
||||
#self.params["mag"].register_hook(print)
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
if self.data_augmentation and self.training and random.random() < self.params["prob"]:
|
||||
#print('Aug')
|
||||
batch_size = x.shape[0]
|
||||
# create transformation (rotation)
|
||||
alpha = self.params["mag"] # in degrees
|
||||
angle = torch.ones(batch_size, device=x.device) * alpha
|
||||
|
||||
# define the rotation center
|
||||
center = torch.ones(batch_size, 2, device=x.device)
|
||||
center[..., 0] = x.shape[3] / 2 # x
|
||||
center[..., 1] = x.shape[2] / 2 # y
|
||||
|
||||
#print(x.shape, center)
|
||||
# define the scale factor
|
||||
scale = torch.ones(batch_size, device=x.device)
|
||||
|
||||
# compute the transformation matrix
|
||||
M = kornia.get_rotation_matrix2d(center, angle, scale)
|
||||
|
||||
# apply the transformation to original image
|
||||
x = kornia.warp_affine(x, M, dsize=(x.shape[2], x.shape[3])) #dsize=(h, w)
|
||||
|
||||
return x
|
||||
|
||||
def eval(self):
|
||||
self.params['prob'].data.fill_(0.0)  # was torch.tensor(0.0, device=self.device); Data_aug has no self.device, so update the existing parameter in place
|
||||
nn.Module.eval(self)
|
||||
|
||||
def data_augmentation(self, mode=True):
|
||||
self.data_augmentation=mode
|
||||
|
||||
def print_grad_fn(self):
|
||||
for n, p in self.params.items():
|
||||
print(n, p.grad_fn)
|
||||
|
||||
def __str__(self):
|
||||
return "Data_Augmenter / "
|
||||
|
||||
class Augmented_model(nn.Module):
|
||||
def __init__(self, model, data_augmenter):
|
||||
#self.model = model
|
||||
#self.data_aug = data_augmenter
|
||||
super(Augmented_model, self).__init__()#nn.Module.__init__(self)
|
||||
#super().__init__()
|
||||
self.mods = nn.ModuleDict({
|
||||
'data_aug': data_augmenter,
|
||||
'model': model
|
||||
})
|
||||
#for name, param in self.mods.named_parameters():
|
||||
# print(name, type(param.data), param.size())
|
||||
|
||||
#params = self.mods.named_parameters() #self.parameters()
|
||||
#parameters = [param for param in self.model.parameters()] + [param for param in self.data_aug.parameters()]
|
||||
#Optimizable.__init__(self, params, optimizer)
|
||||
|
||||
def initialize(self):
|
||||
self.mods['model'].initialize()
|
||||
|
||||
def forward(self, x):
|
||||
return self.mods['model'](self.mods['data_aug'](x))
|
||||
|
||||
#def adjust(self):
|
||||
# self.optimizer.adjust(self) # the parameters of the dicts
|
||||
|
||||
def data_augmentation(self, mode=True):
|
||||
self.mods['data_aug'].data_augmentation=mode
|
||||
|
||||
def begin(self):
|
||||
for param in self.parameters():
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
|
||||
def print_grad_fn(self):
|
||||
for n, m in self.mods.items():
|
||||
m.print_grad_fn()
|
||||
|
||||
def __str__(self):
|
||||
return str(self.mods['data_aug'])+ str(self.mods['model'])# + str(self.optimizer)
|
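For orientation, a hedged usage sketch (not part of dataset_aug_v2.py) of how these modules are meant to compose; the actual training loop lives in tests.py (train_v2):

```python
# Assumes the classes above (LeNet_v3, Data_aug, Augmented_model) are in scope.
import torch

aug_model = Augmented_model(model=LeNet_v3(num_inp=1, num_out=10),
                            data_augmenter=Data_aug())
aug_model.initialize()                         # Kaiming init of the LeNet weights
aug_model.data_augmentation(True)              # rotate inputs while training
logits = aug_model(torch.rand(8, 1, 28, 28))   # Data_aug -> LeNet_v3 -> log-probabilities
print(logits.shape)                            # torch.Size([8, 10])
```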
5  Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph  (Executable file)
@@ -0,0 +1,5 @@
digraph {
	graph [size="12,12"]
	node [align=left fontsize=12 height=0.2 ranksep=0.1 shape=box style=filled]
	94296775052080 [label=NoneType fillcolor=darkolivegreen1]
}
19  Old/Gradient-Descent-The-Ultimate-Optimizer/graph/graph.svg  (Executable file)
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
 -->
<!-- Title: %3 Pages: 1 -->
<svg width="75pt" height="30pt"
 viewBox="0.00 0.00 74.65 30.40" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 26.4)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-26.4 70.6472,-26.4 70.6472,4 -4,4"/>
<!-- 94296775052080 -->
<g id="node1" class="node">
<title>94296775052080</title>
<polygon fill="#caff70" stroke="#000000" points="66.4717,-22.6036 .1755,-22.6036 .1755,.2036 66.4717,.2036 66.4717,-22.6036"/>
<text text-anchor="middle" x="33.3236" y="-7.6" font-family="Times,serif" font-size="12.00" fill="#000000">NoneType</text>
</g>
</g>
</svg>
(New image, 937 B)
345  Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt.py  (Executable file)
@@ -0,0 +1,345 @@
import math
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
|
||||
|
||||
class Optimizable():#nn.Module):
|
||||
"""
|
||||
This is the interface for anything that has parameters that need to be
|
||||
optimized, somewhat like torch.nn.Module but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
|
||||
interface which does not give us enough control about the detachments.)
|
||||
Nominal operation of an Optimizable at the lowest level is as follows:
|
||||
o = MyOptimizable(…)
|
||||
o.initialize()
|
||||
loop {
|
||||
o.begin()
|
||||
o.zero_grad()
|
||||
loss = –compute loss function from parameters–
|
||||
loss.backward()
|
||||
o.adjust()
|
||||
}
|
||||
Optimizables recursively handle updates to their optimiz*ers*.
|
||||
"""
|
||||
#def __init__(self):
|
||||
# super(Optimizable, self).__init__()
|
||||
# self.parameters = nn.Parameter(torch.zeros(()))
|
||||
|
||||
def __init__(self, parameters, optimizer):
|
||||
#super(Optimizable, self).__init__()
|
||||
self.parameters = parameters # a dict mapping names to tensors
|
||||
self.optimizer = optimizer # which must itself be Optimizable!
|
||||
self.all_params_with_gradients = []
|
||||
#self.device = device
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize parameters, e.g. with a Kaiming initializer."""
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
"""Enable gradient tracking on current parameters."""
|
||||
self.all_params_with_gradients = [] # re-initialize to avoid filling up memory
|
||||
for name, param in self.parameters.items():
|
||||
#for param in self.parameters:
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
#param.to(self.device)
|
||||
#if param.device == torch.device('cuda:0'):
|
||||
# print(name, param.device)
|
||||
self.all_params_with_gradients.append(param)
|
||||
self.optimizer.begin()
|
||||
|
||||
def zero_grad(self):
|
||||
""" Set all gradients to zero. """
|
||||
for param in self.all_params_with_gradients:
|
||||
#param = param.to(self.device)
|
||||
param.grad = torch.zeros(param.shape, device=param.device)
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
""" Note: at this point you would probably call .backwards() on the loss
|
||||
function. """
|
||||
|
||||
def adjust(self):
|
||||
""" Update parameters """
|
||||
pass
|
||||
|
||||
|
||||
def print_grad_fn(self):
|
||||
self.optimizer.print_grad_fn()
|
||||
for n, p in self.parameters.items():
|
||||
print(n," - ", p.grad_fn)
|
||||
|
||||
def param_grad(self):
|
||||
return self.all_params_with_gradients
|
||||
|
||||
def param(self, param_name):
|
||||
return self.parameters[param_name].item()
|
||||
|
||||
|
||||
class MNIST_FullyConnected(Optimizable):
|
||||
"""
|
||||
A fully-connected NN for the MNIST task. This is Optimizable but not itself
|
||||
an optimizer.
|
||||
"""
|
||||
|
||||
def __init__(self, num_inp, num_hid, num_out, optimizer):
|
||||
parameters = {
|
||||
"w1": torch.zeros(num_inp, num_hid).t(),
|
||||
"b1": torch.zeros(num_hid).t(),
|
||||
"w2": torch.zeros(num_hid, num_out).t(),
|
||||
"b2": torch.zeros(num_out).t(),
|
||||
}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def initialize(self):
|
||||
nn.init.kaiming_uniform_(self.parameters["w1"], a=math.sqrt(5))
|
||||
nn.init.kaiming_uniform_(self.parameters["w2"], a=math.sqrt(5))
|
||||
self.optimizer.initialize()
|
||||
|
||||
def forward(self, x):
|
||||
"""Compute a prediction."""
|
||||
x = F.linear(x, self.parameters["w1"], self.parameters["b1"])
|
||||
x = torch.tanh(x)
|
||||
x = F.linear(x, self.parameters["w2"], self.parameters["b2"])
|
||||
x = torch.tanh(x)
|
||||
x = F.log_softmax(x, dim=1)
|
||||
return x
|
||||
|
||||
def adjust(self):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
|
||||
def __str__(self):
|
||||
return "mnist / " + str(self.optimizer)
|
||||
|
||||
|
||||
class NoOpOptimizer(Optimizable):#, nn.Module):
|
||||
"""
|
||||
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
#super(Optimizable, self).__init__()
|
||||
pass
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
pass
|
||||
|
||||
def zero_grad(self):
|
||||
pass
|
||||
|
||||
def adjust(self, params):
|
||||
pass
|
||||
|
||||
def adjust_val(self, params):
|
||||
pass
|
||||
|
||||
def print_grad_fn(self):
|
||||
pass
|
||||
|
||||
def __str__(self):
|
||||
return "static"
|
||||
|
||||
class Adam(Optimizable):
|
||||
"""
|
||||
A fully hyperoptimizable Adam optimizer
|
||||
"""
|
||||
|
||||
def clamp(x):
|
||||
return (x.tanh() + 1.0) / 2.0
|
||||
|
||||
def unclamp(y):
|
||||
z = y * 2.0 - 1.0
|
||||
return ((1.0 + z) / (1.0 - z)).log() / 2.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha=0.001,
|
||||
beta1=0.9,
|
||||
beta2=0.999,
|
||||
log_eps=-8.0,
|
||||
optimizer=NoOpOptimizer(),
|
||||
device = torch.device('cuda')
|
||||
):
|
||||
self.device = device
|
||||
parameters = {
|
||||
"alpha": torch.tensor(alpha, device=self.device),
|
||||
"beta1": Adam.unclamp(torch.tensor(beta1, device=self.device)),
|
||||
"beta2": Adam.unclamp(torch.tensor(beta2, device=self.device)),
|
||||
"log_eps": torch.tensor(log_eps, device=self.device),
|
||||
}
|
||||
super().__init__(parameters, optimizer)
|
||||
self.num_adjustments = 0
|
||||
self.num_adjustments_val = 0
|
||||
self.cache = {}
|
||||
|
||||
for name, param in parameters.items():
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
#param.to(self.device)
|
||||
#if param.device == torch.device('cuda:0'):
|
||||
# print(name, param.device)
|
||||
|
||||
def adjust(self, params): # update the model (learning) parameters
|
||||
self.num_adjustments += 1
|
||||
self.optimizer.adjust(self.parameters)
|
||||
#print('Adam update')
|
||||
t = self.num_adjustments
|
||||
beta1 = Adam.clamp(self.parameters["beta1"])
|
||||
beta2 = Adam.clamp(self.parameters["beta2"])
|
||||
for name, param in params.items():
|
||||
if name == "mag": continue
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape, device=self.device),
|
||||
"v": torch.zeros(param.shape, device=self.device)
|
||||
+ 10.0 ** self.parameters["log_eps"].data
|
||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
||||
# differentiable at exactly zero
|
||||
}
|
||||
#print(name, param.device)
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(m)
|
||||
self.all_params_with_gradients.append(v)
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
|
||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
||||
#print(name)
|
||||
|
||||
def adjust_val(self, params): # update the transformation (augmentation) parameters
|
||||
self.num_adjustments_val += 1
|
||||
self.optimizer.adjust_val(self.parameters)
|
||||
#print('Adam update')
|
||||
t = self.num_adjustments_val
|
||||
beta1 = Adam.clamp(self.parameters["beta1"])
|
||||
beta2 = Adam.clamp(self.parameters["beta2"])
|
||||
for name, param in params.items():
|
||||
if name != "mag": continue
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape, device=self.device),
|
||||
"v": torch.zeros(param.shape, device=self.device)
|
||||
+ 10.0 ** self.parameters["log_eps"].data
|
||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
||||
# differentiable at exactly zero
|
||||
}
|
||||
#print(name, param.device)
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(m)
|
||||
self.all_params_with_gradients.append(v)
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.parameters["log_eps"])
|
||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
||||
#print(name)
|
||||
|
||||
def __str__(self):
|
||||
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
|
||||
'''
|
||||
class SGD(Optimizable):
|
||||
"""
|
||||
A hyperoptimizable SGD
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.01, optimizer=NoOpOptimizer()):
|
||||
parameters = {"alpha": torch.tensor(alpha)}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def adjust(self, params):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
for name, param in params.items():
|
||||
g = param.grad.detach()
|
||||
params[name] = param.detach() - g * self.parameters["alpha"]
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%f) / " % self.parameters["alpha"] + str(self.optimizer)
|
||||
|
||||
class SGDPerParam(Optimizable):
|
||||
"""
|
||||
Like above, but can be taught a separate step size for each parameter it
|
||||
tunes.
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=0.01, params=[], optimizer=NoOpOptimizer()):
|
||||
parameters = {name + "_alpha": torch.tensor(alpha) for name in params}
|
||||
super().__init__(parameters, optimizer)
|
||||
|
||||
def adjust(self, params):
|
||||
self.optimizer.adjust(self.parameters)
|
||||
for name, param in params.items():
|
||||
g = param.grad.detach()
|
||||
params[name] = param.detach() - g * self.parameters[name + "_alpha"]
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%s) / " % str(
|
||||
{k: t.item() for k, t in self.parameters.items()}
|
||||
) + str(self.optimizer)
|
||||
'''
|
||||
'''
|
||||
class AdamBaydin(Optimizable):
|
||||
""" Same as above, but only optimizes the learning rate, treating the
|
||||
remaining hyperparameters as constants. """
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha=0.001,
|
||||
beta1=0.9,
|
||||
beta2=0.999,
|
||||
log_eps=-8.0,
|
||||
optimizer=NoOpOptimizer(),
|
||||
):
|
||||
parameters = {"alpha": torch.tensor(alpha)}
|
||||
self.beta1 = beta1
|
||||
self.beta2 = beta2
|
||||
self.log_eps = log_eps
|
||||
super().__init__(parameters, optimizer)
|
||||
self.num_adjustments = 0
|
||||
self.cache = {}
|
||||
|
||||
def adjust(self, params):
|
||||
self.num_adjustments += 1
|
||||
self.optimizer.adjust(self.parameters)
|
||||
t = self.num_adjustments
|
||||
beta1 = self.beta1
|
||||
beta2 = self.beta2
|
||||
for name, param in params.items():
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape),
|
||||
"v": torch.zeros(param.shape) + 10.0 ** self.log_eps,
|
||||
}
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(m)
|
||||
self.all_params_with_gradients.append(v)
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.log_eps)
|
||||
params[name] = param.detach() - self.parameters["alpha"] * dparam
|
||||
|
||||
def __str__(self):
|
||||
return "adam(" + str(self.parameters) + ") / " + str(self.optimizer)
|
||||
'''
|
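The Optimizable docstring above only sketches the protocol abstractly. Below is a hedged, CPU-only illustration of one concrete stack; it mirrors what main.py's make_adam_stack / adam_experiments already do (dl_train is the loader built in main.py, and the explicit device argument is only there because MNIST_FullyConnected allocates CPU tensors):

```python
# Hedged sketch, not an addition to hyperopt.py: an Adam whose hyperparameters
# are themselves optimized by another Adam one level up the stack.
hyper = Adam(alpha=1e-4, device=torch.device('cpu'))            # tunes the inner Adam
opt = Adam(alpha=1e-3, optimizer=hyper, device=torch.device('cpu'))
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
model.initialize()

for features, labels in dl_train:                 # dl_train as defined in main.py
    model.begin()                                 # re-attach gradient tracking
    model.zero_grad()                             # zero grads at every level of the stack
    pred = model.forward(features.reshape(-1, 28 * 28))
    loss = F.nll_loss(pred, labels)
    loss.backward(create_graph=True)              # keep the graph so hypergradients exist
    model.adjust()                                # inner Adam steps the weights, outer Adam steps the inner Adam
```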
296  Old/Gradient-Descent-The-Ultimate-Optimizer/hyperopt_v2.py  (Executable file)
@@ -0,0 +1,296 @@
import math
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.optim.optimizer import Optimizer
|
||||
|
||||
class Optimizable():
|
||||
"""
|
||||
This is the interface for anything that has parameters that need to be
|
||||
optimized, somewhat like torch.nn.Module but with the right plumbing for
hyperoptimizability. (Specifically, torch.nn.Module uses the Parameter
|
||||
interface which does not give us enough control about the detachments.)
|
||||
Nominal operation of an Optimizable at the lowest level is as follows:
|
||||
o = MyOptimizable(…)
|
||||
o.initialize()
|
||||
loop {
|
||||
o.begin()
|
||||
o.zero_grad()
|
||||
loss = –compute loss function from parameters–
|
||||
loss.backward()
|
||||
o.adjust()
|
||||
}
|
||||
Optimizables recursively handle updates to their optimiz*ers*.
|
||||
"""
|
||||
#def __init__(self):
|
||||
# super(Optimizable, self).__init__()
|
||||
# self.parameters = nn.Parameter(torch.zeros(()))
|
||||
|
||||
def __init__(self, parameters, optimizer):
|
||||
self.params = parameters # a dict mapping names to tensors
|
||||
self.optimizer = optimizer # which must itself be Optimizable!
|
||||
self.all_params_with_gradients = []
|
||||
#self.device = device
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize parameters, e.g. with a Kaiming initializer."""
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
"""Enable gradient tracking on current parameters."""
|
||||
self.all_params_with_gradients = nn.ParameterList() # re-initialize to avoid filling up memory
|
||||
print("Opti param :", type(self.params))
|
||||
#for name, param in self.params:
|
||||
if isinstance(self.params,dict): #Dict
|
||||
for name, param in self.params.items():
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
self.all_params_with_gradients.append(param)
|
||||
if isinstance(self.params,list): #List
|
||||
for param in self.params:
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
self.all_params_with_gradients.append(param)
|
||||
self.optimizer.begin()
|
||||
|
||||
def zero_grad(self):
|
||||
""" Set all gradients to zero. """
|
||||
for param in self.all_params_with_gradients:
|
||||
param.grad = torch.zeros(param.shape, device=param.device)
|
||||
self.optimizer.zero_grad()
|
||||
|
||||
""" Note: at this point you would probably call .backwards() on the loss
|
||||
function. """
|
||||
|
||||
def adjust(self):
|
||||
""" Update parameters """
|
||||
pass
|
||||
|
||||
|
||||
class NoOpOptimizer(Optimizable):#, nn.Module):
|
||||
"""
|
||||
NoOpOptimizer sits on top of a stack, and does not affect what lies below.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
#super(Optimizable, self).__init__()
|
||||
pass
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def begin(self):
|
||||
#print("NoOpt begin")
|
||||
pass
|
||||
|
||||
def zero_grad(self):
|
||||
pass
|
||||
|
||||
def adjust(self, params):
|
||||
pass
|
||||
|
||||
def step(self):
|
||||
pass
|
||||
|
||||
def print_grad_fn(self):
|
||||
pass
|
||||
|
||||
def __str__(self):
|
||||
return "static"
|
||||
|
||||
|
||||
class SGD(Optimizer, nn.Module): # avoid inheriting from Optimizer if possible
|
||||
"""
|
||||
A hyperoptimizable SGD
|
||||
"""
|
||||
|
||||
def __init__(self, params, lr=0.01, height=0):
|
||||
self.height=height
|
||||
#params: the parameters to optimize
#the remaining (default) arguments are the optimizer's own settings
|
||||
print('SGD - H', height)
|
||||
nn.Module.__init__(self)
|
||||
|
||||
optim_keys = ('lr','') # move this into Optimizable? The '' avoids iterating over the characters of a lone string...
|
||||
'''
|
||||
self_params = {"lr": torch.tensor(lr),
|
||||
"momentum": 0,
|
||||
"dampening":0,
|
||||
"weight_decay":0,
|
||||
"nesterov": False}
|
||||
'''
|
||||
#self_params = dict(lr=torch.tensor(lr),
|
||||
# momentum=0, dampening=0, weight_decay=0, nesterov=False)
|
||||
|
||||
self_params = nn.ParameterDict({
|
||||
"lr": nn.Parameter(torch.tensor(lr)),
|
||||
"momentum": nn.Parameter(torch.tensor(0.0)),
|
||||
"dampening": nn.Parameter(torch.tensor(0.0)),
|
||||
"weight_decay": nn.Parameter(torch.tensor(0.0)),
|
||||
})
|
||||
|
||||
for k in self_params.keys() & optim_keys:
|
||||
self_params[k].requires_grad_() # keep gradient information…
|
||||
self_params[k].retain_grad() # even if not a leaf…
|
||||
#self_params[k].register_hook(print)
|
||||
|
||||
if height==0:
|
||||
optimizer = NoOpOptimizer()
|
||||
else:
|
||||
#def dict_generator(): yield {k: self_params[k] for k in self_params.keys() & optim_keys}
|
||||
#(dict for dict in {k: self_params[k] for k in self_params.keys() & optim_keys}) # should work
|
||||
optimizer = SGD(params=(self_params[k]for k in self_params.keys() & optim_keys), lr=lr, height=height-1)
|
||||
#optimizer.register_backward_hook(print)
|
||||
|
||||
self.optimizer = optimizer
|
||||
#if(height==0):
|
||||
# for n,p in params.items():
|
||||
# print(n,p)
|
||||
|
||||
#Optimizable.__init__(self, self_params, optimizer)
|
||||
|
||||
#print(type(params))
|
||||
#for p in params:
|
||||
# print(type(p))
|
||||
Optimizer.__init__(self, params, self_params)
|
||||
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
print(type(p.data), p.size())
|
||||
print('End SGD-H', height)
|
||||
|
||||
def begin(self):
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
#print(type(p.data), p.size())
|
||||
p.requires_grad_() # keep gradient information…
|
||||
p.retain_grad() # even if not a leaf…
|
||||
#p.register_hook(lambda x: print(self.height, x.grad_fn))
|
||||
|
||||
self.optimizer.begin()
|
||||
|
||||
def print_grad_fn(self):
|
||||
self.optimizer.print_grad_fn()
|
||||
for group in self.param_groups:
|
||||
for i, p in enumerate(group['params']):
|
||||
print(self.height," - ", i, p.grad_fn)
|
||||
|
||||
#def adjust(self, params):
|
||||
# self.optimizer.adjust(self.params)
|
||||
# for name, param in params.items():
|
||||
# g = param.grad.detach()
|
||||
# params[name] = param.detach() - g * self.params["lr"]
|
||||
|
||||
def step(self):
|
||||
"""Performs a single optimization step.
|
||||
|
||||
Arguments:
|
||||
closure (callable, optional): A closure that reevaluates the model
|
||||
and returns the loss.
|
||||
"""
|
||||
print('SGD start')
|
||||
self.optimizer.step()
|
||||
|
||||
for group in self.param_groups:
|
||||
for i, p in enumerate(group['params']):
|
||||
if p.grad is None:
|
||||
continue
|
||||
#d_p = p.grad.data
|
||||
d_p = p.grad.detach()
|
||||
|
||||
#print(group['lr'])
|
||||
#p.data.add_(-group['lr'].item(), d_p)  # deprecated call signature; would also apply the step a second time
#group['params'][i] = p.detach() - d_p * group['lr']
p.data -= group['lr']*d_p  # note: updating through .data drops gradient information
|
||||
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
print(p, p.grad)
|
||||
continue
|
||||
|
||||
print("SGD end")
|
||||
#return loss
|
||||
|
||||
def __str__(self):
|
||||
return "sgd(%f) / " % self.params["lr"] + str(self.optimizer)
|
||||
|
||||
|
||||
class Adam(Optimizable, nn.Module):
|
||||
"""
|
||||
A fully hyperoptimizable Adam optimizer
|
||||
"""
|
||||
|
||||
def clamp(x):
|
||||
return (x.tanh() + 1.0) / 2.0
|
||||
|
||||
def unclamp(y):
|
||||
z = y * 2.0 - 1.0
|
||||
return ((1.0 + z) / (1.0 - z)).log() / 2.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha=0.001,
|
||||
beta1=0.9,
|
||||
beta2=0.999,
|
||||
log_eps=-8.0,
|
||||
optimizer=NoOpOptimizer(),
|
||||
device = torch.device('cuda')
|
||||
):
|
||||
#super(Adam, self).__init__()
|
||||
nn.Module.__init__(self)
|
||||
self.device = device
|
||||
params = nn.ParameterDict({
|
||||
"alpha": nn.Parameter(torch.tensor(alpha, device=self.device)),
|
||||
"beta1": nn.Parameter(Adam.unclamp(torch.tensor(beta1, device=self.device))),
|
||||
"beta2": nn.Parameter(Adam.unclamp(torch.tensor(beta2, device=self.device))),
|
||||
"log_eps": nn.Parameter(torch.tensor(log_eps, device=self.device)),
|
||||
})
|
||||
Optimizable.__init__(self, params, optimizer)
|
||||
self.num_adjustments = 0
|
||||
self.cache = {}
|
||||
|
||||
for name, param in params.items():
|
||||
param.requires_grad_() # keep gradient information…
|
||||
param.retain_grad() # even if not a leaf…
|
||||
|
||||
def adjust(self, params, pytorch_mod=False):
|
||||
self.num_adjustments += 1
|
||||
self.optimizer.adjust(self.params)
|
||||
t = self.num_adjustments
|
||||
beta1 = Adam.clamp(self.params["beta1"])
|
||||
beta2 = Adam.clamp(self.params["beta2"])
|
||||
|
||||
updated_param = {}  # dict: it is indexed by parameter name below
|
||||
if pytorch_mod:
|
||||
params = params.named_parameters(prefix='') # rename this input...
|
||||
|
||||
for name, param in params:
|
||||
if name not in self.cache:
|
||||
self.cache[name] = {
|
||||
"m": torch.zeros(param.shape, device=self.device),
|
||||
"v": torch.zeros(param.shape, device=self.device)
|
||||
+ 10.0 ** self.params["log_eps"].data
|
||||
# NOTE that we add a little ‘fudge factor' here because sqrt is not
|
||||
# differentiable at exactly zero
|
||||
}
|
||||
#print(name, param.device)
|
||||
g = param.grad.detach()
|
||||
self.cache[name]["m"] = m = (
|
||||
beta1 * self.cache[name]["m"].detach() + (1.0 - beta1) * g
|
||||
)
|
||||
self.cache[name]["v"] = v = (
|
||||
beta2 * self.cache[name]["v"].detach() + (1.0 - beta2) * g * g
|
||||
)
|
||||
self.all_params_with_gradients.append(nn.Parameter(m)) # risk of filling up memory => would a dict be better?
|
||||
self.all_params_with_gradients.append(nn.Parameter(v))
|
||||
m_hat = m / (1.0 - beta1 ** float(t))
|
||||
v_hat = v / (1.0 - beta2 ** float(t))
|
||||
dparam = m_hat / (v_hat ** 0.5 + 10.0 ** self.params["log_eps"])
|
||||
updated_param[name] = param.detach() - self.params["alpha"] * dparam
|
||||
|
||||
if pytorch_mod: params.update(updated_param) # rename this input...
|
||||
else: params = updated_param
|
||||
|
||||
def __str__(self):
|
||||
return "adam(" + str(self.params) + ") / " + str(self.optimizer)
|
182  Old/Gradient-Descent-The-Ultimate-Optimizer/main.py  (Executable file)
@@ -0,0 +1,182 @@
import numpy as np
|
||||
import json, math, time, os
|
||||
from hyperopt import *
|
||||
import gc
|
||||
|
||||
BATCH_SIZE = 300
|
||||
|
||||
mnist_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
mnist_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(
|
||||
mnist_train, batch_size=BATCH_SIZE, shuffle=False
|
||||
)
|
||||
dl_test = torch.utils.data.DataLoader(mnist_test, batch_size=10000, shuffle=False)
|
||||
|
||||
|
||||
def test(model):
|
||||
for i, (features_, labels_) in enumerate(dl_test):
|
||||
features, labels = torch.reshape(features_, (10000, 28 * 28)), labels_
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / 10000 * 100
|
||||
|
||||
|
||||
def train(model, epochs=3, height=1):
|
||||
stats = []
|
||||
for epoch in range(epochs):
|
||||
for i, (features_, labels_) in enumerate(dl_train):
|
||||
t0 = time.process_time()
|
||||
model.begin()
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
pred = model.forward(
|
||||
features
|
||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
||||
loss = F.nll_loss(pred, labels)
|
||||
model.zero_grad()
|
||||
loss.backward(create_graph=True)
|
||||
model.adjust()
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"iter": epoch * len(dl_train) + i,
|
||||
"loss": loss.item(),
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
if "." not in k
|
||||
},
|
||||
}
|
||||
stats.append(data)
|
||||
return stats
|
||||
|
||||
|
||||
def run(opt, name="out", usr={}, epochs=3, height=1):
|
||||
torch.manual_seed(0x42)
|
||||
model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
||||
print("Running...", str(model))
|
||||
model.initialize()
|
||||
log = train(model, epochs, height)
|
||||
acc = test(model)
|
||||
out = {"acc": acc, "log": log, "usr": usr}
|
||||
with open("log/%s.json" % name, "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
times = [x["time"] for x in log]
|
||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
||||
print("Final accuracy:", acc)
|
||||
return out
|
||||
|
||||
|
||||
def sgd_experiments():
|
||||
run(SGD(0.01), "sgd", epochs=1)
|
||||
out = run(SGD(0.01, optimizer=SGD(0.01)), "sgd+sgd", epochs=1)
|
||||
alpha = out["log"][-1]["params"]["alpha"]
|
||||
print(alpha)
|
||||
run(SGD(alpha), "sgd-final", epochs=1)
|
||||
|
||||
|
||||
def adam_experiments():
|
||||
run(Adam(), "adam", epochs=1)
|
||||
print()
|
||||
mo = SGDPerParam(
|
||||
0.001, ["alpha", "beta1", "beta2", "log_eps"], optimizer=SGD(0.0001)
|
||||
)
|
||||
out = run(Adam(optimizer=mo), "adam+sgd", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
||||
log_eps = p["log_eps"]
|
||||
print(alpha, beta1, beta2, log_eps)
|
||||
print(mo)
|
||||
run(
|
||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
||||
"adam+sgd-final",
|
||||
epochs=1,
|
||||
)
|
||||
print()
|
||||
out = run(Adam(optimizer=Adam()), "adam2", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
beta1 = Adam.clamp(torch.tensor(p["beta1"])).item()
|
||||
beta2 = Adam.clamp(torch.tensor(p["beta2"])).item()
|
||||
log_eps = p["log_eps"]
|
||||
print(alpha, beta1, beta2, log_eps)
|
||||
run(
|
||||
Adam(alpha=p["alpha"], beta1=beta1, beta2=beta2, log_eps=log_eps),
|
||||
"adam2-final",
|
||||
epochs=1,
|
||||
)
|
||||
print()
|
||||
mo = SGDPerParam(0.001, ["alpha"], optimizer=SGD(0.0001))
|
||||
out = run(AdamBaydin(optimizer=mo), "adambaydin+sgd", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
print(alpha)
|
||||
print(mo)
|
||||
run(Adam(alpha=p["alpha"]), "adambaydin+sgd-final", epochs=1)
|
||||
print()
|
||||
out = run(AdamBaydin(optimizer=Adam()), "adambaydin2", epochs=1)
|
||||
p = out["log"][-1]["params"]
|
||||
alpha = p["alpha"]
|
||||
print(alpha)
|
||||
run(Adam(alpha=p["alpha"]), "adambaydin2-final", epochs=1)
|
||||
|
||||
|
||||
def surface():
|
||||
run(SGD(10 ** -3, optimizer=SGD(10 ** -1)), "tst", epochs=1)
|
||||
for log_alpha in np.linspace(-3, 2, 10):
|
||||
run(SGD(10 ** log_alpha), "sgd@1e%+.2f" % log_alpha, epochs=1)
|
||||
|
||||
|
||||
def make_sgd_stack(height, top):
|
||||
if height == 0:
|
||||
return SGD(alpha=top)
|
||||
return SGD(alpha=top, optimizer=make_sgd_stack(height - 1, top))
|
||||
|
||||
|
||||
def make_adam_stack(height, top=0.0000001):
|
||||
if height == 0:
|
||||
return Adam(alpha=top)
|
||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1))
|
||||
|
||||
|
||||
def stack_test():
|
||||
for top in np.linspace(-7, 3, 20):
|
||||
for height in range(6):
|
||||
print("height =", height, "to p=", top)
|
||||
opt = make_sgd_stack(height, 10 ** top)
|
||||
run(
|
||||
opt,
|
||||
"metasgd3-%d@%+.2f" % (height, top),
|
||||
{"height": height, "top": top},
|
||||
epochs=1,
|
||||
height=height,
|
||||
)
|
||||
gc.collect()
|
||||
|
||||
|
||||
def perf_test():
|
||||
for h in range(51):
|
||||
print("height:", h)
|
||||
# opt = make_sgd_stack(h, 0.01)
|
||||
opt = make_adam_stack(h)
|
||||
run(opt, "adamperf-%d" % h, {"height": h}, epochs=1)
|
||||
gc.collect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
os.mkdir("log")
|
||||
except FileExistsError:
|
||||
print("log/ exists already")
|
||||
|
||||
surface()
|
||||
sgd_experiments()
|
||||
adam_experiments()
|
||||
stack_test()
|
||||
perf_test()
|
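main.py's train() is the piece that makes the whole stack work: loss.backward(create_graph=True) keeps the optimizer update inside the autograd graph, so gradients with respect to the optimizer's own hyperparameters exist on the next step. A standalone sketch of the resulting hypergradient, independent of the repository code:

```python
# Why create_graph=True matters: a differentiable update w_new = w - lr * g keeps
# lr in the graph, so a later loss can be differentiated w.r.t. lr itself.
import torch

w = torch.tensor(1.0, requires_grad=True)
lr = torch.tensor(0.1, requires_grad=True)

loss = (w - 3.0) ** 2
g, = torch.autograd.grad(loss, w, create_graph=True)   # keep graph for second-order terms
w_new = w - lr * g                                      # differentiable SGD step

next_loss = (w_new - 3.0) ** 2
next_loss.backward()
print(lr.grad)                                          # d(next_loss)/d(lr): the hypergradient
```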
5  Old/Gradient-Descent-The-Ultimate-Optimizer/requirements.txt  (Executable file)
@@ -0,0 +1,5 @@
numpy==1.17.2
Pillow==6.2.0
six==1.12.0
torch==1.2.0
torchvision==0.4.0
344  Old/Gradient-Descent-The-Ultimate-Optimizer/tests.py  (Executable file)
@@ -0,0 +1,344 @@
import numpy as np
|
||||
import json, math, time, os
|
||||
from data_aug import *
|
||||
#from data_aug_v2 import *
|
||||
import gc
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from torchviz import make_dot, make_dot_from_trace
|
||||
|
||||
from torch.utils.data import SubsetRandomSampler
|
||||
|
||||
BATCH_SIZE = 300
|
||||
#TEST_SIZE = 10000
|
||||
TEST_SIZE = 300
|
||||
DATA_LIMIT = 10
|
||||
|
||||
'''
|
||||
data_train = torchvision.datasets.MNIST(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
data_test = torchvision.datasets.MNIST(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
'''
|
||||
data_train = torchvision.datasets.CIFAR10(
|
||||
"./data", train=True, download=True,
|
||||
transform=torchvision.transforms.Compose([
|
||||
#torchvision.transforms.RandomAffine(degrees=180, translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
torchvision.transforms.ToTensor()
|
||||
])
|
||||
)
|
||||
|
||||
data_test = torchvision.datasets.CIFAR10(
|
||||
"./data", train=False, download=True, transform=torchvision.transforms.ToTensor()
|
||||
)
|
||||
|
||||
train_subset_indices=range(int(len(data_train)/2))
|
||||
val_subset_indices=range(int(len(data_train)/2),len(data_train))
|
||||
|
||||
dl_train = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(train_subset_indices))
|
||||
dl_val = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=False, sampler=SubsetRandomSampler(val_subset_indices))
|
||||
dl_test = torch.utils.data.DataLoader(data_test, batch_size=TEST_SIZE, shuffle=False)
|
||||
|
||||
def test(model, reshape_in=True, device = torch.device('cuda')):
|
||||
for i, (features_, labels_) in enumerate(dl_test):
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (TEST_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
pred = model.forward(features)
|
||||
return pred.argmax(dim=1).eq(labels).sum().item() / TEST_SIZE * 100
|
||||
|
||||
def train_one_epoch(model, optimizer, epoch=0, reshape_in=True, device = torch.device('cuda'), train_data=True):
|
||||
if train_data: dl = dl_train
|
||||
else: dl = dl_val
|
||||
for i, (features_, labels_) in enumerate(dl):
|
||||
if i > DATA_LIMIT : break
|
||||
#t0 = time.process_time()
|
||||
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
#optimizer.begin()
|
||||
#optimizer.zero_grad()
|
||||
model.begin()
|
||||
model.zero_grad()
|
||||
pred = model.forward(features)
|
||||
|
||||
#loss = F.nll_loss(pred, labels)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
|
||||
#model.print_grad_fn()
|
||||
#optimizer.print_grad_fn()
|
||||
#print('-'*50)
|
||||
|
||||
loss.backward(create_graph=True)
|
||||
|
||||
#optimizer.step()
|
||||
if train_data: model.adjust()
|
||||
else: model.adjust_val()
|
||||
|
||||
#tf = time.process_time()
|
||||
#data = {
|
||||
# "time": tf - t0,
|
||||
# "iter": epoch * len(dl_train) + i,
|
||||
# "loss": loss.item(),
|
||||
# "params": {
|
||||
# k: v.item()
|
||||
# for k, v in model.optimizer.parameters.items()
|
||||
# if "." not in k
|
||||
# },
|
||||
#}
|
||||
#stats.append(data)
|
||||
|
||||
#print_torch_mem(i)
|
||||
return loss.item()
|
||||
|
||||
def train_v2(model, optimizer, epochs=3, reshape_in=True, device = torch.device('cuda')):
|
||||
log = []
|
||||
for epoch in range(epochs):
|
||||
|
||||
#dl_train.dataset.transform=torchvision.transforms.Compose([
|
||||
# torchvision.transforms.RandomAffine(degrees=model.param('mag'), translate=None, scale=None, shear=None, resample=False, fillcolor=0),
|
||||
# torchvision.transforms.ToTensor()
|
||||
#])
|
||||
viz_data(fig_name='res/data_sample')
|
||||
t0 = time.process_time()
|
||||
loss = train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device)
|
||||
train_one_epoch(model=model, optimizer=optimizer, epoch=epoch, reshape_in=reshape_in, device=device,train_data=False)
|
||||
|
||||
#acc = test(model=model, reshape_in=reshape_in, device=device)
|
||||
acc = 0
|
||||
|
||||
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"epoch": epoch,
|
||||
"loss": loss,
|
||||
"acc": acc,
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
#for k, v in model.mods.data_aug.params.named_parameters()
|
||||
if "." not in k
|
||||
|
||||
},
|
||||
}
|
||||
log.append(data)
|
||||
|
||||
|
||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",log[-1]["loss"])
|
||||
param = [p for p in model.param_grad() if p.grad is not None]
|
||||
if(len(param)!=0):
|
||||
print(param[-2],' / ', param[-2].grad)
|
||||
print(param[-1],' / ', param[-1].grad)
|
||||
return log
|
||||
|
||||
def train(model, epochs=3, height=1, reshape_in=True, device = torch.device('cuda')):
|
||||
stats = []
|
||||
for epoch in range(epochs):
|
||||
for i, (features_, labels_) in enumerate(dl_train):
|
||||
t0 = time.process_time()
|
||||
model.begin()
|
||||
if reshape_in :
|
||||
features, labels = torch.reshape(features_, (BATCH_SIZE, 28 * 28)), labels_
|
||||
else:
|
||||
features, labels =features_, labels_
|
||||
|
||||
features, labels = features.to(device), labels.to(device)
|
||||
|
||||
pred = model.forward(
|
||||
features
|
||||
) # typo in https://www.groundai.com/project/gradient-descent-the-ultimate-optimizer/
|
||||
#loss = F.nll_loss(pred, labels)
|
||||
loss = F.cross_entropy(pred,labels)
|
||||
|
||||
#print('-'*50)
|
||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
||||
#if(len(param)!=0):
|
||||
# print(param[-2],' / ', param[-2].grad)
|
||||
# print(param[-1],' / ', param[-1].grad)
|
||||
|
||||
model.zero_grad()
|
||||
loss.backward(create_graph=True)
|
||||
model.adjust()
|
||||
tf = time.process_time()
|
||||
data = {
|
||||
"time": tf - t0,
|
||||
"iter": epoch * len(dl_train) + i,
|
||||
"loss": loss.item(),
|
||||
"params": {
|
||||
k: v.item()
|
||||
for k, v in model.optimizer.parameters.items()
|
||||
if "." not in k
|
||||
},
|
||||
}
|
||||
stats.append(data)
|
||||
|
||||
print('-'*50)
|
||||
i=0
|
||||
for obj in gc.get_objects():
|
||||
try:
|
||||
if (torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data))) and len(obj.size())>1:
|
||||
print(i, type(obj), obj.size())
|
||||
i+=1
|
||||
except:
|
||||
pass
|
||||
print("Epoch :",epoch+1, "/",epochs, "- Loss :",stats[-1]["loss"])
|
||||
param = [p for p in model.param_grad() if p.grad is not None]
|
||||
if(len(param)!=0):
|
||||
print(param[-2],' / ', param[-2].grad)
|
||||
print(param[-1],' / ', param[-1].grad)
|
||||
return stats
|
||||
|
||||
def run(opt, name="out", usr={}, epochs=10, height=1, cnn=True, device = torch.device('cuda')):
|
||||
torch.manual_seed(0x42)
|
||||
if not cnn:
|
||||
reshape_in = True
|
||||
#model = MNIST_FullyConnected(28 * 28, 128, 10, opt)
|
||||
model = MNIST_FullyConnected_Augmented(28 * 28, 128, 10, opt, device=device)
|
||||
|
||||
else:
|
||||
reshape_in = False
|
||||
#model = LeNet(1, 10,opt, device)
|
||||
#model = LeNet_v2(1, 10,opt, device).to(device=device)
|
||||
model = LeNet_v2(3, 10,opt, device).to(device=device)
|
||||
optimizer=None
|
||||
'''
|
||||
m = LeNet_v3(1, 10)
|
||||
a = Data_aug()
|
||||
model = Augmented_model(model=m,
|
||||
data_augmenter=a,
|
||||
optimizer=opt).to(device) # the same optimizer used twice?...
|
||||
'''
|
||||
'''
|
||||
m = LeNet_v3(1, 10)
|
||||
a = Data_aug()
|
||||
model = Augmented_model(model=m, data_augmenter=a).to(device)
|
||||
#optimizer = SGD(model.parameters())
|
||||
optimizer = SGD(model.parameters(), lr=0.01, height=1)
|
||||
'''
|
||||
|
||||
|
||||
#for idx, m in enumerate(model.modules()):
|
||||
# print(idx, '->', m)
|
||||
print("Running...", str(model))
|
||||
model.initialize()
|
||||
#print_model(model)
|
||||
#model.data_augmentation(False)
|
||||
#model.eval()
|
||||
|
||||
log = train_v2(model=model, optimizer=optimizer, epochs=epochs, reshape_in=reshape_in, device=device)
|
||||
model.eval()
|
||||
acc = test(model, reshape_in, device=device)
|
||||
|
||||
|
||||
#param = [p for p in model.param_grad() if p.grad is not None]
|
||||
#if(len(param)!=0):
|
||||
# print(param[-2],' / ', param[-2].grad)
|
||||
# print(param[-1],' / ', param[-1].grad)
|
||||
|
||||
out = {"acc": acc, "log": log, "usr": usr}
|
||||
with open("log/%s.json" % name, "w+") as f:
|
||||
json.dump(out, f, indent=True)
|
||||
times = [x["time"] for x in log]
|
||||
print("Times (ms):", np.mean(times), "+/-", np.std(times))
|
||||
print("Final accuracy:", acc)
|
||||
|
||||
#plot_res(log, fig_name='res/'+name)
|
||||
|
||||
return out
|
||||
|
||||
def make_adam_stack(height, top=0.0000001, device = torch.device('cuda')):
|
||||
#print(height,device)
|
||||
if height == 0:
|
||||
return Adam(alpha=top, device=device)
|
||||
return Adam(alpha=top, optimizer=make_adam_stack(height - 1, top, device=device), device=device)
|
||||
|
||||
def plot_res(log, fig_name='res'):
|
||||
|
||||
fig, ax = plt.subplots(ncols=3, figsize=(15, 3))
|
||||
ax[0].set_title('Loss')
|
||||
ax[0].plot([x["loss"] for x in log])
|
||||
|
||||
ax[1].set_title('Acc')
|
||||
ax[1].plot([x["acc"] for x in log])
|
||||
|
||||
ax[2].set_title('mag')
|
||||
ax[2].plot([x["data_aug"] for x in log])
|
||||
|
||||
plt.savefig(fig_name)
|
||||
|
||||
def print_torch_mem(add_info=''):
|
||||
|
||||
nb=0
|
||||
max_size=0
|
||||
for obj in gc.get_objects():
|
||||
try:
|
||||
if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): # and len(obj.size())>1:
|
||||
#print(i, type(obj), obj.size())
|
||||
size = np.sum(obj.size())
|
||||
if(size>max_size): max_size=size
|
||||
nb+=1
|
||||
except:
|
||||
pass
|
||||
print(add_info, "-Pytroch tensor nb:",nb," / Max dim:", max_size)
|
||||
|
||||
def print_model(model, fig_name='graph/graph'): # does not seem to work for models written in the functional style
|
||||
x = torch.randn(1,1,28,28, device=device)
|
||||
dot=make_dot(model(x), params=dict(model.named_parameters()))
|
||||
dot.format = 'svg' #https://graphviz.readthedocs.io/en/stable/manual.html#formats
|
||||
dot.render(fig_name)
|
||||
print("Model graph generated !")
|
||||
|
||||
def viz_data(fig_name='data_sample'):
|
||||
|
||||
features_, labels_ = next(iter(dl_train))
|
||||
plt.figure(figsize=(10,10))
|
||||
#for i, (features_, labels_) in enumerate(dl_train):
|
||||
for i in range(25):
|
||||
if i==25: break
|
||||
#print(features_.size(), labels_.size())
|
||||
|
||||
plt.subplot(5,5,i+1)
|
||||
plt.xticks([])
|
||||
plt.yticks([])
|
||||
plt.grid(False)
|
||||
|
||||
img = features_[i,0,:,:]
|
||||
|
||||
#print('im shape',img.shape)
|
||||
plt.imshow(img, cmap=plt.cm.binary)
|
||||
plt.xlabel(labels_[i].item())
|
||||
|
||||
plt.savefig(fig_name)
|
||||
|
||||
##########################################
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
os.mkdir("log")
|
||||
except FileExistsError:
|
||||
print("log/ exists already")
|
||||
|
||||
device = torch.device('cuda')
|
||||
|
||||
run(make_adam_stack(height=1, top=0.001, device=device),
|
||||
"Augmented_MNIST",
|
||||
epochs=100,
|
||||
cnn=True,
|
||||
device = device)
|
||||
print()
|